## Different coverage scenarios
In this notebook we analyze coverage scenarios for three different kinds of sensing: environmental monitoring, traffic fluxes and POI-based sensing.

In [1]:
#we set up all the functions we need and load up some datasets.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml

# Read the notebook settings from conf.yaml; later cells look up "lambdas",
# "detour_radius" and "delta" in the resulting dict.
# NOTE(review): FullLoader accepts more than plain scalars/lists; if conf.yaml
# only holds plain values, yaml.safe_load would be sufficient - confirm.
with open("conf.yaml") as f:
    conf = yaml.load(f, Loader=yaml.FullLoader)

# Path prefixes: file names are appended to them with plain string concatenation.
base_path, data_path, out_path = (
    conf[key] for key in ("base_path", "data_path", "out_path")
)

# Columns read from the trajectory dataset.
cols = ["lat", "lon", "uid", "tid", "date_time"]

# Define a basic Haversine distance formula
def haversine(lat1, lon1, lat2, lon2):
    """Return the haversine (great-circle) distance in meters.

    All four arguments are angles in degrees. They are combined with NumPy
    ufuncs, so scalars and arrays can be mixed and the result follows NumPy
    broadcasting (e.g. one point against an array of points).
    """
    earth_radius_m = 6371000  # Earth radius in km * 1000, so the result is in meters

    phi1, theta1, phi2, theta2 = (
        np.deg2rad(angle) for angle in (lat1, lon1, lat2, lon2)
    )
    delta_phi = phi2 - phi1
    delta_theta = theta2 - theta1

    # Haversine of the central angle between the two points.
    hav = np.sin(delta_phi/2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_theta/2)**2
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return central_angle * earth_radius_m

import scipy.integrate as integrate

def coverage(loc_array, tot_dists_df, lam, delta):
    """Build a table pairing each location with its coverage probability.

    Parameters
    ----------
    loc_array : sequence of location ids; becomes the "location" column.
    tot_dists_df, lam, delta : forwarded unchanged to calculate_coverage.

    Returns
    -------
    pandas.DataFrame with the columns "location" and "probability".
    """
    result = pd.DataFrame()
    result["location"] = loc_array
    result["probability"] = calculate_coverage(loc_array, tot_dists_df, lam, delta)
    return result

def calculate_coverage(locations, tot_dists_df, lam, delta):
    """Compute the coverage probability of every location.

    For a waypoint at distance ``d`` from a location, the probability of
    sensing the location is the mass of the exponential density
    ``lam * exp(-lam * x)`` on ``[d, d + delta]``. A user covers a location if
    at least one of their waypoints senses it; a location is covered if at
    least one user covers it.

    Parameters
    ----------
    locations : sequence of location ids to evaluate.
    tot_dists_df : pandas.DataFrame with the columns "location", "uid" and
        "distance". A location whose rows contain a null "uid" is treated as
        having no waypoints in range.
    lam : rate of the exponential density.
    delta : width of the integration window added to each distance.

    Returns
    -------
    list with one coverage value per entry of ``locations``, in the same
    order. Locations with no usable waypoints get 0.

    Notes
    -----
    * The per-location aggregation is ``1 - prod(1 - p_user)``. The previous
      ``1 - prod(p_user)`` gave ~0 coverage to a location that a single user
      covered with near certainty.
    * The integral is evaluated in closed form,
      ``exp(-lam * d) - exp(-lam * (d + delta))``, instead of calling a
      numerical quadrature once per waypoint.
    """
    total = len(locations)
    coverages = []
    print("Calculating coverage with lambda set to: {:4f} and delta set to: {:f}".format(lam, delta))

    def miss_probability(distances):
        # 1 - P(sensing) for each waypoint distance (vectorised closed form).
        sensed = np.exp(-lam * distances) - np.exp(-lam * (distances + delta))
        return 1 - sensed

    for count, loc in enumerate(locations, start=1):
        # Filter once per location and reuse the subset below.
        rows = tot_dists_df[tot_dists_df.location == loc]

        if rows.uid.isnull().values.any():
            # Placeholder row: the location has no points whatsoever.
            location_coverage = 0
        else:
            # Probability that each user senses the location at least once.
            user_probs = [
                1 - np.prod(miss_probability(user_rows.distance.values))
                for _, user_rows in rows.groupby("uid")
            ]
            # Covered unless every user misses it (an empty list yields 0.0).
            location_coverage = 1 - np.prod(1 - np.asarray(user_probs, dtype=float))

        coverages.append(location_coverage)
        print("Computed coverage on {:d} out of {:d} locations, with lambda set to {:f}".format(count, total, lam), end="\r")

    return coverages

#calculates coverages on multiple lambda values and serializes them on disk
def coverage_multiple_lambdas(lambdas, delta, locations, tot_dists_df, prefix, locations_df=None):
    """Compute coverage for several lambda values and write one CSV per value.

    Parameters
    ----------
    lambdas : iterable of lambda values; one output file is written for each.
    delta, locations, tot_dists_df : forwarded to coverage().
    prefix : string prepended to every output file name.
    locations_df : optional pandas.DataFrame with a "location" column, merged
        with the coverage table before writing. When omitted, the
        notebook-global ``df`` is used, which keeps existing five-argument
        calls working; passing it explicitly removes the dependency on
        whichever table ``df`` currently points to.

    Side effects
    ------------
    Writes ``<prefix>_coverages_<lambda>_lambda.csv`` under ``data_path``,
    with "." in the lambda value replaced by ",".
    """
    if locations_df is None:
        # Backward-compatible fallback to the table loaded by the calling cell.
        locations_df = df

    for value in lambdas:
        coverages = coverage(locations, tot_dists_df, value, delta)
        # Attach the coverage probabilities to the location table on the "location" id.
        merged = locations_df.merge(coverages, on="location")
        name = prefix + "_coverages_" + str(value).replace(".", ",") + "_lambda.csv"
        merged.to_csv(data_path + name)
        

# Load the trajectory waypoints, restricted to the columns listed in `cols`.
# parse_dates=True only asks pandas to parse the index; with no index_col that
# leaves "date_time" as plain strings, so the column is named explicitly.
dataset = pd.read_csv(data_path + "augmented_dataset.csv", usecols=cols, parse_dates=["date_time"])
print("Successfully loaded trajectory dataset")



Successfully loaded trajectory dataset


We load up lambda values

In [2]:
#used to parse the conf lambdas
def convert(s):
    """Turn a configuration entry into a float.

    Accepts anything float() accepts; if float() raises ValueError the value
    is treated as a "numerator/denominator" string and the quotient is
    returned.
    """
    try:
        value = float(s)
    except ValueError:
        numerator, denominator = s.split('/')
        value = float(numerator) / float(denominator)
    return value

# Parse every configured lambda (plain number or "a/b" fraction) into a float.
lambdas = [convert(entry) for entry in conf["lambdas"]]

print(lambdas)

[0.01, 0.00333333, 0.00142857]


## Traffic fluxes sensing
We load a CSV containing all of Beijing's subway stations and compute coverage on them, after keeping only the trajectory waypoints that fall within a given detour radius of each station.

In [3]:
# Load the subway stations table.
df = pd.read_csv(data_path + "beijing_subway_stations.csv")
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only adds a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 312 entries, 0 to 311
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    312 non-null    object 
 1   lat     312 non-null    float64
 2   lon     312 non-null    float64
dtypes: float64(2), object(1)
memory usage: 7.4+ KB
None


In [4]:
# Restrict the waypoints according to the detour radius: for every sensing
# location keep the distance (and uid) of each dataset waypoint that lies
# within the radius. Locations with no waypoint in range get one NaN row.

detour_radius = conf["detour_radius"] #meters
dists = []
count = 0

for location in df.index.values:
    site = df.loc[location]
    d_i_h = pd.DataFrame(
        {"distance": haversine(site.lat, site.lon, dataset["lat"].values, dataset["lon"].values)}
    )
    d_i_h = d_i_h.loc[d_i_h["distance"] < detour_radius]
    if d_i_h.empty:
        # Placeholder row so the location still shows up after concatenation.
        d_i_h = pd.DataFrame({"location":location, "uid":np.nan, "distance":np.nan}, index=[0])
        count += 1
    else:
        # The surviving rows keep their original positions as index, so the
        # uid column aligns with the matching dataset rows.
        d_i_h = d_i_h.assign(uid=dataset["uid"], location=location)
    dists.append(d_i_h)

print("There are {:d} locations that have no points in detour radius' range".format(count))

subway_dists = pd.concat(dists)
print(subway_dists["location"].nunique(), " - Unique sensing locations")

There are 10 locations that have no points in detour radius' range
312  - Unique sensing locations


In [5]:
# Location ids in order of first appearance in subway_dists, attached to the
# stations table as its "location" column.
locations = subway_dists["location"].unique()
df = df.assign(location=locations)

print(df)

     name        lat         lon  location
0      西四  39.922554  116.366718         0
1      高井  39.947944  116.152161         1
2     金台路  39.921706  116.472480         2
3      枣营  39.942932  116.469014         3
4    朝阳公园  39.931925  116.472418         4
..    ...        ...         ...       ...
307  六道口站  39.999398  116.347699       307
308   首经贸  39.843178  116.314072       308
309   高楼金  39.861837  116.679565       309
310    群芳  39.862322  116.664681       310
311   万盛东  39.862333  116.650963       311

[312 rows x 4 columns]


### Traffic fluxes - coverage probability
Now we compute coverage on the locations according to our model and serialize the resulting CSVs.

In [6]:
delta = conf["delta"]
prefix = "subway"  # prefix giving the output .csv files a meaningful name

print("We're about to calculate coverage with the following lambdas: ", lambdas)

coverage_multiple_lambdas(
    lambdas=lambdas,
    delta=delta,
    locations=locations,
    tot_dists_df=subway_dists,
    prefix=prefix,
)

We're about to calculate coverage with the following lambdas:  [0.01, 0.00333333, 0.00142857]
Calculating coverage with lambda set to: 0.010000 and delta set to: 10.000000
Calculating coverage with lambda set to: 0.003333 and delta set to: 10.000000
Calculating coverage with lambda set to: 0.001429 and delta set to: 10.000000
Computed coverage on 312 out of 312 locations, with lambda set to 0.001429

## POI-based sensing
We load a CSV with landmarks and similar POIs in order to compute coverage on it, for sensing based on a wider range of locations.

In [8]:
# Load the POIs table.
df = pd.read_csv(data_path + "POIs.csv")
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only adds a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 956 entries, 0 to 955
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  956 non-null    int64  
 1   name        797 non-null    object 
 2   name:en     533 non-null    object 
 3   lat         956 non-null    float64
 4   lon         956 non-null    float64
dtypes: float64(2), int64(1), object(2)
memory usage: 37.5+ KB
None


In [9]:
# Restrict the waypoints according to the detour radius: for every sensing
# location keep the distance (and uid) of each dataset waypoint that lies
# within the radius. Locations with no waypoint in range get one NaN row.

detour_radius = conf["detour_radius"] #meters
dists = []
count = 0

for location in df.index.values:
    site = df.loc[location]
    d_i_h = pd.DataFrame(
        {"distance": haversine(site.lat, site.lon, dataset["lat"].values, dataset["lon"].values)}
    )
    d_i_h = d_i_h.loc[d_i_h["distance"] < detour_radius]
    if d_i_h.empty:
        # Placeholder row so the location still shows up after concatenation.
        d_i_h = pd.DataFrame({"location":location, "uid":np.nan, "distance":np.nan}, index=[0])
        count += 1
    else:
        # The surviving rows keep their original positions as index, so the
        # uid column aligns with the matching dataset rows.
        d_i_h = d_i_h.assign(uid=dataset["uid"], location=location)
    dists.append(d_i_h)

print("There are {:d} locations that have no points in detour radius' range".format(count))

POIs_dists = pd.concat(dists)
print(POIs_dists["location"].nunique(), " - Unique sensing locations")

There are 64 locations that have no points in detour radius' range
956  - Unique sensing locations


In [10]:
# Location ids in order of first appearance in POIs_dists, attached to the
# POIs table as its "location" column.
locations = POIs_dists["location"].unique()
df = df.assign(location=locations)

print(df)

     Unnamed: 0     name                          name:en        lat  \
0             0      北京东                     Beijing East  39.901209   
1             1       丰台                          Fengtai  39.846962   
2             2       北京                          Beijing  39.902166   
3             3       良乡                       Liangxiang  39.740095   
4             4       永乐                           Yongle  39.651827   
..          ...      ...                              ...        ...   
951         951     世纪之园                              NaN  39.730442   
952         952    天安门广场                Tian'anmen Square  39.902361   
953         953     恒基中心                 Henderson Center  39.905265   
954         954  人民英雄纪念碑  Monument to the People's Heroes  39.903195   
955         955   六佰本商业街                              NaN  40.007395   

            lon  location  
0    116.478643         0  
1    116.282636         1  
2    116.421012         2  
3    116.151421        

### POIs - coverage probability

In [11]:
delta = conf["delta"]
prefix = "POIs"  # prefix giving the output .csv files a meaningful name

print("We're about to calculate coverage with the following lambdas: ", lambdas)

coverage_multiple_lambdas(
    lambdas=lambdas,
    delta=delta,
    locations=locations,
    tot_dists_df=POIs_dists,
    prefix=prefix,
)

We're about to calculate coverage with the following lambdas:  [0.01, 0.00333333, 0.00142857]
Calculating coverage with lambda set to: 0.010000 and delta set to: 10.000000
Calculating coverage with lambda set to: 0.003333 and delta set to: 10.000000
Calculating coverage with lambda set to: 0.001429 and delta set to: 10.000000
Computed coverage on 956 out of 956 locations, with lambda set to 0.001429

## Grid-based environmental sensing
We load a previously generated regular grid in order to simulate an environmental type of sensing (e.g. measuring values such as temperature, pressure, etc.).

In [15]:
# Load the grid table.
df = pd.read_csv(data_path + "5000_m_grid.csv")
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only adds a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 204 entries, 0 to 203
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  204 non-null    int64  
 1   lon         204 non-null    float64
 2   lat         204 non-null    float64
 3   location    204 non-null    int64  
dtypes: float64(2), int64(2)
memory usage: 6.5 KB
None


In [16]:
# Restrict the waypoints according to the detour radius: for every sensing
# location keep the distance (and uid) of each dataset waypoint that lies
# within the radius. Locations with no waypoint in range get one NaN row.

detour_radius = conf["detour_radius"] #meters
dists = []
count = 0

for location in df.index.values:
    site = df.loc[location]
    d_i_h = pd.DataFrame(
        {"distance": haversine(site.lat, site.lon, dataset["lat"].values, dataset["lon"].values)}
    )
    d_i_h = d_i_h.loc[d_i_h["distance"] < detour_radius]
    if d_i_h.empty:
        # Placeholder row so the location still shows up after concatenation.
        d_i_h = pd.DataFrame({"location":location, "uid":np.nan, "distance":np.nan}, index=[0])
        count += 1
    else:
        # The surviving rows keep their original positions as index, so the
        # uid column aligns with the matching dataset rows.
        d_i_h = d_i_h.assign(uid=dataset["uid"], location=location)
    dists.append(d_i_h)

print("There are {:d} locations that have no points in detour radius' range".format(count))

grid_dists = pd.concat(dists)
print(grid_dists["location"].nunique(), " - Unique sensing locations")

There are 69 locations that have no points in detour radius' range
204  - Unique sensing locations


In [17]:
# Location ids in order of first appearance in grid_dists, written to the grid
# table's "location" column.
locations = grid_dists["location"].unique()
df = df.assign(location=locations)

print(df)

     Unnamed: 0         lon        lat  location
0             0  115.765700  39.633200         0
1             1  115.765700  39.678581         1
2             2  115.765700  39.723932         2
3             3  115.765700  39.769253         3
4             4  115.765700  39.814545         4
..          ...         ...        ...       ...
199         199  116.708813  39.950240       199
200         200  116.708813  39.995412       200
201         201  116.708813  40.040555       201
202         202  116.708813  40.085667       202
203         203  116.708813  40.130750       203

[204 rows x 4 columns]


### Grid - coverage probability

In [18]:
delta = conf["delta"]
prefix = "grid"  # prefix giving the output .csv files a meaningful name

print("We're about to calculate coverage with the following lambdas: ", lambdas)

coverage_multiple_lambdas(
    lambdas=lambdas,
    delta=delta,
    locations=locations,
    tot_dists_df=grid_dists,
    prefix=prefix,
)

We're about to calculate coverage with the following lambdas:  [0.01, 0.00333333, 0.00142857]
Calculating coverage with lambda set to: 0.010000 and delta set to: 10.000000
Calculating coverage with lambda set to: 0.003333 and delta set to: 10.000000
Calculating coverage with lambda set to: 0.001429 and delta set to: 10.000000
Computed coverage on 204 out of 204 locations, with lambda set to 0.001429