# Grid generation
In this notebook we generate the location grid, then we proceed to visualize trajectories on the grid.

We start with an inizialization block.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml

with open("conf.yaml") as f:
    conf = yaml.load(f, Loader = yaml.FullLoader)

base_path = conf["base_path"]
data_path = conf["data_path"]
out_path = conf["out_path"]
detour_radius = conf["detour_radius"]
lambdas = conf["lambdas"]
delta = conf["delta"]

# Define a basic Haversine distance formula
def haversine(lat1, lon1, lat2, lon2):
    
    lat1, lon1, lat2, lon2 = map(np.deg2rad, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1 
    dlon = lon2 - lon1 
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a)) 
    #total_meters = METERS * c
    r = 6371000 #radiu * 1000 to return meters
    return c * r



## Step by step generation
Now we define a bounding box of the Beijing region and generate a grid according to it

In [3]:
import shapely.geometry
import pyproj

# Set up projections
p_ll = pyproj.Proj(init='epsg:4214')
p_mt = pyproj.Proj(init='epsg:3857') # metric; same as EPSG:900913

# Create corners of rectangle to be transformed to a grid
sw = shapely.geometry.Point(115.7657, 39.6332)
ne = shapely.geometry.Point(116.7435, 40.1411)
# This is a wide region in the beijing area

stepsize = 6561.68 # 5000m grid step size
# Project corners to target projection
transformed_sw = pyproj.transform(p_ll, p_mt, sw.x, sw.y) # Transform SW point to 3857
transformed_ne = pyproj.transform(p_ll, p_mt, ne.x, ne.y) # .. same for NE

# Iterate over 2D area
gridpoints = []
x = transformed_sw[0]
while x < transformed_ne[0]:
    y = transformed_sw[1]
    while y < transformed_ne[1]:
        p = shapely.geometry.Point(pyproj.transform(p_mt, p_ll, x, y))
        gridpoints.append(p)
        y += stepsize
    x += stepsize
    #print("generated {:d} points".format(len(gridpoints)))

grid = [(p.x, p.y) for p in gridpoints]
cols = ("lon", "lat")
grid_df = pd.DataFrame(grid, columns=cols)

print("Finished generating a regular grid with {:d} locations".format(len(grid_df)))

  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  from ipykernel import kernelapp as app
  app.launch_new_instance()


Finished generating a regular grid with 204 locations


Now we serialize our grid as a .csv

In [14]:
grid_df.to_csv(data_path+"5000_m_grid.csv")

## Visualizing locations and trajectories
We use folium for a deeper visualization of grid locations, including some user trajectories.


In [6]:
print("There are a total of {:d} locations in our dataset".format(len(grid_df)))

grid_df["location"] = grid_df.index.values
print(grid_df.head())

NameError: name 'grid_df' is not defined

We proceed to read the dataset from disk

In [12]:
cols = ["lat", "lon", "uid", "tid","date_time"]
dataset = pd.read_csv(data_path + "augmented_dataset.csv", usecols=cols, parse_dates = True)
print("Successfully loaded trajectory dataset")
print(dataset.head())

Successfully loaded trajectory dataset
         lat         lon   uid    tid                   date_time
0  39.910381  116.432626  3000  18644         2008-07-19 06:11:02
1  39.910382  116.432685  3000  18644  2008-07-19 06:11:02.045486
2  39.910382  116.432743  3000  18644  2008-07-19 06:11:02.090970
3  39.910383  116.432802  3000  18644  2008-07-19 06:11:02.136452
4  39.910383  116.432860  3000  18644  2008-07-19 06:11:02.181931


## Folium interactive map
We use a folium interactive map to show trajectories intersecting each location's detour radius.

In [8]:
import skmob
import folium

tdf = skmob.TrajDataFrame(dataset, latitude='lat', longitude="lon", datetime='date_time', user_id='uit', trajectory_id="tid")

m = folium.Map(location=[39.9042, 116.4074], tiles="Stamen Toner")

for lat, lon, index in zip(grid_df.lat.values, grid_df.lon.values, grid_df.index.values):
    #folium.Marker([lat, lon], popup="location" + str(index), tooltip=str(index)).add_to(m)
    folium.Circle(
        location=[lat, lon],
        radius=500,
        popup='location {:d} radius'.format(index),
        color='#3186cc',
        fill=True,
        fill_color='#3186cc'
        ).add_to(m)

#change this to plot more trajectories
trajs_to_plot = 30
for tid in tdf.tid.unique():
    tdf[tdf["tid"] == tid].plot_trajectory(m, weight=4, opacity=1, start_end_markers=False)
    trajs_to_plot -= 1
    if (trajs_to_plot == 0):
        break

m

NameError: name 'grid_df' is not defined

Now we save our map on disk as an html file

In [13]:
m.save(outfile= out_path + "locations_grid_trajectories.html")

# Different strategies for location picking
we investigate new strategies for choosing our locations.

We begin with a random picking and then we get picks from an external csv.

## Random picking

In [15]:
import random as rand

#picking 50 random points in our inner bounding box perimeter
points = 50
rd_pts = [(rand.uniform(39.8, 40.0), (rand.uniform(116.2, 116.6))) for i in range(points)]
cols = ("lat", "lon")
rd_locations = pd.DataFrame(rd_pts, columns=cols)

In [16]:
print(rd_locations.head())

         lat         lon
0  39.958259  116.301212
1  39.989632  116.535627
2  39.950774  116.364711
3  39.985256  116.461522
4  39.910767  116.513713


Now we visualize the random grid in a folium map

In [9]:
rd_m = folium.Map(location=[39.9042, 116.4074], tiles="Stamen Toner")

for lat, lon, index in zip(rd_locations.lat.values, rd_locations.lon.values, rd_locations.index.values):
    #folium.Marker([lat, lon], popup="location" + str(index), tooltip=str(index)).add_to(rd_m)
    folium.Circle(
        location=[lat, lon],
        radius=1000,
        popup='location {:d} radius'.format(index),
        color='#3186cc',
        fill=True,
        fill_color='#3186cc'
        ).add_to(rd_m)

#change this to plot more trajectories
trajs_to_plot = 20
for tid in tdf.tid.unique():
    tdf[tdf["tid"] == tid].plot_trajectory(rd_m, weight=4, opacity=1, start_end_markers=False)
    trajs_to_plot -= 1
    if (trajs_to_plot == 0):
        break

rd_m

NameError: name 'rd_locations' is not defined

## CSV loading
we try loading a previously generated csv with POIs (such as subway stations, in this example) in order to use them as locations.

In [10]:
df = pd.read_csv(data_path+"POIs.csv")
df = df[(df['lat'].between(39.8, 40.0)) & (df['lon'].between(116.2, 116.6))]
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 544 entries, 0 to 954
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   name     456 non-null    object 
 1   name:en  331 non-null    object 
 2   lat      544 non-null    float64
 3   lon      544 non-null    float64
dtypes: float64(2), object(2)
memory usage: 21.2+ KB
None


In [13]:
import folium
import pandas as pd
import skmob

df = pd.read_csv(data_path + "beijing_subway_stations.csv")
tdf = skmob.TrajDataFrame(dataset, latitude='lat', longitude="lon", datetime='date_time', user_id='uit', trajectory_id="tid")



csv_m = folium.Map(location=[39.9042, 116.4074], tiles="Stamen Toner")

for lat, lon, name in zip(df.lat, df.lon, df.name):
    #folium.Marker((lat, lon), popup=name).add_to(csv_m)
    folium.Circle(
        location=[lat, lon],
        radius=250,
        #popup='{:s} station radius'.format(name),
        color='#3186cc',
        fill=True,
        fill_color='#3186cc'
        ).add_to(csv_m)

#change this to plot more trajectories
trajs_to_plot = 20
for tid in tdf.tid.unique():
    tdf[tdf["tid"] == tid].plot_trajectory(csv_m, weight=4, opacity=1, start_end_markers=False)
    trajs_to_plot -= 1
    if (trajs_to_plot == 0):
        break


csv_m

## Considerations on fixed locations
Metro stations might be especially useful to get coverage from people going by foot, while other types of facilities might be exploited for different kinds of mobility profiles.
Motorway Junctions could be good if we had to address a mainly car-based mobility profile, while bycicle lanes could be used for cyclists.