In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import numpy as np
from geopandas.tools import sjoin
import shapely
from shapely.geometry import Point
import pyreadr
import mercantile
from shapely.geometry import shape
import networkx as nx
import seaborn as sn
import pickle
import scipy.sparse as sp
from scipy.optimize import least_squares
import datetime
import lmfit
from sklearn import metrics
import seaborn as sns

# Generation of timeseries of networks for daily mobility flows

## Import data and preprocessing

In [2]:
# Import data
# NOTE(review): the paths below are absolute and user-specific; a configurable
# data directory would make the notebook portable.
root_data = '/Users/ignaciosacristanbarba/Documents/M4R/Data'

# Read the time series of between-tile movement data
root1 = root_data+'/BETWEEN_TILES_TRIP_NUMBERS.csv'
df = pd.read_csv(root1)


# Load LSCC
root_results = '/Users/ignaciosacristanbarba/Documents/M4R/Results'
root_Base = root_results+'/Base Network/'
root2 = root_Base+'/base_network_lscc.npz'

# NOTE(review): despite its .npz extension this file is read with pickle.
# pickle.load can execute arbitrary code, so only load files you trust.
with open(root2, 'rb') as handle:
    lscc_dict = pickle.load(handle)

# Generate DiGraph from the dict-of-dicts and index its nodes 0..n_nodes-1
lscc = nx.from_dict_of_dicts(lscc_dict,create_using = nx.DiGraph)
lscc_nodes = list(lscc.nodes())
n_nodes = len(lscc_nodes)
node_dict = {node : position for position, node in enumerate(lscc_nodes)}


# Compute adjacency matrix of LSCC
A_LSCC = nx.adjacency_matrix(lscc)

########################
# Load D_geom for LSCC #
########################

root_D_geom = root_Base + '/lscc_D_geom.pickle'
with open(root_D_geom, 'rb') as handle:
    D_geom = pickle.load(handle)


##############################
# Load D_geom for Full Graph #
##############################

root_D_geom_full = root_Base + '/base_D_geom.pickle'
with open(root_D_geom_full, 'rb') as handle:
    D_geom_full = pickle.load(handle)

###########################
# Get dates of timeseries #
###########################

# Get start and end dates from the first 10 characters of the label of the
# third column and of the last column.
# NOTE(review): assumes those labels start with a YYYY-MM-DD date - confirm.
start_date = df.columns.values[2][:10]
end_date = df.columns.values[-1][:10]

# Generate the daily DatetimeIndex once and derive everything from it
date_index = pd.date_range(start=start_date, end=end_date)
days = date_index.date
n_days = len(days)
# 'MM-DD' label of every day
days_dm = np.asarray([str(day)[5:] for day in days])
# ISO week number of every day; date.isocalendar() is used because the
# pandas `weekofyear` / `week` accessors are not available in pandas >= 2.0.
days_week = np.asarray([day.isocalendar()[1] for day in days], dtype='int')
# NOTE(review): np.arange excludes its stop value, so the last week number in
# days_week is not part of `weeks` - confirm this is intended.
weeks = np.arange(days_week.min(),days_week.max())

# Indicate weekdays (Monday-Friday); copied so the mask can be edited below
weekday = np.array(date_index.weekday < 5)

# Consider bank holidays: look up their positions in `days` and unmark them.
# A holiday that is not in `days` raises IndexError.
bank_holiday_dates = [datetime.date(2020, 4, 10),
                      datetime.date(2020, 4, 13),
                      datetime.date(2020, 5, 8),
                      datetime.date(2020, 5, 25)]
bank_holidays = [np.argwhere(days == holiday)[0][0] for holiday in bank_holiday_dates]
weekday[bank_holidays] = False

# Store lockdown-date
lockdown_date = pd.to_datetime('20200324', format='%Y%m%d')
# `days` holds datetime.date objects, so compare against a date rather than
# the Timestamp itself (the two types do not compare equal in pandas >= 2.0).
lockdown_date_number = np.argwhere(days == lockdown_date.date()).flatten()[0]
lockdown_week = lockdown_date.isocalendar()[1]

# Store date information
timestamps = {'days' : days, 'weekdays' : weekday, 'weeks' : weeks,
              'lockdown_day' : lockdown_date}

root_store = '/Users/ignaciosacristanbarba/Documents/M4R/Results'+'/Timeseries/'
root3 = root_store+'timestamps.pkl'

# Store data (serialize)
with open(root3, 'wb') as handle:
    pickle.dump(timestamps, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
# Read the NUTS level 3 boundary shapefile and reproject it to EPSG:3395
root = '/Users/ignaciosacristanbarba/Documents/M4R/Data'
root_map = root+'/NUTS_Level_3__January_2018__Boundaries-shp/NUTS_Level_3__January_2018__Boundaries.shp'
map_gdf = gpd.read_file(root_map).to_crs("EPSG:3395")

# Drop the columns at the listed positions and expose the 'nuts318cd'
# column under the shorter name 'nuts'
cols = [0,2,3,4,5,6,7,8]
gdf_NUTS3 = (
    map_gdf
    .drop(columns=map_gdf.columns[cols])
    .rename(columns={'nuts318cd': 'nuts'})
)

## Generation of daily evolution of LSCC

Generate one network per day by summing the movement counts over all time windows of that day (three columns per day in the movement table).

In [4]:
def rewire_graph(G):
    '''
    Rebuild G so that its nodes are inserted in the key order of lscc_dict.

    The adjacency of G is exported as a dict-of-dicts, its outer entries are
    re-inserted following the order of the first len(G) keys of the global
    lscc_dict, and a new DiGraph is built from the reordered dict.

    Parameters
    ----------
    G : networkx graph
        Graph whose nodes are expected to be keys of lscc_dict.

    Returns
    -------
    networkx.DiGraph
        New graph built from the reordered adjacency dict.

    Raises
    ------
    KeyError
        If one of the first len(G) keys of lscc_dict is not a node of G.
    IndexError
        If G has more nodes than lscc_dict has keys.
    '''
    n_nodes = len(G.nodes())
    G_dict = nx.to_dict_of_dicts(G)
    # Materialise the reference ordering once instead of rebuilding the key
    # list for every node.
    ordered_keys = list(lscc_dict.keys())
    new_dict = {ordered_keys[i] : G_dict[ordered_keys[i]] for i in range(n_nodes)}
    G_new = nx.from_dict_of_dicts(new_dict, create_using = nx.DiGraph)

    return G_new

In [5]:
# Set to True to rebuild the daily DiGraphs from the raw movement table;
# otherwise the previously pickled list of graphs is loaded.
compute_again = False

# Location of the cached list of daily DiGraphs (written or read below)
root4 = root_store+'timeseries_daily_digraphs_lscc.pkl'

if compute_again:

    # Converting the quadkeys to strings
    df['start_quadkey'] = df['start_quadkey'].astype(str)
    df['end_quadkey'] = df['end_quadkey'].astype(str)
    # adding a leading '0' to quadkeys beginning with 3 so it maps on to web mercator
    df.loc[df['start_quadkey'].str[:1] == '3', 'start_quadkey'] = '0'+df['start_quadkey']
    df.loc[df['end_quadkey'].str[:1] == '3', 'end_quadkey'] = '0'+df['end_quadkey']

    # Replace nan by 0
    df = df.fillna(0)

    ###################################
    # Filtering out rows outside LSCC #
    ###################################

    # Get LSCC quadkeys
    quadkeys_timeseries_LSCC = set(lscc_dict.keys())

    # Keep only rows whose start AND end tile both belong to the LSCC.
    # A vectorised mask replaces a per-row drop, which is quadratic in the
    # number of rows.
    in_lscc = (df['start_quadkey'].isin(quadkeys_timeseries_LSCC)
               & df['end_quadkey'].isin(quadkeys_timeseries_LSCC))
    df_filtered = df[in_lscc]

    print('Number of deleted rows:', df.shape[0] - df_filtered.shape[0])

    df = df_filtered.reset_index(drop=True)

    #####################################
    # Generate daily DiGraphs from data #
    #####################################

    start_keys = df['start_quadkey']
    end_keys = df['end_quadkey']

    M = df.shape[1]-3
    networks = []

    # Columns 0-1 hold the quadkeys; each step covers three consecutive data
    # columns, which are summed into one daily movement value per row.
    for i in range(1,M,3):

        # Get daily data
        movement = df.iloc[:,i+1:i+4].sum(axis=1).to_numpy()

        # Generate DiGraph
        G = nx.DiGraph()
        # for each row, add both nodes and, if there was movement, a weighted edge
        for start, end, weight in zip(start_keys, end_keys, movement):
            G.add_node(start)
            G.add_node(end)
            if weight > 0.0:
                G.add_weighted_edges_from([(start, end, weight)])

        # Append DiGraph to list of networks
        networks.append(G)

    ###########################################
    # Permute nodes such that they match LSCC #
    ###########################################

    networks = [rewire_graph(daily_graph) for daily_graph in networks]

    ########################
    # Store daily DiGraphs #
    ########################

    # Store data (serialize)
    with open(root4, 'wb') as handle:
        pickle.dump(networks, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Load networks
else:

    # NOTE(review): pickle.load can execute arbitrary code; only load a cache
    # file produced by this notebook.
    with open(root4, 'rb') as handle:
        networks = pickle.load(handle)
        
##############################
# Compute adjacency matrices #
##############################

n_networks = len(networks)
networks_adjacency = [nx.adjacency_matrix(daily_graph) for daily_graph in networks]


#####################
# Compute geography #
#####################

# The node list of the first network is used as the reference ordering
G = networks[0]
quadkeys = list(G.nodes)
n_nodes = len(quadkeys)

polys = []

# Iterate over the quadkeys to extract the tile geometries
for quadkey in quadkeys:
    tile = mercantile.feature(mercantile.quadkey_to_tile(quadkey), projected = 'web mercator')
    polys.append(tile.get('geometry'))

geom = [shape(i) for i in polys]
# Map each quadkey to the first two bounds values of its tile centroid
geom_dict = {quadkeys[i] : list(geom[i].centroid.bounds[:2]) for i in range(0,n_nodes)}

# Attach the centroid as node attribute 'geom' to every daily network.
# Iterating over `networks` itself keeps this independent of n_days.
for G in networks:
    nx.set_node_attributes(G,geom_dict,'geom')

# Store node keys: position in the reference ordering -> quadkey
node_numbers = dict(enumerate(quadkeys))
# Geom dict for node keys: position in the reference ordering -> centroid
geom_dict_numbers = {i : geom_dict[node_numbers[i]] for i in range(n_nodes)}

In [6]:
# Number of entries in node_numbers (one per node of networks[0])
len(node_numbers)

3125

All networks have the same nodes and the same node keys.

In [7]:
# Directory path for figures; not referenced in the cells visible here
# (presumably used when saving plots further down - confirm)
root_figure = '/Users/ignaciosacristanbarba/Documents/M4R/Figures/'

## Compute evolution of radius of gyration for LSCC

In [8]:
# Element-wise squared distance matrices for the radius of gyration.
# D2_inter squares D_geom as is; D2_total first adds 2.5 to every diagonal
# entry (presumably a within-tile distance for self-loops - confirm units).
self_loop_offset = 2.5 * np.eye(n_nodes)
D2_inter = D_geom**2
D2_total = (D_geom + self_loop_offset)**2

In [9]:
def _radius_of_gyration(A, D2):
    '''
    Nodal radius of gyration for a dense adjacency matrix.

    For every node i this returns
    sqrt( sum_j A[i, j] / d_out[i] * D2[i, j] ),
    where d_out[i] is the i-th row sum of A.

    Parameters
    ----------
    A : numpy.ndarray, shape (n, n)
        Dense weighted adjacency matrix.
    D2 : numpy.ndarray, shape (n, n)
        Squared distances between nodes.

    Returns
    -------
    numpy.ndarray, shape (n,)
        One value per node; nan for a node whose row sum is zero (0/0).
    '''
    # Keep d_out as a column so that row i is divided by its own out-strength;
    # a 1-D d_out would broadcast along the columns and divide entry (i, j)
    # by d_out[j] instead.
    d_out = np.sum(A, axis=1, keepdims=True)
    return np.sqrt(np.sum(A / d_out * D2, axis=1))


# Nodal values: one row per day, one column per node
r_total_nodal = np.zeros((n_days, n_nodes))
r_inter_nodal = np.zeros((n_days, n_nodes))

for i, A in enumerate(networks_adjacency):
    # Get dense adjacency matrix
    A = A.toarray()
    # Radius of gyration with self-loops
    r_total_nodal[i,:] = _radius_of_gyration(A, D2_total)
    # Without self-loops
    r_inter_nodal[i,:] = _radius_of_gyration(A, D2_inter)

# Median and quartiles over the nodes, for every day
r_total_median = np.median(r_total_nodal,axis=1)
r_total_Q1 = np.percentile(r_total_nodal,25,axis=1)
r_total_Q3 = np.percentile(r_total_nodal,75,axis=1)
r_inter_median = np.median(r_inter_nodal,axis=1)
r_inter_Q1 = np.percentile(r_inter_nodal,25,axis=1)
r_inter_Q3 = np.percentile(r_inter_nodal,75,axis=1)

# Store results in dataframe
results = pd.DataFrame({'day':days})
results['r_total_median'] = r_total_median
results['r_total_Q1'] = r_total_Q1
results['r_total_Q3'] = r_total_Q3
results['r_inter_median'] = r_inter_median
results['r_inter_Q1'] = r_inter_Q1
results['r_inter_Q3'] = r_inter_Q3

# Compute baseline values from the LSCC adjacency matrix
A = A_LSCC.toarray()
d_out = np.sum(A,axis=1)
r_total_nodal_lscc = _radius_of_gyration(A, D2_total)
r_total_median_lscc = np.percentile(r_total_nodal_lscc,50)
r_total_Q1_lscc = np.percentile(r_total_nodal_lscc,25)
r_total_Q3_lscc = np.percentile(r_total_nodal_lscc,75)
# The square root is applied here as well, so the baseline is a radius like
# the daily inter-tile values it is compared with.
r_inter_nodal_lscc = _radius_of_gyration(A, D2_inter)
r_inter_median_lscc = np.percentile(r_inter_nodal_lscc,50)
r_inter_Q1_lscc = np.percentile(r_inter_nodal_lscc,25)
r_inter_Q3_lscc = np.percentile(r_inter_nodal_lscc,75)

# Save

In [10]:
# Write the nodal radius of gyration (self-loops included) to CSV:
# one row per node, labelled by its node key, and one column per day index.
root4 = root_results + '/Timeseries' +'/radius_of_gyration_lscc.csv'
col_names = [node_numbers[node_idx] for node_idx in range(n_nodes)]
r_total_nodal_df = pd.DataFrame(r_total_nodal, columns=col_names).T
r_total_nodal_df.to_csv(root4)