In [1]:
import pandas as pd

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import geopandas as gpd
import numpy as np
from tqdm import tqdm_notebook
from shapely.geometry import Point, LineString
import os

from datetime import datetime, timedelta

import matplotlib.pyplot as plt
%matplotlib inline

import folium
import geoplot


# NOTE(review): absolute, user-specific directories. The read cells visible in
# this notebook load from a relative 'data' folder instead -- confirm whether
# these two paths are still needed and, if so, make them configurable.
data_path = '/home/fterroso/data/'
tweets_path = '/home/fterroso/projects/twitter-crawler/streaming_tweets/'

Read the GeoDataFrame of consecutive trips generated in notebook 3b

In [2]:
# Load the enriched consecutive-trip segments and reproject them to EPSG:25830.
# `to_crs(epsg=...)` replaces the deprecated {'init': 'epsg:...'} dict syntax
# and is accepted by both old and current geopandas releases.
ca_trips_gdf = gpd.read_file(
    os.path.join('data', 'census_trips_consecutive_enriched.geojson'),
    driver='GeoJSON',
    encoding='utf-8',
).to_crs(epsg=25830)

# Parse the origin/destination timestamps into pandas datetimes.
ca_trips_gdf['timestamp_o'] = pd.to_datetime(ca_trips_gdf['timestamp_o'])
ca_trips_gdf['timestamp_d'] = pd.to_datetime(ca_trips_gdf['timestamp_d'])

# Trip duration is stored in seconds; convert the whole column at once.
# Unlike a row-wise timedelta(seconds=x), missing values become NaT
# instead of raising.
ca_trips_gdf['time_length'] = pd.to_timedelta(ca_trips_gdf['time_length'], unit='s')

In [3]:
# Preview the first rows of the loaded trips frame.
ca_trips_gdf.head()

Unnamed: 0,user_id,tw_id_o,timestamp_o,CUSEC_o,CPRO_o,CCA_o,tw_id_d,timestamp_d,CUSEC_d,CPRO_d,CCA_d,dist_km,time_length,speed_km_h,geometry
0,168285338,1234173492922322945,2020-03-01 17:47:45,2807901066,28,13,1234175220807802880,2020-03-01 17:54:37,2807901031,28,13,0.912968,00:06:52,7.977386,LINESTRING (440796.4167641253 4473756.06043492...
1,168285338,1234175220807802880,2020-03-01 17:54:37,2807901031,28,13,1234186473815015425,2020-03-01 18:39:20,2807902083,28,13,2.288855,00:44:43,3.071144,LINESTRING (439959.6752770033 4473390.85717998...
2,168285338,1235648826431717377,2020-03-05 19:30:12,2807901031,28,13,1235650767526055936,2020-03-05 19:37:54,2807902083,28,13,2.288855,00:07:42,17.835236,LINESTRING (439959.6752770033 4473390.85717998...
3,168285338,1256741073700954117,2020-05-03 00:23:15,2807901045,28,13,1256877766752034817,2020-05-03 09:26:25,2807902083,28,13,2.102432,09:03:10,0.232242,LINESTRING (440425.6348667498 4473757.88710830...
4,168285338,1256877766752034817,2020-05-03 09:26:25,2807902083,28,13,1256926064472756224,2020-05-03 12:38:20,2807901031,28,13,2.288855,03:11:55,0.715578,LINESTRING (442012.7041790278 4472378.96245223...


Read province file generated in notebook 2a

In [4]:
# Load the province polygons and reproject them to EPSG:25830, the same CRS
# used for the trips. `to_crs(epsg=...)` avoids the deprecated
# {'init': 'epsg:...'} dict syntax.
province_gdf = gpd.read_file(
    os.path.join('data', 'provinces_with_code.geojson'),
    encoding='utf-8',
).to_crs(epsg=25830)
province_gdf.head()

Unnamed: 0,NATCODE,NAMEUNIT,CODEUNIT,geometry
0,34104600000,València/Valencia,46,(POLYGON ((653037.6610938488 4429500.236400451...
1,34084500000,Toledo,45,"POLYGON ((294238.4652867644 4416957.42816667, ..."
2,34024400000,Teruel,44,"POLYGON ((765333.0112961649 4512252.350615062,..."
3,34094300000,Tarragona,43,(POLYGON ((813560.4348489635 4504876.753000001...
4,34074200000,Soria,42,"POLYGON ((583890.5889343297 4603311.305096235,..."


Enrich the province codes of each trip with the corresponding province names

In [5]:
# Attach the province name to every trip by matching the province code
# (CODEUNIT) against the trip's origin province code (CPRO_o).
# Inner join: trips without a matching province code are not kept.
ca_trips_with_name_gdf = pd.merge(
    province_gdf[['NAMEUNIT', 'CODEUNIT']],
    ca_trips_gdf,
    how='inner',
    left_on='CODEUNIT',
    right_on='CPRO_o',
)
ca_trips_with_name_gdf.head(2)

Unnamed: 0,NAMEUNIT,CODEUNIT,user_id,tw_id_o,timestamp_o,CUSEC_o,CPRO_o,CCA_o,tw_id_d,timestamp_d,CUSEC_d,CPRO_d,CCA_d,dist_km,time_length,speed_km_h,geometry
0,València/Valencia,46,52523147,1233331030834253826,2020-02-28 10:00:06,4625003033,46,10,1233377728189083655,2020-02-28 13:05:40,4625002009,46,10,1.193762,03:05:34,0.385984,LINESTRING (725299.315767807 4371027.364319429...
1,València/Valencia,46,207915704,1236244123914563590,2020-03-07 10:55:42,4622301004,46,10,1236288309397250051,2020-03-07 13:51:16,4625002003,46,10,4.859709,02:55:34,1.660808,LINESTRING (725885.9703448648 4366788.20113071...


In [6]:
# CODEUNIT duplicates CPRO_o after the merge, so drop it, and label the
# merged province name as the origin province name (NPRO_o).
ca_trips_with_name_gdf = (
    ca_trips_with_name_gdf
    .drop(columns='CODEUNIT')
    .rename(columns={'NAMEUNIT': 'NPRO_o'})
)

In [7]:
# Show a bounded preview rather than rendering the whole frame
# (display.max_rows is raised to 500 at the top of the notebook).
ca_trips_with_name_gdf.head(10)

Unnamed: 0,NPRO_o,user_id,tw_id_o,timestamp_o,CUSEC_o,CPRO_o,CCA_o,tw_id_d,timestamp_d,CUSEC_d,CPRO_d,CCA_d,dist_km,time_length,speed_km_h,geometry
0,València/Valencia,52523147,1233331030834253826,2020-02-28 10:00:06,4625003033,46,10,1233377728189083655,2020-02-28 13:05:40,4625002009,46,10,1.193762,03:05:34,0.385984,LINESTRING (725299.315767807 4371027.364319429...
1,València/Valencia,207915704,1236244123914563590,2020-03-07 10:55:42,4622301004,46,10,1236288309397250051,2020-03-07 13:51:16,4625002003,46,10,4.859709,02:55:34,1.660808,LINESTRING (725885.9703448648 4366788.20113071...
2,València/Valencia,337582557,1235138647231475713,2020-03-04 09:42:55,4625002003,46,10,1235329606321868802,2020-03-04 22:21:44,3001501014,30,14,206.080827,12:38:49,16.294910,LINESTRING (725651.9688835129 4371642.27276491...
3,València/Valencia,795944252178108416,1238012483648061440,2020-03-12 08:02:31,4625002003,46,10,1238097679349096453,2020-03-12 13:41:04,2807921028,28,13,298.544068,05:38:33,52.909892,LINESTRING (725651.9688835129 4371642.27276491...
4,València/Valencia,54215936,1233074083824328715,2020-02-27 16:59:05,4625005025,46,10,1233159305953759234,2020-02-27 22:37:44,1818901001,18,01,373.800192,05:38:39,66.227703,LINESTRING (726274.6267015492 4373866.13509991...
5,València/Valencia,54215936,1233399655272198145,2020-02-28 14:32:47,4625002003,46,10,1233409007009910784,2020-02-28 15:09:57,1620302001,16,08,232.524619,00:37:10,375.376066,LINESTRING (725651.9688835129 4371642.27276491...
6,València/Valencia,54215936,1235178599570984960,2020-03-04 12:21:41,4625002003,46,10,1235321856795840514,2020-03-04 21:50:56,4625001032,46,10,0.430942,09:29:15,0.045422,LINESTRING (725651.9688835129 4371642.27276491...
7,València/Valencia,54215936,1272132349908660224,2020-06-14 11:42:41,4625001032,46,10,1272229001818517508,2020-06-14 18:06:45,4625011012,46,10,4.652703,06:24:04,0.726859,LINESTRING (725647.1705078348 4372073.18813074...
8,València/Valencia,54215936,1275014889183031296,2020-06-22 10:36:52,4625002040,46,10,1275157035395485697,2020-06-22 20:01:43,4625001032,46,10,1.076638,09:24:51,0.114364,LINESTRING (726676.0951277413 4372390.14746742...
9,València/Valencia,15078495,1233004547569786881,2020-02-27 12:22:46,4625010040,46,10,1233122238213361664,2020-02-27 20:10:26,1202801006,12,10,74.402414,07:47:40,9.545570,LINESTRING (727390.0698535475 4370663.52822930...


In [8]:
# Same enrichment for the destination side: match the province code against
# CPRO_d, drop the redundant CODEUNIT and label the name as NPRO_d.
ca_trips_with_name_gdf = (
    pd.merge(
        province_gdf[['NAMEUNIT', 'CODEUNIT']],
        ca_trips_with_name_gdf,
        how='inner',
        left_on='CODEUNIT',
        right_on='CPRO_d',
    )
    .drop(columns='CODEUNIT')
    .rename(columns={'NAMEUNIT': 'NPRO_d'})
)

In [9]:
# Check that both NPRO_d and NPRO_o are now present on the trips frame.
ca_trips_with_name_gdf.head(2)

Unnamed: 0,NPRO_d,NPRO_o,user_id,tw_id_o,timestamp_o,CUSEC_o,CPRO_o,CCA_o,tw_id_d,timestamp_d,CUSEC_d,CPRO_d,CCA_d,dist_km,time_length,speed_km_h,geometry
0,València/Valencia,València/Valencia,52523147,1233331030834253826,2020-02-28 10:00:06,4625003033,46,10,1233377728189083655,2020-02-28 13:05:40,4625002009,46,10,1.193762,03:05:34,0.385984,LINESTRING (725299.315767807 4371027.364319429...
1,València/Valencia,València/Valencia,207915704,1236244123914563590,2020-03-07 10:55:42,4622301004,46,10,1236288309397250051,2020-03-07 13:51:16,4625002003,46,10,4.859709,02:55:34,1.660808,LINESTRING (725885.9703448648 4366788.20113071...


In [42]:
def generate_OD_matrix_fn(trips_df):
    """Build raw and origin-normalised origin-destination (OD) trip counts.

    Parameters
    ----------
    trips_df : pandas.DataFrame
        One row per trip. Must contain the columns ``NPRO_o`` (origin
        province name) and ``NPRO_d`` (destination province name); any other
        columns are ignored.

    Returns
    -------
    dict
        ``'od_matrix'``: DataFrame indexed by ``(NPRO_o, NPRO_d)`` with a
        single ``count`` column holding the number of trips for that pair.
        ``'norm_od_matrix'``: same layout, with every count divided by the
        total number of trips leaving its origin, so the shares of each
        origin sum to 1.

    Raises
    ------
    KeyError
        If ``NPRO_o`` or ``NPRO_d`` is missing from ``trips_df``.
    """
    # Count rows per OD pair directly. This avoids aggregating every column
    # and then picking an arbitrary first one, whose non-null count could
    # differ from the real number of trips.
    od_counts = (
        trips_df
        .groupby(['NPRO_o', 'NPRO_d'])
        .size()
        .to_frame('count')
    )

    # Total trips leaving each origin, broadcast back onto every OD row.
    # (DataFrame.sum(level=...) was removed in pandas 2.0.)
    origin_totals = od_counts.groupby(level='NPRO_o')['count'].transform('sum')
    norm_od_counts = od_counts.div(origin_totals, axis=0)

    return {'od_matrix': od_counts, 'norm_od_matrix': norm_od_counts}

## Intra and inter-province trips

In [15]:
# Count the non-null entries of every column for each
# (origin province, destination province) pair.
trips_od_df = (
    ca_trips_with_name_gdf
    .groupby(['NPRO_o', 'NPRO_d'])
    .agg(['count'])
)
trips_od_df

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id,tw_id_o,timestamp_o,CUSEC_o,CPRO_o,CCA_o,tw_id_d,timestamp_d,CUSEC_d,CPRO_d,CCA_d,dist_km,time_length,speed_km_h,geometry
Unnamed: 0_level_1,Unnamed: 1_level_1,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
NPRO_o,NPRO_d,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
A Coruña,A Coruña,69,69,69,69,69,69,69,69,69,69,69,69,69,69,69
A Coruña,Barcelona,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
A Coruña,Lugo,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
A Coruña,Madrid,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
A Coruña,Ourense,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
A Coruña,Pontevedra,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10
A Coruña,Sevilla,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
A Coruña,Zamora,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Alacant/Alicante,Alacant/Alicante,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533
Alacant/Alicante,Albacete,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [38]:
# Inspect the top-level label of the first column of the aggregated OD frame.
trips_od_df.columns[0][0]

'user_id'

In [20]:
# Total trips per origin province (level 0 of the row index).
# DataFrame.sum(level=...) is deprecated and was removed in pandas 2.0;
# grouping on the index level is the supported equivalent.
sum_ = trips_od_df.groupby(level=0).sum()
sum_

Unnamed: 0_level_0,user_id,tw_id_o,timestamp_o,CUSEC_o,CPRO_o,CCA_o,tw_id_d,timestamp_d,CUSEC_d,CPRO_d,CCA_d,dist_km,time_length,speed_km_h,geometry
Unnamed: 0_level_1,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
NPRO_o,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
A Coruña,91,91,91,91,91,91,91,91,91,91,91,91,91,91,91
Alacant/Alicante,608,608,608,608,608,608,608,608,608,608,608,608,608,608,608
Albacete,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46
Almería,172,172,172,172,172,172,172,172,172,172,172,172,172,172,172
Araba/Álava,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60
Badajoz,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66
Barcelona,3181,3181,3181,3181,3181,3181,3181,3181,3181,3181,3181,3181,3181,3181,3181
Burgos,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96
Cantabria,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
Castelló/Castellón,209,209,209,209,209,209,209,209,209,209,209,209,209,209,209


In [21]:
# Share of each origin's trips going to each destination: every OD count is
# divided by the total for its origin, matched on level 0 of the row index.
trips_od_df.div(sum_, axis=1, level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_id,tw_id_o,timestamp_o,CUSEC_o,CPRO_o,CCA_o,tw_id_d,timestamp_d,CUSEC_d,CPRO_d,CCA_d,dist_km,time_length,speed_km_h,geometry
Unnamed: 0_level_1,Unnamed: 1_level_1,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
NPRO_o,NPRO_d,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
A Coruña,A Coruña,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242,0.758242
A Coruña,Barcelona,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989
A Coruña,Lugo,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934,0.065934
A Coruña,Madrid,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989
A Coruña,Ourense,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978,0.021978
A Coruña,Pontevedra,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890,0.109890
A Coruña,Sevilla,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989
A Coruña,Zamora,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989,0.010989
Alacant/Alicante,Alacant/Alicante,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645,0.876645
Alacant/Alicante,Albacete,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645,0.001645


In [41]:
# Scratch check (repeats an earlier cell): top-level label of the first
# column of the aggregated OD frame.
trips_od_df.columns[0][0]

'user_id'

In [35]:
# Scratch check: name of the first column of the enriched trips frame.
ca_trips_with_name_gdf.columns[0]

'NPRO_d'

In [43]:
# Keep only the trips whose origin and destination province codes differ,
# then build the OD matrices for that inter-province subset.
inter_prov_trips_df = ca_trips_with_name_gdf.loc[
    lambda trips: trips['CPRO_o'] != trips['CPRO_d']
]
inter_prov_ODs = generate_OD_matrix_fn(inter_prov_trips_df)
inter_prov_ODs['norm_od_matrix']

Unnamed: 0_level_0,Unnamed: 1_level_0,count
NPRO_o,NPRO_d,Unnamed: 2_level_1
A Coruña,Barcelona,0.045455
A Coruña,Lugo,0.272727
A Coruña,Madrid,0.045455
A Coruña,Ourense,0.090909
A Coruña,Pontevedra,0.454545
A Coruña,Sevilla,0.045455
A Coruña,Zamora,0.045455
Alacant/Alicante,Albacete,0.013333
Alacant/Alicante,Almería,0.013333
Alacant/Alicante,Barcelona,0.093333
