In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import h5py
import matplotlib.pyplot as plt

In [2]:
# Load the Milano polygon layer and keep only the 'NIL' label column and the
# geometry; every later spatial join and export starts from `df_boundary`.
boundary_columns = ['NIL', 'geometry']
df_milano = gpd.read_file('../data/milano.geojson')
df_boundary = df_milano[boundary_columns]
# NOTE(review): disabled export below targets 'trento.geojson' although the
# data is Milano's — looks like a leftover from another notebook; confirm.
# df_boundary.to_file('../data/trento.geojson', driver='GeoJSON')

In [3]:
# Preview the first rows of the NIL/geometry frame.
df_boundary.head()

Unnamed: 0,NIL,geometry
0,LODOVICO IL MORO,"POLYGON ((9.15422 45.43775, 9.15274 45.43887, ..."
1,TRENNO,"POLYGON ((9.10623 45.49016, 9.10591 45.49084, ..."
2,PORTELLO,"POLYGON ((9.15636 45.48785, 9.15495 45.48852, ..."
3,BOVISASCA,"POLYGON ((9.16803 45.52234, 9.16763 45.52272, ..."
4,PARCO NORD,"POLYGON ((9.20040 45.52848, 9.20028 45.52846, ..."


In [4]:
# Read the traffic array ('data') and its labels ('idx') from the HDF5 file.
# The context manager closes the file handle even if a dataset is missing;
# `[()]` reads each dataset fully into memory, so nothing below needs the file
# to stay open.
# NOTE(review): 'd:/milano_10M.h5' is an absolute, machine-specific path —
# consider keeping it with the other inputs under ../data.
with h5py.File('d:/milano_10M.h5', 'r') as f:
    df_traffic = f['data'][()]
    # Cast the labels to str so they can be parsed as timestamps later on.
    df_idx = f['idx'][()].astype(str)

In [5]:
# Reduce the last axis of the traffic array to three 2-D tables, then
# transpose each so that the array's second axis becomes the rows.
# NOTE(review): presumably slices 0+1 are SMS in/out, 2+3 are call in/out and
# the last slice is internet traffic (inferred from the names) — confirm.
sms_values = df_traffic[:, :, 0] + df_traffic[:, :, 1]
call_values = df_traffic[:, :, 2] + df_traffic[:, :, 3]
net_values = df_traffic[:, :, -1]

df_sms = pd.DataFrame(sms_values).transpose()
df_call = pd.DataFrame(call_values).transpose()
df_net = pd.DataFrame(net_values).transpose()

In [6]:
# Give every table a 'cellId' key equal to the row position + 1. All three
# tables take it from df_net's index, exactly as before.
# NOTE(review): assumes rows are ordered by grid cell id starting at 1 — confirm.
cell_ids = df_net.index.values + 1
for traffic_table in (df_sms, df_call, df_net):
    traffic_table['cellId'] = cell_ids

In [7]:
# df_net = pd.DataFrame(df_traffic[:, :, -1]).T
# df_net['cellId'] = df_net.index.values + 1

In [8]:
# Load the grid layer; it is merged with the traffic tables on 'cellId' below.
df_grid = gpd.read_file('../data/milano-grid.geojson')

In [9]:
# Attach the grid attributes (including geometry) to each traffic table by
# matching on 'cellId' (default inner join).
df_sms_geo = df_sms.merge(df_grid, on='cellId')
df_call_geo = df_call.merge(df_grid, on='cellId')
df_net_geo = df_net.merge(df_grid, on='cellId')

In [10]:
def _join_cells_to_nil(traffic_geo):
    """Spatially join the NIL polygons with one per-cell traffic table.

    Parameters
    ----------
    traffic_geo : pandas.DataFrame
        A traffic table merged with the grid, i.e. carrying a 'geometry'
        column; it is wrapped in a GeoDataFrame for the join.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per (NIL polygon, intersecting grid cell) pair. A cell that
        intersects several polygons appears once for each of them.
    """
    # 'intersects' is sjoin's default spatial test. The keyword that names it
    # was `op` before geopandas 0.10 and is `predicate` afterwards (`op` was
    # later removed), so it is left implicit to run on either side of the rename.
    return gpd.sjoin(df_boundary, gpd.GeoDataFrame(traffic_geo), how='inner')


sms_results = _join_cells_to_nil(df_sms_geo)
call_results = _join_cells_to_nil(df_call_geo)
net_results = _join_cells_to_nil(df_net_geo)

In [11]:
# Remove the 'index_right' bookkeeping column that the spatial join added.
sms_final_results = sms_results.drop(columns='index_right')
call_final_results = call_results.drop(columns='index_right')
net_final_results = net_results.drop(columns='index_right')

In [12]:
def _sum_traffic_by_nil(joined):
    """Sum the per-cell time series of every NIL.

    Parameters
    ----------
    joined : pandas.DataFrame or geopandas.GeoDataFrame
        Spatial-join result with a 'NIL' column and one integer-labelled
        column per time step.

    Returns
    -------
    pandas.DataFrame
        Column 'NIL' followed by the time-step columns in their original
        order, one row per NIL (sorted by NIL).
    """
    # The time-step columns are the ones that kept their default integer
    # labels from the array-backed frames; selecting them by label replaces
    # the previous hard-coded column count (8929).
    time_columns = [
        label for label in joined.columns
        if isinstance(label, (int, np.integer))
    ]
    # Only the key and the time steps are summed: 'geometry' and 'cellId' are
    # left out up front, since newer pandas raises on non-summable columns
    # instead of silently dropping them.
    selected = pd.DataFrame(joined)[['NIL'] + time_columns]
    return selected.groupby('NIL').sum().reset_index()


sms_final_grouped = _sum_traffic_by_nil(sms_final_results)
call_final_grouped = _sum_traffic_by_nil(call_final_results)
net_final_grouped = _sum_traffic_by_nil(net_final_results)

In [13]:
# Label the rows of each aggregated table with its NIL values (in place).
for grouped_table in (sms_final_grouped, call_final_grouped, net_final_grouped):
    grouped_table.index = grouped_table['NIL'].values

In [14]:
# Keep only the time-step columns: column 0 is 'NIL', which already labels the
# rows. The open-ended slice replaces the hard-coded upper bound (8929), so
# the cell no longer depends on the exact number of time steps.
sms_final_traffic = sms_final_grouped.iloc[:, 1:]
call_final_traffic = call_final_grouped.iloc[:, 1:]
net_final_traffic = net_final_grouped.iloc[:, 1:]

In [15]:
# Preview: one row per NIL, one column per time step.
sms_final_traffic.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,8918,8919,8920,8921,8922,8923,8924,8925,8926,8927
ADRIANO,284.103994,246.874956,216.292595,249.578022,419.928666,223.97257,221.733742,167.371508,176.733715,224.0881,...,348.821247,253.496532,278.163356,233.068605,193.643891,211.551317,232.061758,125.897109,128.965962,118.61007
AFFORI,343.55268,300.875873,279.324674,275.100254,287.079249,188.379977,141.767349,103.400723,169.15889,118.930742,...,314.602841,572.481185,301.29792,351.482981,303.148154,248.396087,216.180433,298.181347,215.246014,197.383418
ASSIANO,114.117662,117.762808,161.707364,98.394815,82.33981,96.722432,65.114274,79.380493,97.540786,84.596729,...,206.855022,131.057343,180.193,202.043651,264.304762,178.082041,206.743607,137.122396,74.051143,121.978515
BAGGIO,341.799173,365.246409,304.01552,276.860742,225.733531,279.61342,186.415442,196.016524,126.31002,140.814878,...,669.163392,711.254297,642.5709,682.59292,533.105321,477.089515,490.117504,473.172835,413.825847,381.987785
BANDE NERE,877.044074,678.319333,592.777777,625.725374,418.842919,387.95421,364.277236,365.368133,306.484163,303.938927,...,866.453678,678.252181,653.306566,623.776719,738.619561,591.698351,458.800823,525.223978,492.170128,483.062048


In [16]:
# Flip the tables so that time steps are rows and NILs are columns.
sms_traffic_nid = sms_final_traffic.transpose()
call_traffic_nid = call_final_traffic.transpose()
net_traffic_nid = net_final_traffic.transpose()

In [17]:
# Parse the string labels read from the HDF5 'idx' dataset into timestamps;
# the explicit format makes a malformed label fail loudly instead of being guessed.
time_index = pd.to_datetime(df_idx, format='%Y-%m-%d %H:%M')

In [18]:
# Replace the positional row labels of each transposed table with the parsed
# timestamps (in place); lengths must match or pandas raises.
timestamps = time_index.values
for traffic_table in (sms_traffic_nid, call_traffic_nid, net_traffic_nid):
    traffic_table.index = timestamps

In [19]:
# Preview: timestamps as rows, NILs as columns.
sms_traffic_nid.head()

Unnamed: 0,ADRIANO,AFFORI,ASSIANO,BAGGIO,BANDE NERE,BARONA,BICOCCA,BOLDINASCO,BOSCOINCITTA,BOVISA,...,SAN SIRO,SANTA GIULIA,SARPI,SCALO ROMANA,STEPHENSON,TIBALDI,TRE TORRI,TRENNO,TRIULZO SUPERIORE,XXII MARZO
2013-11-01 00:00:00,284.103994,343.55268,114.117662,341.799173,877.044074,274.245997,466.642253,600.729734,318.443705,304.659604,...,346.823778,201.529076,1203.341964,527.798927,80.377916,370.40322,138.069157,14.189294,95.837431,948.996467
2013-11-01 00:10:00,246.874956,300.875873,117.762808,365.246409,678.319333,248.794519,624.302009,457.864138,326.628297,357.164103,...,275.663281,160.512753,833.650323,435.226343,71.644616,380.95862,154.725735,12.039734,71.343692,708.135816
2013-11-01 00:20:00,216.292595,279.324674,161.707364,304.01552,592.777777,236.603805,441.73537,380.769479,251.094892,295.101008,...,312.566615,143.020892,860.418835,389.855246,19.376207,306.181243,93.376076,8.59366,52.816672,610.216084
2013-11-01 00:30:00,249.578022,275.100254,98.394815,276.860742,625.725374,255.284583,398.276471,522.408637,259.646851,301.369735,...,236.572447,126.210674,927.82072,277.748076,47.705647,300.509684,89.557187,7.839532,49.393578,503.951094
2013-11-01 00:40:00,419.928666,287.079249,82.33981,225.733531,418.842919,204.928792,436.553465,454.8068,248.393071,187.62256,...,124.159924,138.485289,739.981337,306.488984,34.548522,291.316089,64.908754,9.986353,56.325278,523.479069


In [20]:
# Wrap each time-by-NIL table in a DataFrame and export it to CSV (the
# timestamp index is written as the first column).
sms_traffic_nid_pd = pd.DataFrame(sms_traffic_nid)
call_traffic_nid_pd = pd.DataFrame(call_traffic_nid)
net_traffic_nid_pd = pd.DataFrame(net_traffic_nid)

csv_exports = (
    (sms_traffic_nid_pd, '../data/milano_sms_traffic_nid.csv'),
    (call_traffic_nid_pd, '../data/milano_call_traffic_nid.csv'),
    (net_traffic_nid_pd, '../data/milano_net_traffic_nid.csv'),
)
for traffic_table, csv_path in csv_exports:
    traffic_table.to_csv(csv_path)

In [21]:
# Pairwise correlation between the columns (NILs) of the `net` table, using
# pandas' default method; abs() discards the sign, so only the strength of the
# relationship is kept.
df_corr = net_traffic_nid_pd.corr().abs()

In [22]:
# Correlation of every NIL with the 'DUOMO' column (a Series indexed by NIL).
df_corr_duomo = df_corr['DUOMO']

In [23]:
# Attach the DUOMO correlation to each boundary row: the left key is the 'NIL'
# column, the right key is the Series' index passed as an array via right_on.
# Default inner join, so NILs missing on either side are dropped.
df_boundary_corr = pd.merge(left=df_boundary, right=df_corr_duomo, left_on='NIL', right_on=df_corr_duomo.index)

In [24]:
# Write the boundaries with their DUOMO correlation as GeoJSON.
df_boundary_corr.to_file('../data/milano_corr.geojson', driver='GeoJSON')

In [41]:
# Mean over all time steps for every NIL column of the `net` table, kept as a
# one-column frame named 'avg_traffic' and indexed by NIL.
df_net_avg = net_traffic_nid.mean().to_frame(name='avg_traffic')

In [42]:
# Display the per-NIL averages (pandas truncates the middle rows).
df_net_avg

Unnamed: 0,avg_traffic
ADRIANO,3037.645751
AFFORI,5759.433348
ASSIANO,2442.324456
BAGGIO,5305.902801
BANDE NERE,10808.494987
...,...
TIBALDI,5371.855896
TRE TORRI,2493.223179
TRENNO,133.555770
TRIULZO SUPERIORE,2028.793373


In [44]:
# Attach 'avg_traffic' to each boundary row: the left key is the 'NIL' column,
# the right key is df_net_avg's index passed as an array via right_on.
# Default inner join, so NILs missing on either side are dropped.
df_boundary_traffic = pd.merge(left=df_boundary, right=df_net_avg, left_on='NIL', right_on=df_net_avg.index)

In [46]:
# Write the boundaries with their average traffic as GeoJSON.
df_boundary_traffic.to_file('../data/milano_avg_traffic.geojson', driver='GeoJSON')

In [54]:
# Merge all boundary polygons into a single feature (dissolve without `by`)
# and write it out.
# NOTE(review): the later `xx.to_file(...)` cell writes to this same path and
# therefore overwrites this output with the buffered outline.
df_boundary.dissolve().to_file('../data/milano_one_poly.geojson', driver='GeoJSON')

In [58]:
# Build the single-polygon city outline once and reuse it; the union was
# previously computed a second time just to derive the buffer.
xx = df_boundary.dissolve()
# Grow the outline slightly. 0.0001 is expressed in the units of the layer's
# CRS — presumably degrees, given the lon/lat-looking coordinates in the
# boundary preview, which is why geopandas warns here; reproject to a metric
# CRS first if an exact distance matters.
# `.iloc[0]` takes the single dissolved row by position, not by label.
buf_geo = xx['geometry'].buffer(0.0001).iloc[0]
xx['geometry'] = buf_geo


  buf_geo = df_boundary.dissolve()['geometry'].buffer(0.0001)[0]


In [61]:
# Write the buffered outline. This is the same path the earlier
# `df_boundary.dissolve().to_file(...)` cell wrote, so that file is replaced.
xx.to_file('../data/milano_one_poly.geojson', driver='GeoJSON')