In [3]:
import insee
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go
import statsmodels.api as sm
import numpy as np

In [4]:
# Base directory of the data files read and written by the cells below.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = '/Users/andrea/Desktop/PhD/Projects/Current/NetMob/Data/BaseData'

In [5]:
# Load admin_data_iris.csv; the first CSV column is used as the row index.
a = pd.read_csv(f'{path}/admin_data_iris.csv', index_col=0)

In [6]:
# Total number of columns in the loaded table.
len(a.columns)

399

In [7]:
# Number of distinct column labels; a value lower than len(a.columns) would
# reveal duplicated column names.
len(a.columns.unique())

399

In [21]:
# Read the tile geometries and key the rows by their 'insee_tile' identifier.
insee_tile_geo = (
    gpd.read_file(f'{path}/insee_tile_geo.geojson')
    .set_index('insee_tile')
)

In [22]:
# Copy of the tile layer whose geometries are grown by 1000 CRS units.
# buffer() measures its distance in the units of the layer's CRS, so on a
# geographic (degree-based) CRS a distance of 1000 would be meaningless:
# fail loudly instead of silently producing unusable polygons.
if insee_tile_geo.crs is not None and insee_tile_geo.crs.is_geographic:
    raise ValueError(
        f'insee_tile_geo uses a geographic CRS ({insee_tile_geo.crs.to_string()}); '
        'reproject it to a projected CRS before buffering.'
    )
insee_tile_geo_buffered = insee_tile_geo.copy()
insee_tile_geo_buffered['geometry'] = insee_tile_geo_buffered.buffer(1000)

In [23]:
# Equipment counts for the buffered tile polygons at HIGH resolution.
# NOTE(review): presumably one row per polygon and one column per equipment
# code (the cells below select columns by code and sum across columns) — confirm.
equipment_counts = insee.equipment.get_equipment_counts(
    polygons=insee_tile_geo_buffered,
    resolution=insee.equipment.EquipmentResolution.HIGH,
)

In [9]:
# Description of the equipment codes at HIGH resolution.
description = insee.equipment.get_equipment_description(
    resolution=insee.equipment.EquipmentResolution.HIGH,
)

In [10]:
# Description of the equipment codes at LOW resolution.
description1 = insee.equipment.get_equipment_description(
    resolution=insee.equipment.EquipmentResolution.LOW,
)

In [24]:
# Equipment codes counted as "frequently visited" amenities; used below to
# select columns of equipment_counts. The label after each code is an English
# translation of the original French description.
frequently_visited_amenities = [
    "A203",  # Bank, savings bank
    "A206",  # Post office
    "A207",  # Postal relay point
    "A208",  # Postal agency
    "A501",  # Hairdresser
    "A504",  # Restaurant / fast food
    "B101",  # Hypermarket
    "B102",  # Supermarket
    "B201",  # Mini-market
    "B203",  # Bakery
    "B204",  # Butcher / delicatessen
    "B301",  # Bookshop, stationery, newsagent
    "B316",  # Petrol station
    "C101",  # Nursery school
    "C104",  # Elementary school
    "C201",  # Lower secondary school
    "D201",  # General practitioner
    "D307",  # Pharmacy
    "E101",  # Taxi / private-hire vehicle
    "E107",  # Passenger railway station of national interest
    "E108",  # Passenger railway station of regional interest
    "F101",  # Swimming pool
    "F103",  # Tennis
    "F107",  # Athletics
    "F121",  # Multi-sport halls (gymnasiums)
    "F303",  # Cinema
    "F312",  # Exhibition and cultural mediation
]

In [25]:
# Row-wise total over every equipment column, stored as a one-column frame.
amenity_counts = pd.DataFrame(
    {'amenity_count': equipment_counts.sum(axis=1)}
)

In [26]:
# Row-wise total restricted to the frequently visited equipment codes.
frequently_visited_amenity_counts = pd.DataFrame(
    {
        'frequently_visited_amenity_count': (
            equipment_counts[frequently_visited_amenities].sum(axis=1)
        )
    }
)

In [27]:
# Attach the frequently-visited count to the total count, matching rows on the index.
# Only the 'amenity_count' column is taken from the left side so the cell stays
# idempotent: amenity_counts is rebound to the merged frame, and re-running a plain
# self-merge would produce duplicated '_x' / '_y' suffixed columns.
amenity_counts = amenity_counts[['amenity_count']].merge(
    frequently_visited_amenity_counts,
    left_index=True,
    right_index=True,
)

In [52]:
# Persist the amenity counts to CSV in the base data directory.
amenity_counts.to_csv(f'{path}/amenity_counts_insee_tile.csv')

In [29]:
# Load bed_time_index_insee_tile.csv; the first CSV column is used as the row index.
# The 'bed_time_index' column of this table is plotted further down.
bed_time_index = pd.read_csv(f'{path}/bed_time_index_insee_tile.csv', index_col=0)

In [30]:
# Join the amenity counts with the bed time index on their shared row index;
# only index values present in both tables are kept.
amenity_counts_and_bed_time_index = pd.merge(
    amenity_counts,
    bed_time_index,
    left_index=True,
    right_index=True,
)

In [39]:
# Add a log10-transformed copy of each count column, named 'log10_<column>'.
for count_column in ('amenity_count', 'frequently_visited_amenity_count'):
    amenity_counts_and_bed_time_index[f'log10_{count_column}'] = np.log10(
        amenity_counts_and_bed_time_index[count_column]
    )

In [51]:
# Scatter plot of the bed time index against the log10 amenity count, overlaid
# with an ordinary-least-squares regression line.
x_axis = 'log10_amenity_count'
# The plotted x values are log10-transformed, so the axis label says so.
x_axis_title = 'log10(N amenities)'

# log10 of a zero count is -inf and missing values are NaN; either one makes the
# OLS fit fail or return NaN coefficients, so only finite rows are plotted and fitted.
plot_data = (
    amenity_counts_and_bed_time_index[[x_axis, 'bed_time_index']]
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
x_values = plot_data[x_axis].values
y_values = plot_data['bed_time_index'].values

fig = go.Figure()
scatter = go.Scatter(
    x=x_values,
    y=y_values,
    mode='markers',
    marker=dict(
        size=3,
        color='black',
        opacity=0.5
    ),
    showlegend=False
)

fig.add_trace(scatter)

# add_constant() prepends the intercept column, hence params[0] is the intercept
# and params[1] / pvalues[1] belong to the slope.
reg = sm.OLS(y_values, sm.add_constant(x_values)).fit()

slope, intercept, pvalue = reg.params[1], reg.params[0], reg.pvalues[1]

# A straight line is fully described by its two end points; tracing it through
# every (unsorted) data point only redraws the same segment back and forth.
x_line = np.array([x_values.min(), x_values.max()])

reg_line = go.Scatter(
    x=x_line,
    y=x_line * slope + intercept,
    mode='lines',
    line=dict(
        color='red',
        width=3
    ),
    # '.2g' keeps very small p-values readable instead of rounding them to 0.00.
    name=f'Slope: {slope:.2f},  p-value: {pvalue:.2g}'
)

fig.add_trace(reg_line)

fig.update_layout(
    title='Bed time index vs. number of amenities',
    xaxis_title=x_axis_title,
    yaxis_title='Bed time index',
    font=dict(
        size=30,
        color='black'
    ),
    template='plotly_white',
    width=1000,
    height=600,
    legend=dict(
        font=dict(
            size=20
        )
    )
)

fig.show(renderer='browser')






In [49]:
import xarray as xr
import pandas as pd
import numpy as np
import mobile_traffic as mt

In [58]:
# Daily dates from 2019-03-16 to 2019-05-31, both ends included.
dates = pd.date_range(start='2019-03-16', end='2019-05-31', freq='1D')
# Time-of-day offsets in 15-minute steps, starting at 00:00; the last step that
# does not exceed 23:59 is 23:45.
times = pd.timedelta_range(start='00:00:00', end='23:59:00', freq='15min')
# Every date combined with every offset (outer sum), flattened to one dimension.
dts = np.add.outer(dates, times).flatten()

In [59]:
# Ten dummy location ids (0..9) and the four services used for the synthetic array.
locations = np.arange(10)
services = [
    mt.Service.TWITCH,
    mt.Service.FACEBOOK,
    mt.Service.YOUTUBE,
    mt.Service.AMAZON_WEB_SERVICES,
]

In [61]:
# Synthetic array a[location, datetime, service]:
#   * 0 for timestamps at or after 08:00,
#   * (location position + 1) * (service position + 1) for timestamps before 08:00.
a = np.zeros(shape=(len(locations), len(dts), len(services)))

# Parse the cutoff once instead of on every loop iteration.
cutoff = pd.to_datetime('08:00:00').time()
before_cutoff = np.array(
    [pd.Timestamp(dt).time() < cutoff for dt in dts], dtype=bool
)
a[:, before_cutoff, :] = 1

# Scale by the 1-based position along the location and service axes; broadcasting
# replaces the per-location and per-service Python loops.
a *= np.arange(1, len(locations) + 1)[:, None, None]
a *= np.arange(1, len(services) + 1)[None, None, :]

In [62]:
# Wrap the synthetic array in a labelled DataArray, one coordinate per axis,
# and display it as the cell output.
da = xr.DataArray(
    a,
    dims=["locations", "datetime", "services"],
    coords={
        "locations": locations,
        "datetime": dts,
        "services": services,
    },
)
da

In [67]:
# Slice for location 3 and the Facebook service (leaving only the 'datetime'
# dimension), converted to a pandas object.
c = da.sel(locations=3, services=mt.Service.FACEBOOK).to_pandas()

In [69]:
# Group the 'datetime' axis by time of day and sum within each group, i.e. add
# up all dates for every 15-minute slot.
gb = da.groupby(group='datetime.time').sum()

In [75]:
# Check the dimension names after grouping: 'datetime' has been replaced by 'time'.
gb.dims

('locations', 'time', 'services')

In [71]:
# First half of the services (Twitch, Facebook), in their original order.
gb1 = gb.sel(services=[mt.Service.TWITCH, mt.Service.FACEBOOK])

In [72]:
# Second half of the services (YouTube, Amazon Web Services), in their original order.
gb2 = gb.sel(services=[mt.Service.YOUTUBE, mt.Service.AMAZON_WEB_SERVICES])

In [76]:
# Glue the two service subsets back together along the 'services' dimension.
gb_recomposed = xr.concat(objs=[gb1, gb2], dim='services')

In [74]:
# Largest absolute element-wise deviation between the recomposed and the original
# array; 0.0 means they are identical. The absolute value matters: the maximum of
# the signed difference would still be 0.0 if some recomposed entries were smaller
# than the originals.
np.abs(gb_recomposed.data - gb.data).max()

0.0

In [77]:
# Record the xarray version used for the experiment above.
xr.__version__

'2023.6.0'

In [84]:
# String form ('HH:MM:SS') of every time-of-day label on the 'time' coordinate.
list(map(str, gb.time.values))

['00:00:00',
 '00:15:00',
 '00:30:00',
 '00:45:00',
 '01:00:00',
 '01:15:00',
 '01:30:00',
 '01:45:00',
 '02:00:00',
 '02:15:00',
 '02:30:00',
 '02:45:00',
 '03:00:00',
 '03:15:00',
 '03:30:00',
 '03:45:00',
 '04:00:00',
 '04:15:00',
 '04:30:00',
 '04:45:00',
 '05:00:00',
 '05:15:00',
 '05:30:00',
 '05:45:00',
 '06:00:00',
 '06:15:00',
 '06:30:00',
 '06:45:00',
 '07:00:00',
 '07:15:00',
 '07:30:00',
 '07:45:00',
 '08:00:00',
 '08:15:00',
 '08:30:00',
 '08:45:00',
 '09:00:00',
 '09:15:00',
 '09:30:00',
 '09:45:00',
 '10:00:00',
 '10:15:00',
 '10:30:00',
 '10:45:00',
 '11:00:00',
 '11:15:00',
 '11:30:00',
 '11:45:00',
 '12:00:00',
 '12:15:00',
 '12:30:00',
 '12:45:00',
 '13:00:00',
 '13:15:00',
 '13:30:00',
 '13:45:00',
 '14:00:00',
 '14:15:00',
 '14:30:00',
 '14:45:00',
 '15:00:00',
 '15:15:00',
 '15:30:00',
 '15:45:00',
 '16:00:00',
 '16:15:00',
 '16:30:00',
 '16:45:00',
 '17:00:00',
 '17:15:00',
 '17:30:00',
 '17:45:00',
 '18:00:00',
 '18:15:00',
 '18:30:00',
 '18:45:00',
 '19:00:00',

In [87]:
# Same data as gb, with the 'time' coordinate labels replaced by their string form.
gbcoor = gb.assign_coords(time=[str(t) for t in gb.time.values])

In [89]:
# Inspect the string-labelled 'time' coordinate.
gbcoor.time

In [90]:
# Inspect the original 'time' coordinate for comparison.
gb.time

In [95]:
# Base directory of the data files.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = '/Users/andrea/Desktop/PhD/Projects/Current/NetMob/Data/BaseData'

In [97]:
# Open the NetCDF file from the base data directory.
# NOTE(review): the recorded run failed with a ValueError because the dependencies
# of the matching backends ('netcdf4', 'h5netcdf') were not installed; install one
# of them before re-running this cell.
app_consumption_by_location_and_time_of_day = xr.open_dataset(f'{path}/app_consumption_by_location_and_by_time_of_day.nc')

ValueError: found the following matches with the input file in xarray's IO backends: ['netcdf4', 'h5netcdf']. But their dependencies may not be installed, see:
https://docs.xarray.dev/en/stable/user-guide/io.html 
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html

In [1]:
from ViolinPlot import aggregate_data

In [2]:
# Enable IPython's autoreload extension in mode 2, so edited modules are reloaded
# automatically before code is executed.
%load_ext autoreload
%autoreload 2

In [3]:
# Build the aggregated data with ViolinPlot.aggregate_data().
# NOTE(review): the recorded run needed about 1h09 for 32 iterations; consider
# loading a previously saved result instead of recomputing.
d = aggregate_data()

100%|██████████| 32/32 [1:08:50<00:00, 129.09s/it]


In [4]:
# Output directory for the aggregated data.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
p ='/Users/andrea/Desktop/PhD/Projects/Current/NetMob/Data/BaseData'

In [5]:
# Save the aggregated data as a NetCDF file so the long computation need not be repeated.
d.to_netcdf(f'{p}/app_consumption_by_location_and_time_of_day_insee_tile.nc')

In [3]:
# Compare the total area covered by the INSEE tiles with the total area covered
# by the NetMob tiles.
area_insee_tile = 200*200    # area of one 200 x 200 tile
area_netmob_tile = 100*100   # area of one 100 x 100 tile
n_insee_tiles = 6747
n_netmob_tiles = 57846
total_area_insee_tiles = area_insee_tile * n_insee_tiles
total_area_netmob_tiles = area_netmob_tile * n_netmob_tiles
# The printed figure is the quotient of two areas, hence a dimensionless ratio
# and not an area in m2.
insee_to_netmob_area_ratio = total_area_insee_tiles / total_area_netmob_tiles
print(f'Total INSEE tile area / total NetMob tile area: {insee_to_netmob_area_ratio}')

Area of INSEE tiles: 0.466549113162535 m2


In [15]:
import xarray as xr

In [16]:
# Directory holding the mobile traffic files.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
p ='/Users/andrea/Desktop/PhD/Projects/Current/NetMob/Data/MobileTrafficData'

In [17]:
# Open the Bordeaux mobile traffic dataset, stack its data variables into a single
# DataArray (to_array) and drop every length-1 dimension (squeeze).
d = xr.open_dataset(f'{p}/mobile_traffic_bordeaux_by_insee_tile_service_and_time.nc').to_array().squeeze()

In [21]:
# List the labels available on the 'service' coordinate.
d.service.values

array(['Twitch', 'Orange_TV', 'Microsoft_Azure', 'Apple_iCloud',
       'Web_Games', 'PlayStation', 'TeamViewer', 'Web_Weather',
       'Google_Meet', 'Twitter', 'Amazon_Web_Services', 'Apple_Music',
       'Apple_Siri', 'Web_Ads', 'SoundCloud', 'Wikipedia',
       'Microsoft_Skydrive', 'Web_Transportation', 'Microsoft_Office',
       'Yahoo_Mail', 'Web_Food', 'WhatsApp', 'Google_Mail', 'YouTube',
       'Uber', 'Pinterest', 'Web_Clothes', 'Dropbox', 'Apple_Mail',
       'Web_Adult', 'DailyMotion', 'Instagram', 'Skype', 'Clash_of_Clans',
       'Pokemon_GO', 'Apple_App_Store', 'Google_Drive',
       'Apple_Web_Services', 'Apple_iTunes', 'Web_Finance',
       'Facebook_Live', 'Web_Downloads', 'EA_Games', 'Waze',
       'Google_Docs', 'Apple_Video', 'LinkedIn', 'Facebook_Messenger',
       'Snapchat', 'Deezer', 'Netflix', 'Facebook', 'Telegram',
       'Apple_iMessage', 'Microsoft_Store', 'Molotov', 'Google_Maps',
       'Tor', 'Google_Play_Store', 'Web_e-Commerce', 'Fortnite',
       'M

In [22]:
import pandas as pd

In [23]:
# Toy 2x2 frame: rows 'c' and 'd', columns 'a' and 'b'.
a = pd.DataFrame({'a': [1, 3], 'b': [2, 4]}, index=['c', 'd'])

In [24]:
# Second toy 2x2 frame sharing the row and column labels of the first one.
b = pd.DataFrame({'a': [5, 7], 'b': [6, 8]}, index=['c', 'd'])

In [26]:
# Stack the two frames row-wise; reset_index() moves the (now repeated) row labels
# into an 'index' column and renumbers the rows from 0.
pd.concat([a, b]).reset_index()

Unnamed: 0,index,a,b
0,c,1,2
1,d,3,4
2,c,5,6
3,d,7,8


In [31]:
# Back-of-the-envelope memory footprint: every file is held in several copies.
file_size_mb, n_files, n_copies = 100, 20, 18
memory_in_mb = n_copies * n_files * file_size_mb
memory_in_gb = memory_in_mb / 1000  # decimal conversion: 1000 MB per GB
print(f'Memory in GB: {memory_in_gb}')

Memory in GB: 36.0


In [30]:
# Back-of-the-envelope runtime: seconds per city x number of cities x repetitions.
time_in_seconds_per_city = 90
n_cities = 20
n_repetitions = 15
# The original, misspelled name is kept as an alias for any code still using it.
n_repretitions = n_repetitions
time_in_seconds = time_in_seconds_per_city * n_cities * n_repetitions
time_in_minutes = time_in_seconds / 60
time_in_hours = time_in_minutes / 60
print(f'Time in hours: {time_in_hours}')

Time in hours: 7.5
