In [32]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import folium
from scipy.spatial import Voronoi
# for requirements: install fastparquet 

import glob # not needed in requirements
import os # not needed in requirements

## 1. Loading and preparing the handover data

In [33]:
# Topology parquet file for Lyon. Built with os.path.join so the notebook is
# not tied to Windows path separators (on Windows the resulting string is the
# same as the previous hard-coded one).
PATH_TOPO = os.path.join(
    '..', '..', 'Data_Handover', 'TOPO', 'Lyon',
    'part-00000-2f038a97-7faf-46f3-aee2-b73b52bf2cba.snappy.parquet',
)

In [34]:
# Load the Lyon topology table with the fastparquet engine; the output below
# shows its LocInfo, TECHNO, LON and LAT columns.
enb_df = pd.read_parquet(PATH_TOPO, engine='fastparquet')
enb_df

Unnamed: 0,LocInfo,TECHNO,LON,LAT
0,8102f8100000ba1a,4G,4.805831,45.881115
1,8102f8100000ae6f,4G,5.103326,45.661649
2,8102f8100000b9fc,4G,5.006670,45.770836
3,8102f8100000999d,4G,4.888138,45.738378
4,8102f81000009baf,4G,4.889202,45.744970
...,...,...,...,...
9324,0102f810041ec940,3G,4.695403,45.642543
9325,0002f8100705368e,2G,4.849999,45.753893
9326,0002f810511084f7,2G,4.768475,45.784375
9327,0102f810041e6337,3G,4.865802,45.673625


In [9]:
# Folder with the handover parquet parts of the 2019/03/16 directory
# (presumably incoming handovers for Lyon, judging by the folder name).
# os.path.join keeps the path usable outside Windows.
PATH_HO = os.path.join('..', '..', 'Data_Handover', 'usersHandovers', '2019', '03', '16', 'Lyon_HO_IN')

# glob gives no ordering guarantee, so sort the matches to make the row order
# of `frame` reproducible across runs and platforms.
all_files = sorted(glob.glob(os.path.join(PATH_HO, '*.parquet')))

# Fail with an explicit message instead of pandas' generic
# "No objects to concatenate" when the folder is missing or empty.
if not all_files:
    raise FileNotFoundError('No parquet files found in ' + PATH_HO)

li = []

for filename in all_files:
    df = pd.read_parquet(filename, engine='fastparquet')
    li.append(df)

# Stack every part into one table with a fresh 0..n-1 index.
frame = pd.concat(li, axis=0, ignore_index=True)
frame

Unnamed: 0,timeSlot,srcLocInfo,dstLocInfo,nbHO15
0,1552734000,8102f81000009986,8102f81000009991,171
1,1552753800,8102f810000099d1,8102f810000099ae,1508
2,1552757400,8102f8100000a481,8102f81000009c31,814
3,1552753800,8102f81000009de8,8102f81000009d5e,277
4,1552732200,8102f81000009952,8102f81000009975,1198
...,...,...,...,...
719641,1552759200,0002f8100704edda,0002f8100704512d,0
719642,1552759200,0002f81051100807,0002f81051053587,0
719643,1552746600,8102f8100000ba74,8102f8100000ba60,0
719644,1552770000,8102f810000099c9,8102f8100000babc,0


In [34]:
# Total number of handovers for every (source, destination) pair.
agg = frame.groupby(['srcLocInfo', 'dstLocInfo'])['nbHO15'].sum()

# Turn the Series into a one-column table and drop the pairs whose total is zero.
df_agg = agg.to_frame()
df_agg = df_agg.loc[df_agg['nbHO15'] > 0]

In [35]:
# Persist the aggregated (src, dst) totals as a snappy-compressed parquet file
# in the current working directory.
df_agg.to_parquet(path='HO_AGG.snappy.parquet', engine='fastparquet', compression='snappy')

In [36]:
# Read the file back to check the round trip; the output below shows that the
# (srcLocInfo, dstLocInfo) index is preserved.
pd.read_parquet(path='HO_AGG.snappy.parquet', engine='fastparquet')

Unnamed: 0_level_0,Unnamed: 1_level_0,nbHO15
srcLocInfo,dstLocInfo,Unnamed: 2_level_1
0002f81000686160,0002f81007013bfa,26
0002f810041c2388,0002f81007041283,16
0002f81007011463,0002f8100701fe3f,31
0002f81007011465,0002f810070126d7,17
0002f810070114de,0002f81007013336,1240
...,...,...
8102f8100000bad0,8102f8100000a1fa,515
8102f8100000bad0,8102f8100000a4d9,57
8102f8100000bad0,8102f8100000b9cc,5924
8102f8100000bad0,8102f8100000ba30,189


In [23]:
# Pre-computed aggregate file (NOTE(review): presumably days 16-22, going by
# the file name -- it is not produced by the cells above). os.path.join keeps
# the path usable outside Windows.
AGG_PATH = os.path.join('..', 'Processed_data', 'HO_AGG_16-22.snappy.parquet')
agg_df = pd.read_parquet(path=AGG_PATH, engine='fastparquet')
# Move the index levels back into ordinary columns so they can be used as
# join keys later on.
agg_df = agg_df.reset_index()
agg_df

Unnamed: 0,srcLocInfo,dstLocInfo,nbHO15
0,0002f81000680300,0002f8100701fe5e,30
1,0002f8100068258d,0002f8100701fe5e,33
2,0002f81000683d30,0002f81007011bf5,20
3,0002f81000685a56,0002f81007012d5c,15
4,0002f81000686160,0002f81007013bfa,158
...,...,...,...
20204,8102f8100000bad0,8102f8100000ba30,875
20205,8102f8100000bad0,8102f8100000ba34,52
20206,8102f8100000bad0,8102f8100000ba49,70
20207,8102f8100000bad0,8102f8100000babe,212


In [24]:
# Add src EnB coordinates
agg_df = agg_df.join(enb_df.set_index('LocInfo'), on='srcLocInfo')
agg_df.rename(columns={'TECHNO':'srcTechno','LON':'srcLON', 'LAT':'srcLAT'}, inplace=True)

# Add dst EnB coordinates
agg_df = agg_df.join(enb_df.set_index('LocInfo'), on='dstLocInfo')
agg_df.rename(columns={'TECHNO':'dstTechno','LON':'dstLON', 'LAT':'dstLAT'}, inplace=True)

agg_df

Unnamed: 0,srcLocInfo,dstLocInfo,nbHO15,srcTechno,srcLON,srcLAT,dstTechno,dstLON,dstLAT
0,0002f81000680300,0002f8100701fe5e,30,,,,2G,4.988614,45.665556
1,0002f8100068258d,0002f8100701fe5e,33,,,,2G,4.988614,45.665556
2,0002f81000683d30,0002f81007011bf5,20,,,,2G,4.954243,45.676842
3,0002f81000685a56,0002f81007012d5c,15,,,,2G,4.976904,45.698743
4,0002f81000686160,0002f81007013bfa,158,,,,2G,4.937226,45.721670
...,...,...,...,...,...,...,...,...,...
20204,8102f8100000bad0,8102f8100000ba30,875,4G,4.820179,45.775520,4G,4.859129,45.802421
20205,8102f8100000bad0,8102f8100000ba34,52,4G,4.820179,45.775520,4G,4.869052,45.815295
20206,8102f8100000bad0,8102f8100000ba49,70,4G,4.820179,45.775520,4G,4.775058,45.790371
20207,8102f8100000bad0,8102f8100000babe,212,4G,4.820179,45.775520,4G,4.814739,45.775244


In [29]:
# Keep the pairs whose source station is 4G. Author's note: filtering on
# dstTechno as well is not needed, the output is the same.
is_4g_source = agg_df['srcTechno'].eq('4G')
agg_df_4G = agg_df[is_4g_source]
agg_df_4G

Unnamed: 0,srcLocInfo,dstLocInfo,nbHO15,srcTechno,srcLON,srcLAT,dstTechno,dstLON,dstLAT
3228,8102f8100000992d,8102f810000099cd,21,4G,4.911542,45.777576,4G,4.861666,45.773614
3229,8102f8100000992d,8102f81000009c44,1502,4G,4.911542,45.777576,4G,4.925007,45.773378
3230,8102f8100000992d,8102f81000009f21,361,4G,4.911542,45.777576,4G,4.898554,45.787055
3231,8102f8100000992d,8102f81000009fed,2466,4G,4.911542,45.777576,4G,4.889721,45.772503
3232,8102f8100000992d,8102f8100000a2a1,1582,4G,4.911542,45.777576,4G,4.898178,45.766288
...,...,...,...,...,...,...,...,...,...
20204,8102f8100000bad0,8102f8100000ba30,875,4G,4.820179,45.775520,4G,4.859129,45.802421
20205,8102f8100000bad0,8102f8100000ba34,52,4G,4.820179,45.775520,4G,4.869052,45.815295
20206,8102f8100000bad0,8102f8100000ba49,70,4G,4.820179,45.775520,4G,4.775058,45.790371
20207,8102f8100000bad0,8102f8100000babe,212,4G,4.820179,45.775520,4G,4.814739,45.775244


In [47]:
# Store the four coordinate columns as float64 and write the 4G table to the
# processed-data folder. os.path.join keeps the path usable outside Windows.
coord_dtypes = {
    'srcLON': 'float64',
    'srcLAT': 'float64',
    'dstLON': 'float64',
    'dstLAT': 'float64',
}
agg_df_4G.astype(coord_dtypes).to_parquet(
    path=os.path.join('..', 'Processed_data', 'HO_AGG_16-22_4G_coords.snappy.parquet'),
    engine='fastparquet',
    compression='snappy',
)

In [2]:
# Reload the 4G table with coordinates from disk, so the map section can start
# from this cell. os.path.join keeps the path usable outside Windows.
agg_df_4G = pd.read_parquet(
    path=os.path.join('..', 'Processed_data', 'HO_AGG_16-22_4G_coords.snappy.parquet'),
    engine='fastparquet',
)
agg_df_4G

Unnamed: 0_level_0,srcLocInfo,dstLocInfo,nbHO15,srcTechno,srcLON,srcLAT,dstTechno,dstLON,dstLAT
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3228,8102f8100000992d,8102f810000099cd,21,4G,4.911542,45.777576,4G,4.861666,45.773614
3229,8102f8100000992d,8102f81000009c44,1502,4G,4.911542,45.777576,4G,4.925007,45.773378
3230,8102f8100000992d,8102f81000009f21,361,4G,4.911542,45.777576,4G,4.898554,45.787055
3231,8102f8100000992d,8102f81000009fed,2466,4G,4.911542,45.777576,4G,4.889721,45.772503
3232,8102f8100000992d,8102f8100000a2a1,1582,4G,4.911542,45.777576,4G,4.898178,45.766288
...,...,...,...,...,...,...,...,...,...
20204,8102f8100000bad0,8102f8100000ba30,875,4G,4.820179,45.775520,4G,4.859129,45.802421
20205,8102f8100000bad0,8102f8100000ba34,52,4G,4.820179,45.775520,4G,4.869052,45.815295
20206,8102f8100000bad0,8102f8100000ba49,70,4G,4.820179,45.775520,4G,4.775058,45.790371
20207,8102f8100000bad0,8102f8100000babe,212,4G,4.820179,45.775520,4G,4.814739,45.775244


In [3]:
# Check the column dtypes of the reloaded table (the four coordinate columns
# are float64 in the output below).
agg_df_4G.dtypes

srcLocInfo     object
dstLocInfo     object
nbHO15          int64
srcTechno      object
srcLON        float64
srcLAT        float64
dstTechno      object
dstLON        float64
dstLAT        float64
dtype: object

## 2. Creating Folium maps for visualization

In [27]:
# Bounding box of the study area: (lon1, lat1) is the corner with the smaller
# coordinates, (lon2, lat2) the one with the larger coordinates.
lon1, lat1 = 4.764169, 45.70111
lon2, lat2 = 4.888378, 45.776827

# Keep the pairs whose *source* station lies inside the box (bounds included).
src_lon = agg_df_4G['srcLON']
src_lat = agg_df_4G['srcLAT']
inside_lon = (src_lon >= lon1) & (src_lon <= lon2)
inside_lat = (src_lat >= lat1) & (src_lat <= lat2)

agg_df_reduced = agg_df_4G.loc[inside_lon & inside_lat]
agg_df_reduced

Unnamed: 0_level_0,srcLocInfo,dstLocInfo,nbHO15,srcTechno,srcLON,srcLAT,dstTechno,dstLON,dstLAT
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3239,8102f8100000994e,8102f8100000994e,34,4G,4.818062,45.762236,4G,4.818062,45.762236
3240,8102f8100000994e,8102f81000009952,174,4G,4.818062,45.762236,4G,4.827545,45.752980
3241,8102f8100000994e,8102f81000009957,3879,4G,4.818062,45.762236,4G,4.826112,45.774444
3242,8102f8100000994e,8102f81000009958,1163,4G,4.818062,45.762236,4G,4.835282,45.761386
3243,8102f8100000994e,8102f8100000995b,17,4G,4.818062,45.762236,4G,4.858331,45.753607
...,...,...,...,...,...,...,...,...,...
20204,8102f8100000bad0,8102f8100000ba30,875,4G,4.820179,45.775520,4G,4.859129,45.802421
20205,8102f8100000bad0,8102f8100000ba34,52,4G,4.820179,45.775520,4G,4.869052,45.815295
20206,8102f8100000bad0,8102f8100000ba49,70,4G,4.820179,45.775520,4G,4.775058,45.790371
20207,8102f8100000bad0,8102f8100000babe,212,4G,4.820179,45.775520,4G,4.814739,45.775244


In [29]:
# Save the bounding-box subset next to the other processed files.
# os.path.join keeps the path usable outside Windows.
agg_df_reduced.to_parquet(
    path=os.path.join('..', 'Processed_data', 'HO_AGG_16-22_4G_coords_reduced.snappy.parquet'),
    engine='fastparquet',
    compression='snappy',
)

In [36]:
# Restrict the topology to the stations that appear as a source in the reduced
# handover table. NOTE: this rebinds `enb_df`, so cells that use the full
# topology must not be re-run after this one without reloading it.
is_reduced_source = enb_df['LocInfo'].isin(agg_df_reduced['srcLocInfo'])
enb_df = enb_df[is_reduced_source]
enb_df

Unnamed: 0,LocInfo,TECHNO,LON,LAT
3,8102f8100000999d,4G,4.888138,45.738378
5,8102f81000009b28,4G,4.819723,45.771390
9,8102f8100000997e,4G,4.875561,45.753615
12,8102f81000009976,4G,4.846672,45.729169
17,8102f81000009959,4G,4.822966,45.733027
...,...,...,...,...
574,8102f81000009f4a,4G,4.874679,45.764310
577,8102f8100000997b,4G,4.880686,45.725675
581,8102f8100000ba70,4G,4.867783,45.701943
582,8102f8100000996d,4G,4.864943,45.729614


In [64]:
def create_map(location=(45.73303, 4.82297), zoom_start=13):
    """Create the base folium map the other helpers draw on.

    Parameters
    ----------
    location : sequence of two floats, optional
        (latitude, longitude) the map is centred on. The default is the
        centre this notebook has used so far.
    zoom_start : int, optional
        Initial zoom level handed to folium (default 13).

    Returns
    -------
    folium.Map
        A new map using the "OpenStreetMap" tiles.
    """
    # Named `base_map` rather than `map` so the builtin is not shadowed.
    base_map = folium.Map(list(location), tiles="OpenStreetMap", zoom_start=zoom_start)
    print('Created base station map.')
    return base_map

def add_stations(map, df, name='4G stations'):
    """Add one marker per distinct source station to ``map``.

    Parameters
    ----------
    map : folium.Map
        Map that receives the marker layer (modified in place).
    df : pandas.DataFrame
        Must contain the columns ``srcLocInfo``, ``srcLAT`` and ``srcLON``.
        When a station id occurs on several rows, its last row provides the
        marker position.
    name : str, optional
        Name of the layer as it will appear in the layer control.
    """
    print('Adding '+name+' layer...')
    fg = folium.FeatureGroup(name=name) # Name as it will appear in Layer control
    # One pass over the table instead of re-filtering the whole frame for every
    # station id. keep='last' selects the same row the per-id `.iloc[-1]`
    # lookup used to pick; markers are added in the order of those rows.
    last_rows = df.drop_duplicates(subset='srcLocInfo', keep='last')
    for station_id, lat, lon in zip(last_rows['srcLocInfo'],
                                    last_rows['srcLAT'],
                                    last_rows['srcLON']):
        fg.add_child(folium.Marker(
            location=[lat, lon],
            popup=station_id,
        ))
    map.add_child(fg)
    
def close_map(map, filename):
    """Add the layer control to ``map`` and save it as HTML in the ``maps`` folder.

    Parameters
    ----------
    map : folium.Map
        Map to finalise; the layer control is added to it in place.
    filename : str
        File name (not a full path) of the HTML file written inside ``maps``.
    """
    folium.LayerControl().add_to(map)
    # os.path.join yields the same 'maps\\<filename>' target on Windows and a
    # valid path elsewhere, where a literal backslash would end up inside the
    # file name. The folder is created on first use so a fresh checkout works.
    os.makedirs('maps', exist_ok=True)
    map.save(os.path.join('maps', filename))
    print('Closing', filename, 'map.')
    
def add_voronoi(map, points):
    """Draw the bounded Voronoi cells of ``points`` on ``map`` as one layer.

    Parameters
    ----------
    map : folium.Map
        Map that receives the 'Voronoi cells' layer (modified in place).
    points : pandas.DataFrame
        Must contain ``LAT`` and ``LON`` columns; every row is one Voronoi site.

    Notes
    -----
    The diagram is computed directly on the (lat, lon) values, i.e. with plain
    Euclidean distance in degrees. Regions that extend to infinity (they
    contain the vertex index -1) and empty regions are not drawn.
    """
    # Sites are passed as (lat, lon) -- not (lon, lat) -- because that is the
    # order folium expects, so the Voronoi vertices can be drawn unchanged.
    vor = Voronoi(list(zip(points['LAT'].tolist(), points['LON'].tolist())))  # Careful with the inversion!!
    fg = folium.FeatureGroup(name='Voronoi cells', show=True)
    # point_region has one entry per input point: several points (e.g. stations
    # with identical coordinates) may refer to the same region, and scipy
    # documents -1 for points without a region. Visit every real region once,
    # so no polygon is stacked on itself and -1 never indexes the last region.
    seen_regions = set()
    for reg_idx in vor.point_region.tolist():
        if reg_idx < 0 or reg_idx in seen_regions:
            continue
        seen_regions.add(reg_idx)
        region = vor.regions[reg_idx]
        if not region or -1 in region:
            continue
        region_coords = [vor.vertices[vertex] for vertex in region]
        fg.add_child(folium.Polygon(
            region_coords,
            color="blue",
            weight=3,
            fill_color="blue",
            fill_opacity=0.2,
            fill=True,
        ))
    map.add_child(fg)


In [67]:
# Build the Lyon map: base map, then the station markers, then the Voronoi
# cells of the (filtered) topology, and finally write it to lyon_markers.html.
lyon = create_map()
add_stations(lyon, agg_df_reduced)
add_voronoi(lyon, enb_df)
close_map(lyon, 'lyon_markers.html')

Created base station map.
Adding 4G stations layer...
Closing lyon_markers.html map.
