In [1]:
# Imports
import pandas as pd
import numpy as np

### Aggregate World Data by Country

In [17]:
# Read and aggregate data
req_cols = ['avg_d_kbps', 'avg_u_kbps', 'avg_lat_ms', 'tests', 'devices', 'quarter', 'category', 'iso3', 'name']

df_world = pd.DataFrame()

for i in range(0,30):
    df = pd.read_csv(f'./data/performance/preprocessed_files/whole_world/whole_world_{i}.csv', sep=';', usecols=req_cols)

    # aggregate as weighted average

    # create helper columns for weighted averages
    df['product1'] = df['avg_d_kbps'] * df['tests']
    df['product2'] = df['avg_u_kbps'] * df['tests']
    df['product3'] = df['avg_lat_ms'] * df['tests']

    # Aggregate
    df_agg = df.groupby(['name', 'quarter', 'category']).agg({'product1': np.average,
                                                              'product2': np.average,
                                                              'product3': np.average,
                                                              'tests': sum})

    # retrieve actual values from product columns
    df_agg['avg_d_kbps'] = df_agg['product1'] / df_agg['tests']
    df_agg['avg_u_kbps'] = df_agg['product2'] / df_agg['tests']
    df_agg['avg_lat_ms'] = df_agg['product3'] / df_agg['tests']

    # drop helper columns
    df_agg = df_agg.drop(columns=['product1', 'product2', 'product3'])

    # Concat together
    df_world = pd.concat([df_world, df_agg])

In [22]:
# Quick inspection
df_agg.sample(n=20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tests,avg_d_kbps,avg_u_kbps,avg_lat_ms
name,quarter,category,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Australia,2022-07-01,mobile,256498,3.31225,0.436214,0.000569
Nicaragua,2022-07-01,mobile,13251,9.661208,5.024867,0.011928
Bahamas,2022-07-01,mobile,685,156.223394,52.356707,0.170258
Isle of Man,2022-07-01,mobile,933,285.757211,94.431169,0.32738
Madeira Islands,2022-07-01,mobile,1102,505.42829,95.884564,0.224472
Greenland,2022-07-01,mobile,230,1922.938681,345.33903,0.872314
Colombia,2022-07-01,mobile,243173,0.867125,0.636049,0.0025
Japan,2022-07-01,mobile,265763,1.698646,0.282496,0.000957
Lithuania,2022-07-01,mobile,38262,8.661292,1.720626,0.002897
Chile,2022-07-01,mobile,238449,2.547632,0.702756,0.00141


### Data Exploration

In [27]:
# Germany
df_germany = pd.read_csv('./data/performance/germany_final.csv', sep=';')

In [28]:
df_germany.head()

Unnamed: 0,",",avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,quarter,category,long,lat,geometry,iso3,name,continent,region,iso_3166_1_
0,2348848,7864,1383,43,4,2,2019-01-01,fixed,8.415527,55.024873,POINT (8.41552734375 55.0248734409448),DEU,Germany,Europe,Western Europe,DE
1,12348849,32663,8941,34,1,1,2019-01-01,fixed,8.421021,55.024873,POINT (8.4210205078125 55.0248734409448),DEU,Germany,Europe,Western Europe,DE
2,22348850,23600,10557,29,1,1,2019-01-01,fixed,8.426514,55.024873,POINT (8.426513671875 55.0248734409448),DEU,Germany,Europe,Western Europe,DE
3,32348851,34739,7250,40,2,1,2019-01-01,fixed,8.432007,55.024873,POINT (8.4320068359375 55.0248734409448),DEU,Germany,Europe,Western Europe,DE
4,42348852,7083,1644,33,4,3,2019-01-01,fixed,8.421021,55.021725,POINT (8.4210205078125 55.0217245215306),DEU,Germany,Europe,Western Europe,DE
