In [None]:
import pandas as pd
import numpy as np
import geopandas as gp
import matplotlib.pyplot as plt 
import sys
import csv
sys.path.append("..") # src exists in the parent directory
import src.config
# File name of the GeoPackage that load_speed_data() reads.
output_name = "LastFourQuartersOrBestEstimate_On_DissolvedSmallerCitiesHexes.gpkg"
# Directory containing that file, resolved under the project's configured data directory.
output_dir = src.config.DATA_DIRECTORY / "processed" / "statistical_geometries"

In [None]:
def convert_kbps_to_mbps(table, copy=False):
    """Convert every ``kbps`` column of ``table`` to Mbps and rename it.

    A column takes part when its label is a string containing ``"kbps"``.
    Its values are divided by 1000 and ``"kbps"`` in the label is replaced
    with ``"Mbps"``. Columns with non-string labels are left untouched.

    Parameters
    ----------
    table : pandas.DataFrame (or subclass)
        Table holding the speed columns.
    copy : bool, default False
        When True the work is done on a copy and ``table`` is left unchanged.
        When False ``table`` itself is modified.

    Returns
    -------
    The converted table: the same object as ``table`` when ``copy`` is False,
    otherwise the new copy.
    """
    if copy:
        table = table.copy()

    # Non-string labels (e.g. integer column names) cannot contain "kbps";
    # skipping them avoids a TypeError from the substring test.
    kbps_columns = [
        col for col in table.columns if isinstance(col, str) and "kbps" in col
    ]

    for col in kbps_columns:
        # Assign a new column rather than dividing through .loc in place, so
        # integer-typed columns become float without a dtype-changing setitem.
        table[col] = table[col] / 1000

    # One rename for all converted columns; inplace keeps the caller's object
    # updated when copy=False.
    table.rename(
        columns={col: col.replace("kbps", "Mbps") for col in kbps_columns},
        inplace=True,
    )
    return table

In [None]:
def load_speed_data():
    """Read the processed speed GeoPackage and add the derived columns.

    The file at ``output_dir / output_name`` is read with geopandas, then:

    * ``Ookla_Pop_at_50_10`` is added as ``Pop2016`` times
      ``ookla_50_10_percentile`` divided by 100.
    * every ``kbps`` column is converted and renamed by
      ``convert_kbps_to_mbps``.
    * ``is_rural`` is added: True wherever ``PCCLASS`` is not one of
      ``"2"``, ``"3"``, ``"4"``. It is computed before missing values are
      filled, so rows with a missing ``PCCLASS`` come out as True.
    * missing ``PCCLASS`` values are replaced with an empty string.

    Returns the resulting table.
    """
    print("Loading speed data...")

    source_path = output_dir / output_name
    hexes = gp.read_file(source_path, driver="GPKG")

    weighted_population = hexes["Pop2016"] * hexes["ookla_50_10_percentile"]
    hexes["Ookla_Pop_at_50_10"] = weighted_population / 100

    hexes = convert_kbps_to_mbps(hexes)

    non_rural_classes = ["2", "3", "4"]
    hexes["is_rural"] = ~hexes["PCCLASS"].isin(non_rural_classes)

    hexes["PCCLASS"] = hexes["PCCLASS"].fillna("")
    return hexes

In [None]:
# Load the speed table once; the cells that follow read from `data`.
data = load_speed_data()

In [None]:
# Show the available column names (the loader has already renamed kbps columns to Mbps).
data.columns

In [None]:
# Mean download speed per province for the first quarter of each year,
# drawn against a horizontal reference line at 50.
labels = data['PRCODE'].sort_values().unique()
# Missing province codes cannot be placed on the x axis, so they are left out
# of the plot (``labels`` itself is kept as computed above).
provinces = pd.Index(labels).dropna()

# plt.subplots() returns a (figure, axes) pair; unpack it instead of binding
# the whole tuple to one name.
fig, ax = plt.subplots()
for year in ["2019", "2020", "2021", "2022", "2023"]:
    for quarter in ["Q1"]:
        period = f"{quarter} {year}"
        period_rows = data[data['min_year'] == period]
        # Reindex onto the full province list so that every period is drawn at
        # the same x positions and in the same (sorted) order, even when a
        # province has no rows for that period.
        period_means = (
            period_rows.groupby('PRCODE')['avg_d_Mbps'].mean().reindex(provinces)
        )
        ax.scatter(provinces.tolist(), period_means.to_numpy(), label=period)

ax.axhline(y=50)
ax.set_xlabel("Province (PRCODE)")
ax.set_ylabel("Mean avg_d_Mbps")
ax.set_title("Mean download speed by province, Q1 of each year")
ax.legend()
plt.show()

In [None]:
# x = time, y = speed, points = provinces

In [None]:
# Mean of min_u_Mbps per province over time, one line per province, drawn
# against a horizontal reference line at 10.
labels = data['PRCODE'].sort_values().unique()

# Period labels look like "Q1 2019". Sorted as plain strings they come out as
# "Q1 2019", "Q1 2020", ..., "Q2 2019", which scrambles the time axis, so the
# x positions are fixed up front in (year, quarter) order.
periods = sorted(
    data['min_year'].dropna().unique(),
    key=lambda period: tuple(reversed(str(period).partition(' '))),
)

# plt.subplots() returns a (figure, axes) pair.
fig2, ax2 = plt.subplots(figsize=(15, 15))
for province in labels:
    province_rows = data[data['PRCODE'] == province]
    # TODO(review): an additional row filter on province_rows was left
    # unfinished at this point; add it once the intended condition is known.
    # Reindex so every province uses the same chronological x positions;
    # periods without data become gaps rather than shifting the line.
    period_means = (
        province_rows.groupby('min_year')['min_u_Mbps'].mean().reindex(periods)
    )
    ax2.plot(periods, period_means.to_numpy(), marker='o', label=province)

ax2.axhline(y=10)
ax2.set_xlabel("Period (min_year)")
ax2.set_ylabel("Mean min_u_Mbps")
ax2.set_title("Mean upload speed by province over time")
ax2.tick_params(axis='x', rotation=45)
ax2.legend()
plt.show()

In [None]:
# Columns removed from every yearly table right after loading.
columns_to_drop = [
    'geometry', 'tests', 'devices', 'DAUID', 'PRUID', 'CDUID', 'CCSUID',
    'CCSNAME', 'CSDUID', 'ERUID', 'SACCODE', 'CMAUID', 'CMAPUID', 'CMANAME',
    'CMATYPE', 'CTUID', 'CTNAME', 'ADAUID', 'PCUID', 'PCPUID',
]
yearly_csv_paths = [
    '2019-2020.csv', '2020-2021.csv', '2021-2022.csv', '2022-2023.csv', '2023.csv',
]

# Bind the trimmed table back to each name: dropping inside a loop over the
# tables only rebinds the loop variable and leaves the originals untrimmed.
dict_19, dict_20, dict_21, dict_22, dict_23 = [
    pd.read_csv(path, low_memory=False).drop(columns=columns_to_drop)
    for path in yearly_csv_paths
]

# Stack the yearly tables row-wise (they share the same columns) and write the
# combined table once.
all_years = pd.concat(
    [dict_19, dict_20, dict_21, dict_22, dict_23], axis=0, ignore_index=True
)
all_years.to_csv('Final.csv', index=False)

In [None]:
# Mean download speed per (PRNAME, year) for each yearly table.
# The source column is in kbps; it is divided by 1000, the same factor
# convert_kbps_to_mbps uses, so both parts of the notebook report the same
# unit. The column keeps the name 'avg_d_kbps' because the plotting cell that
# follows looks it up under that name.
x1, x2, x3, x4, x5 = [
    table[['PRNAME', 'avg_d_kbps', 'year']].groupby(['PRNAME', 'year']).mean() / 1000
    for table in (dict_19, dict_20, dict_21, dict_22, dict_23)
]

# Number of (province, year) groups per table, as a quick check that the
# tables cover the same provinces.
for x in [x1, x2, x3, x4, x5]:
    print(len(x))
print(x1)

In [None]:
# Download speed per province across the yearly tables, drawn against a
# horizontal reference line at 50.
years = ["2019", "2020", "2021", "2022", "2023"]

# Line the tables up by province name instead of by row position: positional
# integer lookups on a label-indexed Series are deprecated, and row order is
# not guaranteed to match any separately built label list.
# Each table is first collapsed to one value per province; when a table holds
# a single year per province this mean leaves the values unchanged.
download_by_province = pd.concat(
    [x['avg_d_kbps'].groupby(level='PRNAME').mean() for x in (x1, x2, x3, x4, x5)],
    axis=1,
    keys=years,
)

# plt.subplots() returns a (figure, axes) pair.
fig, ax = plt.subplots(figsize=(15, 15))
for province, speeds in download_by_province.iterrows():
    # Label each line with the province name taken from the data itself.
    ax.plot(years, speeds.to_numpy(), marker='o', label=province)

ax.axhline(y=50)
ax.set_xlabel("Year")
ax.set_ylabel("Mean download speed (Mbps)")
ax.set_title("Mean download speed by province")
ax.legend()
plt.show()

In [None]:
# Mean upload speed per (PRNAME, year) for each yearly table, plus the spread
# within the first table, then one line per province across the tables.
years = ["2019", "2020", "2021", "2022", "2023"]

# Population standard deviation (ddof=0) of upload speed in the first table,
# still in kbps. The built-in groupby std is used instead of passing np.std
# to agg, which pandas has deprecated.
print(dict_19[['PRNAME', 'avg_u_kbps', 'year']].groupby(['PRNAME', 'year']).std(ddof=0))

# kbps -> Mbps uses 1000, the same factor as convert_kbps_to_mbps.
x1, x2, x3, x4, x5 = [
    table[['PRNAME', 'avg_u_kbps', 'year']].groupby(['PRNAME', 'year']).mean() / 1000
    for table in (dict_19, dict_20, dict_21, dict_22, dict_23)
]
print(x1)

# Line the tables up by province name rather than by row position, so each
# line carries the name that actually belongs to its values. When a table
# holds a single year per province the mean over the year level is a no-op.
upload_by_province = pd.concat(
    [x['avg_u_kbps'].groupby(level='PRNAME').mean() for x in (x1, x2, x3, x4, x5)],
    axis=1,
    keys=years,
)

# plt.subplots() returns a (figure, axes) pair.
fig, ax = plt.subplots(figsize=(15, 15))
for province, speeds in upload_by_province.iterrows():
    ax.plot(years, speeds.to_numpy(), marker='o', label=province)

# Upload plots in this notebook use a reference line at 10; 50 is the value
# used on the download plots.
ax.axhline(y=10)
ax.set_xlabel("Year")
ax.set_ylabel("Mean upload speed (Mbps)")
ax.set_title("Mean upload speed by province")
ax.legend()
plt.show()

In [None]:
# Mean of Down_50_percentile per (PRNAME, year) for each yearly table, then
# one line per province across the tables.
years = ["2019", "2020", "2021", "2022", "2023"]

# NOTE(review): the unit of Down_50_percentile is not visible from this
# notebook. The division by 1024 and the reference line at 10 are kept exactly
# as found; confirm both against the column's definition.
x1, x2, x3, x4, x5 = [
    table[['PRNAME', 'Down_50_percentile', 'year']].groupby(['PRNAME', 'year']).mean() / 1024
    for table in (dict_19, dict_20, dict_21, dict_22, dict_23)
]
print(x1)

# Line the tables up by province name rather than by row position: positional
# integer lookups on a label-indexed Series are deprecated, and this way each
# line is labelled with the name that belongs to its values. When a table
# holds a single year per province the mean over the year level is a no-op.
percentile_by_province = pd.concat(
    [x['Down_50_percentile'].groupby(level='PRNAME').mean() for x in (x1, x2, x3, x4, x5)],
    axis=1,
    keys=years,
)

# plt.subplots() returns a (figure, axes) pair.
fig, ax = plt.subplots(figsize=(15, 15))
for province, values in percentile_by_province.iterrows():
    ax.plot(years, values.to_numpy(), marker='o', label=province)

ax.axhline(y=10)
ax.set_xlabel("Year")
ax.set_ylabel("Mean Down_50_percentile / 1024")
ax.set_title("Down_50_percentile by province")
ax.legend()
plt.show()

In [None]:
# Mean upload speed per (PRNAME, year) for each yearly table, then one line
# per province across the tables, drawn against a reference line at 10.
years = ["2019", "2020", "2021", "2022", "2023"]

# kbps -> Mbps uses 1000, the same factor as convert_kbps_to_mbps.
x1, x2, x3, x4, x5 = [
    table[['PRNAME', 'avg_u_kbps', 'year']].groupby(['PRNAME', 'year']).mean() / 1000
    for table in (dict_19, dict_20, dict_21, dict_22, dict_23)
]
print(x1)

# Line the tables up by province name rather than by row position: positional
# integer lookups on a label-indexed Series are deprecated, and this way each
# line is labelled with the name that belongs to its values. When a table
# holds a single year per province the mean over the year level is a no-op.
upload_by_province = pd.concat(
    [x['avg_u_kbps'].groupby(level='PRNAME').mean() for x in (x1, x2, x3, x4, x5)],
    axis=1,
    keys=years,
)

# plt.subplots() returns a (figure, axes) pair.
fig, ax = plt.subplots(figsize=(15, 15))
for province, speeds in upload_by_province.iterrows():
    ax.plot(years, speeds.to_numpy(), marker='o', label=province)

ax.axhline(y=10)
ax.set_xlabel("Year")
ax.set_ylabel("Mean upload speed (Mbps)")
ax.set_title("Mean upload speed by province")
ax.legend()
plt.show()