In [None]:
import sys
sys.path.append("..")

%load_ext autoreload
%autoreload 1
%aimport src.datasets.joins
%aimport src.datasets.loading.statcan

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from src.datasets.loading import statcan
from src.datasets.loading import ookla
from src.datasets import overlays

import statsmodels as sm
# import statsmodels.stats.weightstats
from scipy.stats import lognorm

In [None]:
from src.datasets import joins

In [None]:
# Load the StatCan 'population_centres' boundary layer. Its CRS is reused below to
# reproject the Ookla tiles, and its PCNAME column is used to locate Edmonton.
popctrs = statcan.boundary('population_centres')

In [None]:
# Hexagon / population-centre overlay built by the project helper.
# NOTE(review): presumably one row per hexagon-by-population-centre piece, identified by
# 'HEXUID_PCPUID' (the key passed to add_simple_stats below) — confirm in src.datasets.joins.
o = joins.hexagons_popctrs_overlay()

In [None]:
# Keep only the 2022 fixed-connection Ookla tiles and reproject them to the CRS of
# `popctrs` so they can be combined with the overlay geometries.
tiles = (
    ookla.canada_speed_tiles()
    .loc[lambda df: (df.year == 2022) & (df.conn_type == 'fixed')]
    .to_crs(popctrs.crs)
)

In [None]:
# Attach speed-test statistics from `tiles` to the overlay, keyed on 'HEXUID_PCPUID'.
# NOTE(review): presumably this is where the min/25p/50p/75p/max/avg *_kbps columns used
# further down come from — confirm in src.datasets.joins.add_simple_stats.
o_statted = joins.add_simple_stats(o, tiles, 'HEXUID_PCPUID')

In [None]:
# Left-join the StatCan hexagon attributes on 'HEXuid_HEXidu'; how='left' keeps every
# overlay row even when no hexagon record matches (those attributes become NaN).
o_statted = o_statted.merge(statcan.hexagon_data(), on='HEXuid_HEXidu', how='left')

In [None]:
def breakpoint(value):
    """Convert an availability-gradient label to the lower bound of its range.

    NOTE: this name shadows the builtin ``breakpoint()`` debugger hook for the
    rest of the notebook. It is kept because a later cell passes this function
    to ``.apply`` by name.

    Labels are compared after collapsing runs of whitespace, so
    ``'>75% -  100%'`` (two spaces) and ``'>75% - 100%'`` are treated alike
    instead of the latter silently falling through to 0.0.

    Parameters
    ----------
    value : object
        A label such as ``'>50% - 75%'``. Non-string values (e.g. NaN or None)
        are accepted.

    Returns
    -------
    float
        0.75, 0.5 or 0.25 for the three upper ranges; 0.0 for ``'>0% - 25%'``,
        for any unrecognised label, and for non-string input.
    """
    if not isinstance(value, str):
        return 0.0

    lower_bounds = {
        '>75% - 100%': 0.75,
        '>50% - 75%': 0.5,
        '>25% - 50%': 0.25,
        '>0% - 25%': 0.0,
    }
    # Collapse repeated/leading/trailing whitespace before the lookup.
    normalised = ' '.join(value.split())
    return lower_bounds.get(normalised, 0.0)

# Turn each `Avail_50_10_Gradient_Dispo` label into a number via `breakpoint`
# (the function defined above in this cell, not the builtin debugger hook).
o_statted['access_level_numerical'] = o_statted.Avail_50_10_Gradient_Dispo.apply(breakpoint)

# NOTE(review): the commented-out lines below look like an abandoned draft. They reference
# `crtc_label_numerical` and `status`, neither of which is defined in the visible cells;
# delete them or restore the missing definitions before re-enabling.
# o_statted['over_estimated_down'] = o_statted['crtc_label_numerical'] > o_statted['50p_d_Mbps']
# o_statted['over_estimated_up'] = o_statted['crtc_label_numerical'] > o_statted['p>10Mbps_up']
# o_statted['status_code_down'] = o_statted.apply(lambda s:status(s['p>50Mbps_down'], s.over_estimated_down), axis=1)
# o_statted['status_code_up'] = o_statted.apply(lambda s:status(s['p>10Mbps_up'], s.over_estimated_up), axis=1)

In [None]:
# Convert every kbps-denominated column to Mbps.
# The matching columns are collected first so the frame is never renamed while its
# columns are being iterated, and all renames happen in one non-inplace call.
# Re-running the cell is a no-op because no 'kbps' columns remain afterwards.
kbps_cols = [col for col in o_statted.columns if "kbps" in col]
for col in kbps_cols:
    o_statted[col] = o_statted[col] / 1000
o_statted = o_statted.rename(columns={col: col.replace('kbps', 'Mbps') for col in kbps_cols})

In [None]:
# Preview the download/upload speed quantile columns that the 50/10 classification
# below reads. 'max_u_Mbps' is included because the classifier uses it as well.
o_statted[[
    'min_d_Mbps', '25p_d_Mbps', '50p_d_Mbps', '75p_d_Mbps', 'max_d_Mbps',
    'min_u_Mbps', '25p_u_Mbps', '50p_u_Mbps', '75p_u_Mbps', 'max_u_Mbps'
    ]]

In [None]:
# Availability tiers ordered from most to least restrictive. None means the speed
# statistics could not be compared against the threshold (e.g. they are NaN).
# 'Unvailable' (sic) keeps its original spelling because the label is written to
# the output column and may be matched elsewhere.
_LEVEL_ORDER = (None, 'Unvailable', '0%-25%', '25%-50%', '50%-75%', '75%-100%', '100%')


def _direction_level(s, direction, threshold):
    """Return the availability tier for one direction ('d' or 'u') at `threshold` Mbps."""
    keys = [f'{stat}_{direction}_Mbps' for stat in ('min', '25p', '50p', '75p', 'max')]
    labels = ('75%-100%', '50%-75%', '25%-50%', '0%-25%')

    # Even the slowest observation meets the threshold.
    if threshold <= s[keys[0]]:
        return '100%'
    # The threshold falls between two consecutive quantiles.
    for lower_key, upper_key, label in zip(keys, keys[1:], labels):
        if s[lower_key] < threshold <= s[upper_key]:
            return label
    # Even the fastest observation does not exceed the threshold.
    if s[keys[-1]] <= threshold:
        return 'Unvailable'
    # Every comparison failed (NaN statistics): no tier can be assigned.
    return None


def calculate_50_10_level(s, down_threshold=50, up_threshold=10):
    """Classify a row's 50/10 availability tier from its speed quantiles.

    The download quantiles are compared against `down_threshold` and the upload
    quantiles against `up_threshold`; the more restrictive of the two tiers is
    returned. Upload was previously compared against 50 Mbps, which contradicts
    the 50 Mbps down / 10 Mbps up target this function is named after.

    Parameters
    ----------
    s : mapping
        Row (or dict) providing 'min', '25p', '50p', '75p' and 'max' values under
        the keys '<stat>_d_Mbps' (download) and '<stat>_u_Mbps' (upload).
    down_threshold : float, default 50
        Download speed target in Mbps.
    up_threshold : float, default 10
        Upload speed target in Mbps.

    Returns
    -------
    str or None
        One of '100%', '75%-100%', '50%-75%', '25%-50%', '0%-25%', 'Unvailable',
        or None when either direction's statistics are not comparable (NaN).
    """
    down_label = _direction_level(s, 'd', down_threshold)
    up_label = _direction_level(s, 'u', up_threshold)

    # Rank by tier order rather than by numeric score: '0%-25%', 'Unvailable' and
    # None all score 0.0, so a score comparison cannot tell which one is worse.
    if _LEVEL_ORDER.index(up_label) < _LEVEL_ORDER.index(down_label):
        return up_label
    return down_label
    
def ookla_numerical(v):
    """Translate an Ookla-derived tier label into the lower bound of its range.

    Returns 1.0, 0.75, 0.5 or 0.25 for the four tiers listed below and 0.0 for
    anything else (including '0%-25%', 'Unvailable' and None).
    """
    known_tiers = (
        ('100%', 1.0),
        ('75%-100%', 0.75),
        ('50%-75%', 0.5),
        ('25%-50%', 0.25),
    )
    # First matching tier wins; fall back to 0.0 when nothing matches.
    return next((fraction for tier, fraction in known_tiers if v == tier), 0.0)


# Row-wise classification (axis=1 passes each row to the function) followed by the
# numeric encoding of the resulting label. The second line reads the column written
# by the first, so the order matters.
o_statted['Ookla_Avail_50_10'] = o_statted.apply(calculate_50_10_level, axis=1)
o_statted['Ookla_Avail_50_10_numerical'] = o_statted['Ookla_Avail_50_10'].apply(ookla_numerical)


In [None]:
# An area counts as over-estimated when the level taken from `Avail_50_10_Gradient_Dispo`
# is strictly higher than the level derived from the Ookla speed statistics.
o_statted['is_overestimated'] = o_statted['access_level_numerical'].gt(
    o_statted['Ookla_Avail_50_10_numerical']
)

In [None]:
# Bounding box around the Edmonton population centre, padded by 100000 CRS units
# (presumably metres — confirm against popctrs.crs).
xmin, ymin, xmax, ymax = (
    popctrs[popctrs.PCNAME == "Edmonton"]
    .buffer(100000)
    .total_bounds
)

# Interactive map of the areas inside that box, coloured by average download speed.
(
    o_statted
    .cx[xmin:xmax, ymin:ymax]
    .loc[:, [
        'geometry', 'HEXUID_PCPUID',
        # download speed statistics
        'avg_d_Mbps', 'std_d_Mbps', 'min_d_Mbps', '25p_d_Mbps', '50p_d_Mbps', '75p_d_Mbps', 'max_d_Mbps',
        # upload speed statistics
        'avg_u_Mbps', 'std_u_Mbps', 'min_u_Mbps', '25p_u_Mbps', '50p_u_Mbps', '75p_u_Mbps', 'max_u_Mbps',
        'avg_lat_ms',
        # sample-size columns
        'tests', 'ave_tests_per_tile',
        'unique_devices', 'ave_devices_per_tile',
        'num_tiles',
        'SumPop_2016_SommePop',  # 'SumURD_2016_SommeRH', 'SumTD_2016_SommeTL',
        'Avail_50_10_Gradient_Dispo', 'Ookla_Avail_50_10', 'is_overestimated',
    ]]
    .explore('avg_d_Mbps', vmin=0, vmax=300)
)