# Summary 
Tests and demonstrates the subsetted geometry output from the `ArbitrayGeomPHHCalc.ipynb` notebook. 
Adds basic speed-test statistics from a single quarter to each geometry and makes a sample map. 

In [1]:
import sys
sys.path.append("..")

%load_ext autoreload
%autoreload 1
%aimport src.datasets.joins
%aimport src.datasets.loading.statcan

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from src.datasets.loading import statcan
from src.datasets.loading import ookla
from src.datasets import overlays

import statsmodels as sm
# import statsmodels.stats.weightstats
from scipy.stats import lognorm

import src.config
from pathlib import Path
import geopandas as gp
import src.datasets.joins as joins



In [2]:
# WGS 84 geographic coordinates (lat/lon); the Ookla tiles are reprojected to this CRS.
CRS = 'EPSG:4326'

In [3]:
# Load the 'population_centres' boundary layer through the project's statcan loader.
popctrs = statcan.boundary('population_centres')

In [4]:
# Show the boundary layer's CRS; speed_data is reprojected to match it when loaded.
popctrs.crs

<Derived Projected CRS: PROJCS["PCS_Lambert_Conformal_Conic",GEOGCS["NAD83 ...>
Name: PCS_Lambert_Conformal_Conic
Axis Info [cartesian]:
- [east]: Easting (metre)
- [north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unnamed
- method: Lambert Conic Conformal (2SP)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [5]:
# Folder that holds the processed geometry layers.
derived_geometry = Path(src.config.DATA_DIRECTORY).joinpath('processed', 'geometries').resolve()

# Read the hexagon / population-centre layer and reproject it into the same
# projected CRS as popctrs. The file is read as lat/lon (GeoJSON is EPSG:4326),
# so the coordinates must be transformed with to_crs, not merely re-labelled.
hexagon_geojson = derived_geometry / 'hexagons_w_dissolved_smaller_popctrs.geojson'
speed_data = gp.read_file(hexagon_geojson).to_crs(popctrs.crs)

In [6]:
# Load the Ookla speed tiles and express them in lat/lon (CRS).
# Year / connection-type filtering is applied where the tiles are consumed.
tiles = ookla.canada_speed_tiles().to_crs(CRS)

In [7]:
def _fixed_2022(frame):
    """Return a boolean mask selecting 2022 fixed-connection rows of `frame`."""
    return (frame.year == 2022) & (frame.conn_type == 'fixed')


# Attach the per-geometry statistics, keyed on 'HEXUID_PCPUID'. Each helper
# receives its own freshly filtered copy of the tiles.
# NOTE(review): speed_data is in popctrs.crs while tiles are in EPSG:4326;
# confirm that the joins helpers reconcile the two CRSs.
speed_data = joins.add_simple_stats(speed_data, tiles.loc[_fixed_2022], 'HEXUID_PCPUID')
speed_data = joins.add_50_10_stats(speed_data, tiles.loc[_fixed_2022], 'HEXUID_PCPUID')

In [8]:
# Preview the joined frame (pandas truncates the display to the first and last rows).
# Rows without any speed statistics show NaN in the stats columns.
speed_data

Unnamed: 0,PCPUID,pc_area,hex_area,hex_frac,pc_frac,HEXUID_PCPUID,PRCODE,PCNAME,PCCLASS,Pop2016,...,75p_u_kbps,max_u_kbps,avg_lat_ms,tests,ave_tests_per_tile,unique_devices,ave_devices_per_tile,num_tiles,50_down_percentile,10_up_percentile
0,100792,1.719773e+08,2.539316e+07,0.440441,0.065033,NL47580528-100792,NL,St. John's,4,9896.147562,...,120759.0,329948.0,16.618090,4159.0,20.899497,1340.0,6.733668,199.0,88.442211,96.984925
1,100792,1.719773e+08,2.539316e+07,0.514429,0.075958,NL47580529-100792,NL,St. John's,4,4252.819048,...,142081.5,633774.0,21.037037,1041.0,7.711111,307.0,2.274074,135.0,93.333333,96.296296
2,100792,1.719773e+08,2.536795e+07,0.000288,0.000043,NL47710527-100792,NL,St. John's,4,0.000000,...,21291.5,24143.0,33.666667,29.0,9.666667,7.0,2.333333,3.0,33.333333,66.666667
3,100792,1.719773e+08,2.542434e+07,0.043914,0.006492,NL47430528-100792,NL,St. John's,4,2662.527342,...,120987.5,391351.0,12.842105,338.0,17.789474,66.0,3.473684,19.0,100.000000,100.000000
4,100792,1.719773e+08,2.540903e+07,0.619943,0.091594,NL47510528-100792,NL,St. John's,4,24029.650382,...,107654.0,430590.0,17.899628,5584.0,20.758364,1556.0,5.784387,269.0,93.680297,97.769517
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518090,,,2.441105e+07,1.000000,,BC53331204,BC,,,0.000000,...,,,,,,,,,,
518091,,,2.447289e+07,1.000000,,BC52891204,BC,,,0.000000,...,,,,,,,,,,
518092,,,2.450451e+07,1.000000,,BC52661204,BC,,,0.000000,...,,,,,,,,,,
518093,,,2.444205e+07,1.000000,,BC53111204,BC,,,0.000000,...,,,,,,,,,,


In [9]:
# Area / overlap-fraction bookkeeping columns are not needed past this point.
unused_columns = ['pc_area','hex_area','pc_frac', 'hex_frac']
speed_data = speed_data.drop(columns=unused_columns)

In [10]:
# Combined 50/10 score: the smaller of the two percentile columns, computed
# element-wise. np.minimum is vectorised (no Python-level loop over every row)
# and yields NaN whenever either input is NaN, whereas the built-in min() gives
# an order-dependent answer when one operand is NaN.
speed_data['ookla_50_10_percentile'] = np.minimum(
    speed_data['50_down_percentile'],
    speed_data['10_up_percentile'],
)

In [11]:
# List the remaining columns to confirm the drop and the new 'ookla_50_10_percentile' column.
speed_data.columns

Index(['PCPUID', 'HEXUID_PCPUID', 'PRCODE', 'PCNAME', 'PCCLASS', 'Pop2016',
       'TDwell2016_TLog2016', 'URDwell2016_RH2016', 'PHH_Count', 'Common_Type',
       'Pop2016_at_50_10_Combined', 'TDwell2016_at_50_10_Combined',
       'URDwell_at_50_10_Combined', 'Pop_Avail_50_10', 'TDwell_Avail_50_10',
       'URDwell_Avail_50_10', 'geometry', 'avg_d_kbps', 'std_d_kbps',
       'min_d_kbps', '25p_d_kbps', '50p_d_kbps', '75p_d_kbps', 'max_d_kbps',
       'avg_u_kbps', 'std_u_kbps', 'min_u_kbps', '25p_u_kbps', '50p_u_kbps',
       '75p_u_kbps', 'max_u_kbps', 'avg_lat_ms', 'tests', 'ave_tests_per_tile',
       'unique_devices', 'ave_devices_per_tile', 'num_tiles',
       '50_down_percentile', '10_up_percentile', 'ookla_50_10_percentile'],
      dtype='object')

In [13]:
# Bounding box around Lloydminster, padded by 90_000 CRS units (metres in
# popctrs' projected CRS, which speed_data shares).
lloydminster = popctrs.loc[popctrs.PCNAME == "Lloydminster"]
xmin, ymin, xmax, ymax = lloydminster.buffer(90_000).total_bounds

# Within that box, keep only geometries that have population or a measured download speed.
in_view = speed_data.cx[xmin:xmax, ymin:ymax]
populated_or_measured = in_view.loc[(in_view.Pop2016 > 0) | (in_view.avg_d_kbps > 0)]

# Fields shown on hover and on click, respectively.
tooltip_fields = ['HEXUID_PCPUID','PCNAME','Pop2016','Pop_Avail_50_10','ookla_50_10_percentile']
popup_fields = [
    'HEXUID_PCPUID','PCNAME',
    'min_d_kbps','avg_d_kbps','max_d_kbps',
    'min_u_kbps','avg_u_kbps','max_u_kbps',
    'Pop2016','tests','unique_devices','Pop_Avail_50_10',
    'ookla_50_10_percentile','50_down_percentile','10_up_percentile',
]

# Interactive map coloured by the combined 50/10 percentile in four equal-interval classes.
populated_or_measured.explore(
    'ookla_50_10_percentile',
    scheme='equalinterval',
    k=4,
    tooltip=tooltip_fields,
    popup=popup_fields,
)