In [None]:
import pandas as pd
import seaborn
import matplotlib.pyplot as plt

In [None]:
# Load 'geo_data_1.csv'; this frame is later joined to the user table on its
# 'postcode' column.
geo_data = pd.read_csv('geo_data_1.csv')

In [None]:
# Print the column names, dtypes and non-null counts of the geography table.
geo_data.info()

In [None]:
# Load 'egl_typed.csv' into `users` and print its column/dtype/non-null summary.
users = pd.read_csv('egl_typed.csv')
users.info()

In [None]:
# Load 'area_populations.csv'; later cells join it on its 'Code' column and
# divide by its 'persons' column.
ons_pop = pd.read_csv('area_populations.csv')
ons_pop.info()

In [None]:
# Attach geography to each user row by postcode. The right join keeps every
# geo_data row, including postcodes that match no user (user columns are NaN).
users_geo = pd.merge(
    users,
    geo_data,
    how='right',
    left_on='postcode.1',
    right_on='postcode',
)

In [None]:
# Number of distinct id_code values per loc_code, returned as a flat
# two-column frame (loc_code, id_code).
user_loc_counts = (
    users_geo
    .groupby('loc_code')['id_code']
    .nunique()
    .reset_index()
)

In [None]:
# Line up per-area user counts with the population table. The outer join
# keeps areas that appear on only one side (the other side's columns are NaN).
user_pop_compare = pd.merge(
    user_loc_counts,
    ons_pop,
    how='outer',
    left_on='loc_code',
    right_on='Code',
)

In [None]:
# Distinct users per area as a fraction of the area's 'persons' figure.
# Rows lacking either value produce NaN.
user_pop_compare['prop'] = user_pop_compare['id_code'].div(
    user_pop_compare['persons']
)

In [None]:
# Print column dtypes and non-null counts; the non-null counts show how many
# areas ended up with a usable 'prop' value.
user_pop_compare.info()

In [None]:
# Violin plot of the per-area user/population ratio, saved to disk.
fig, ax = plt.subplots(figsize=(8, 6))

# Renaming the series sets the axis label seaborn derives from its name.
share = user_pop_compare['prop'].rename('Proportion of UK Population')

seaborn.violinplot(share, color='grey', orient='v', ax=ax)
seaborn.despine(fig=fig)
fig.savefig('market_share_violin.png')

In [None]:
# Median 'anxiety' value per loc_code, as a flat two-column frame.
anxiety_geo = (
    users_geo
    .groupby('loc_code')['anxiety']
    .median()
    .reset_index()
)

In [None]:
# Print column dtypes and non-null counts of the per-area anxiety medians.
anxiety_geo.info()

In [None]:
import folium
import json

# Boundary geometry in TopoJSON form. JSON text is UTF-8, so the encoding is
# pinned instead of relying on the platform default (which is not UTF-8 on
# every system and would corrupt or reject non-ASCII names).
with open('topo_lad.json', 'r', encoding='utf-8') as file:
    boundaries = json.load(file)

# Base map centred on latitude 54, longitude -2.
basemap = folium.Map(
    location=[54, -2],
    zoom_start=6
)

# Shade each boundary feature by its median anxiety value. Features are
# matched to rows of anxiety_geo by comparing the feature's `id` with the
# 'loc_code' column; 'objects.lad' selects the geometry collection inside the
# TopoJSON document.
folium.Choropleth(
    geo_data=boundaries,
    name='choropleth',
    topojson='objects.lad',
    data=anxiety_geo,
    columns=['loc_code', 'anxiety'],
    key_on='feature.id',
    fill_color='BuPu',
    fill_opacity=0.7,
    line_opacity=1,
    smooth_factor=2
).add_to(basemap)

# Layer toggle so the choropleth can be switched on/off in the rendered map.
folium.LayerControl().add_to(basemap)

basemap.save('anx_map.html')

In [None]:
# Step 1: distinct users for every (loc_code, covid_symptoms) combination.
symptom_counts = (
    users_geo
    .groupby(['loc_code', 'covid_symptoms'])['id_code']
    .nunique()
    .reset_index()
)

# Step 2: reshape to one row per loc_code with one column per covid_symptoms
# value. Combinations that never occur become NaN.
symptomatic_loc = symptom_counts.pivot(
    index='loc_code',
    columns='covid_symptoms',
    values='id_code',
)

In [None]:
# Per-area share of users in the covid_symptoms == 1 column out of the users
# in columns 0 and 1 combined (presumably 1 = symptomatic - confirm against
# the data dictionary). An area with NaN in either column yields NaN here.
symptomatic_loc['prop'] = symptomatic_loc[1] / (symptomatic_loc[0] + symptomatic_loc[1])

In [None]:
# Bring the per-area symptom figures onto every user row (default inner join).
# 'loc_code' is the index of symptomatic_loc and a column of users_geo; merge
# accepts an index level name in `on`.
symptomatic_anx = pd.merge(
    symptomatic_loc,
    users_geo,
    on='loc_code',
)

In [None]:
# Convert the 'date' column to datetime64 so it can be ranked chronologically,
# then print the frame summary to confirm the new dtype.
parsed_dates = pd.to_datetime(symptomatic_anx['date'])
symptomatic_anx['date'] = parsed_dates
symptomatic_anx.info()

In [None]:
# Rank each user's rows from newest (1) to oldest.
# method='first' breaks ties by order of appearance, so exactly one row per
# user receives rank 1. The default method ('average') would give two rows
# sharing the latest date a rank of 1.5 each, and a later `== 1` filter would
# then silently drop that user altogether.
symptomatic_anx['date_rank'] = (
    symptomatic_anx
    .groupby('id_code')['date']
    .rank(method='first', ascending=False)
)

In [None]:
# Keep only the rows ranked 1 within each user (date_rank was computed with
# the newest date first), then show the resulting (rows, columns) shape.
is_latest = symptomatic_anx['date_rank'] == 1
symptomatic_anx_rec = symptomatic_anx.loc[is_latest]
symptomatic_anx_rec.shape

In [None]:
# Scatter plot with a fitted regression line: the area-level symptom share
# ('prop') on x against the individual 'anxiety' value on y.
seaborn.lmplot(data=symptomatic_anx_rec, x='prop', y='anxiety')

In [None]:
# Print column dtypes and non-null counts of the filtered frame.
symptomatic_anx_rec.info()

In [None]:
# Display one randomly chosen row as a spot check (sample() defaults to n=1;
# no random_state is given, so the row differs between runs).
symptomatic_anx_rec.sample()

In [None]:
# Write the filtered frame to 'symp_anx_rec.p' in pickle format so it can be
# reloaded with pd.read_pickle. Only unpickle files from a trusted source.
symptomatic_anx_rec.to_pickle('symp_anx_rec.p')