# Generating Active case data from https://coronavirus.data.gov.uk/

In [None]:
import sys
from pathlib import Path
# Locate the project root relative to the notebook's working directory.
# The original note places this notebook in
# /SOCIAL_DISTANCING/CODE/NOTEBOOKS/TEST_NOTEBOOKS.
# NOTE(review): two .parent hops from that folder end at .../CODE rather than
# the top-level project folder -- confirm where the kernel is started.
root = Path.cwd().parent.parent
print(root)

# Folder holding the project's importable .py modules.
py_path = root / 'code/py-files'
print(py_path)

# Folder holding the static data files read further down.
data_path = root / 'static'
print(data_path)

# Put the py-files folder first on the import path so its modules can be imported.
sys.path.insert(0, str(py_path))

In [272]:
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
from importlib import reload
from tqdm import tqdm
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import cm
from scipy.spatial import distance_matrix

In [273]:
# Every UTLA-level input used below is read from <data_path>/utla_data.
utla_folder = data_path / 'utla_data'

In [274]:
import os
import requests

# Download the UTLA-level cumulative and new case counts (by specimen date) as CSV
# and store the raw response bytes under <utla_folder>/csv/covid_cases.csv.
url = "https://api.coronavirus.data.gov.uk/v2/data?areaType=utla&metric=cumCasesBySpecimenDate&metric=newCasesBySpecimenDate&format=csv"
# A timeout stops the cell from hanging forever if the server never answers.
req = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of saving an error body as covid_cases.csv.
req.raise_for_status()
url_content = req.content

# The context manager closes the file even if the write raises.
with open(utla_folder/'csv/covid_cases.csv', 'wb') as csv_file:
    csv_file.write(url_content)

In [275]:
# Load the CSV written by the download cell into a DataFrame.
df_covid = pd.read_csv(utla_folder / 'csv' / 'covid_cases.csv')

In [276]:
# Display the loaded case table (the rendered output below is truncated by pandas).
df_covid

Unnamed: 0,areaCode,areaName,areaType,date,cumCasesBySpecimenDate,newCasesBySpecimenDate
0,E06000003,Redcar and Cleveland,utla,2021-07-20,14187,56
1,E06000014,York,utla,2021-07-20,16535,19
2,E06000050,Cheshire West and Chester,utla,2021-07-20,28401,44
3,E08000001,Bolton,utla,2021-07-20,37057,39
4,E08000016,Barnsley,utla,2021-07-20,26461,36
...,...,...,...,...,...,...
107449,E10000012,Essex,utla,2020-03-01,1,0
107450,E08000035,Leeds,utla,2020-02-29,2,0
107451,E10000012,Essex,utla,2020-02-29,1,0
107452,E08000035,Leeds,utla,2020-02-28,2,2


In [277]:
# Read the population table and keep only its first 151 rows.
# NOTE(review): 151 is hard-coded -- presumably the number of UTLA rows in the
# file; confirm against population.csv.
df_pops = pd.read_csv(utla_folder / 'csv' / 'population.csv').iloc[:151]

In [278]:
# Distinct area codes present in the case data, in ascending order.
utlasA = np.sort(df_covid["areaCode"].unique())

In [279]:
# Load the UTLA shapefile into a GeoDataFrame.
gdf_ut = gpd.read_file(utla_folder / "shp" / "UTLA_ENG.shp")

In [280]:
# Order the rows chronologically and renumber them from 0.
# drop=True discards the old row labels instead of inserting them as an extra
# "index" column; without it, re-running this cell raises ValueError because
# the "index" column already exists.
df_covid = df_covid.sort_values("date").reset_index(drop=True)

In [281]:
# Distinct values of the "date" column, in ascending order.
dates = np.sort(df_covid["date"].unique())

In [282]:
def GenerCases(date):
    """Return the cumulative case count on ``date`` for every area in ``gdf_ut``.

    Parameters
    ----------
    date
        Value compared with ``==`` against ``df_covid["date"]`` to select the
        rows of that day.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per element of ``gdf_ut.UTLA19CD``, in the
        same order. An entry holds ``cumCasesBySpecimenDate`` of the row whose
        ``areaCode`` equals that UTLA code, or 0 when ``df_covid`` has no row
        for that code on ``date``.
    """
    rows_on_date = df_covid[df_covid["date"] == date]
    # Build the code -> count lookup once instead of rescanning every row for
    # every area. If a code occurs more than once, dict() keeps the last
    # occurrence, which is also what a full scan with overwriting produces.
    cases_by_code = dict(
        zip(rows_on_date["areaCode"], rows_on_date["cumCasesBySpecimenDate"])
    )
    TotalCases = np.full(gdf_ut.UTLA19CD.size, 0)
    for j, code in enumerate(gdf_ut.UTLA19CD):
        if code in cases_by_code:
            TotalCases[j] = cases_by_code[code]
    return TotalCases

In [283]:
# Position of '2020-07-01' in the sorted date array (IndexError if it is absent),
# then every date from that position onwards.
july_date = np.flatnonzero(dates == '2020-07-01')[0]
dates_from_july = dates[july_date:]

In [284]:
# Step used when sub-sampling dates_from_july (every 10th entry is kept).
# NOTE(review): presumably the assumed average infection duration in days -- confirm.
avg_infection_time = 10

In [285]:
# Every avg_infection_time-th entry of dates_from_july.
# Despite its name, `cases` holds date values, not case counts.
cases = dates_from_july[::avg_infection_time]

In [304]:
def produce_cases(cases):
    """Return the change in cumulative cases between consecutive sampled dates.

    Parameters
    ----------
    cases
        Indexable, sliceable sequence of dates accepted by ``GenerCases``.
        It must contain at least one element (``cases[0]`` is read
        unconditionally, so an empty sequence raises ``IndexError``).

    Returns
    -------
    numpy.ndarray
        One row per consecutive pair of dates: row ``i`` equals
        ``GenerCases(cases[i + 1]) - GenerCases(cases[i])``. With a single
        date the result is an empty array.
    """
    all_new_cases = []
    previous_totals = GenerCases(cases[0])
    for item in cases[1:]:
        # Evaluate each date once and reuse it as the baseline of the next
        # period, rather than calling GenerCases twice for the same date.
        current_totals = GenerCases(item)
        all_new_cases.append(current_totals - previous_totals)
        previous_totals = current_totals
    return np.array(all_new_cases)

In [318]:
# Increase in cumulative cases between each pair of consecutive sampled dates
# (one row per pair, one value per area in gdf_ut).
new_cases = produce_cases(cases)

In [320]:
# Add one column to gdf_ut per row of new_cases; column k is named after cases[k].
# Uncomment if you wish to have cases per 1000 people
old_item = cases[0]
for k, item in enumerate(new_cases):
    column_label = f'{cases[k]}'
    gdf_ut[column_label] = item #/ df_pops["population"] * 1000
    old_item = cases[k]

In [321]:
# Display the GeoDataFrame with the per-period case columns added.
# NOTE(review): the saved output includes a `pop` column that is only assigned
# in a later cell, so it was produced in a kernel where that cell had already run.
gdf_ut

Unnamed: 0,UTLA19CD,UTLA19NAME,geometry,2020-07-01,2020-07-11,2020-07-21,2020-07-31,2020-08-10,2020-08-20,2020-08-30,...,2021-04-17,2021-04-27,2021-05-07,2021-05-17,2021-05-27,2021-06-06,2021-06-16,2021-06-26,2021-07-06,pop
0,E06000001,Hartlepool,"POLYGON ((450022.099 526039.600, 449679.001 52...",3,11,13,10,12,18,78,...,38,18,16,11,11,87,224,850,1491,93663
1,E06000002,Middlesbrough,"POLYGON ((453314.913 515472.255, 454376.905 51...",8,11,7,46,32,55,105,...,45,78,72,60,55,112,252,1048,2546,140980
2,E06000003,Redcar and Cleveland,"MULTIPOLYGON (((464636.314 512421.190, 464248....",3,4,6,0,12,37,61,...,26,28,22,33,22,89,280,1021,2640,137150
3,E06000004,Stockton-on-Tees,"MULTIPOLYGON (((446120.310 510524.689, 446312....",9,7,11,16,9,33,83,...,73,77,48,33,63,145,309,1288,2913,197348
4,E06000005,Darlington,"POLYGON ((430852.381 510073.154, 430509.720 50...",2,4,6,3,4,13,29,...,42,31,14,13,10,122,374,815,1369,106803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,E10000029,Suffolk,"MULTIPOLYGON (((630151.397 234198.716, 628268....",21,25,36,37,39,47,109,...,230,179,140,85,92,190,304,824,2362,761350
147,E10000030,Surrey,"POLYGON ((492049.362 131507.340, 491404.687 13...",68,45,65,81,154,180,333,...,301,189,167,354,775,1043,1489,3300,6370,1196236
148,E10000031,Warwickshire,"POLYGON ((433693.311 236453.095, 432857.313 23...",50,36,59,49,64,45,149,...,189,164,109,116,222,536,1305,2730,4361,577933
149,E10000032,West Sussex,"MULTIPOLYGON (((485831.764 92236.717, 485216.7...",31,56,36,27,70,71,211,...,169,139,106,96,308,522,1038,2595,5119,863980


In [312]:
# Optional export, left disabled: uncomment to write gdf_ut to a new shapefile.
# gdf_ut.to_file(utla_folder/"utla_covid_EN_new.shp")

In [322]:
# Four month names, October through January.
# NOTE(review): despite the name `year`, this holds months, and no other cell
# shown here reads it -- presumably labels for the four selected snapshots; confirm.
year = ['October', 'November', 'December', 'January' ]

In [323]:
# Attach a copy of the population figures to gdf_ut as column "pop"
# (pandas aligns the values on the row index).
gdf_ut["pop"] = df_pops["population"].copy()

In [324]:
# Positional indices into gdf_ut.columns of the snapshot columns of interest.
columns_chosen = [12, 15, 18, 21]

In [325]:
# Show which column label sits at each chosen position.
for item in columns_chosen:
    chosen_label = gdf_ut.columns[item]
    print(chosen_label)

2020-09-29
2020-10-29
2020-11-28
2020-12-28


In [326]:
# Keep only the snapshot columns listed in `columns_chosen`, selected by position.
# These are the same positions (12, 15, 18, 21) that the slice arithmetic
# `6:-1:3` followed by `2:6` used to pick, now kept in one place.
# .copy() gives an independent frame, so adding a column below does not write
# into a slice of gdf_ut.
gdf_ut_filtered = gdf_ut.iloc[:, columns_chosen].copy()
# Re-attach the geometries so each row still carries its UTLA shape.
gdf_ut_filtered["geometry"] = gdf_ut.geometry.copy()
gdf_ut_filtered

Unnamed: 0,2020-09-29,2020-10-29,2020-11-28,2020-12-28,geometry
0,347,558,321,1120,"POLYGON ((450022.099 526039.600, 449679.001 52..."
1,575,830,392,1311,"POLYGON ((453314.913 515472.255, 454376.905 51..."
2,377,799,278,934,"MULTIPOLYGON (((464636.314 512421.190, 464248...."
3,803,1103,470,1384,"MULTIPOLYGON (((446120.310 510524.689, 446312...."
4,264,459,313,752,"POLYGON ((430852.381 510073.154, 430509.720 50..."
...,...,...,...,...,...
146,359,817,922,5789,"MULTIPOLYGON (((630151.397 234198.716, 628268...."
147,1175,2387,2560,13100,"POLYGON ((492049.362 131507.340, 491404.687 13..."
148,662,1824,1079,3706,"POLYGON ((433693.311 236453.095, 432857.313 23..."
149,386,1227,922,8980,"MULTIPOLYGON (((485831.764 92236.717, 485216.7..."
