In [1]:
import pandas as pd
import requests
import json
from datetime import timedelta
import numpy as np

In [2]:
request = requests.get("https://coronavirus.data.gov.uk/downloads/json/coronavirus-cases_latest.json")
requestJson = json.loads(request.content)
ltlasDf = pd.DataFrame(requestJson['ltlas'])
countriesDf = pd.DataFrame(requestJson['countries']) 
regionsDf = pd.DataFrame(requestJson['regions'])
utlasDf = pd.DataFrame(requestJson['utlas'])

In [3]:
lowerToUpperDf = pd.read_csv("data/Lower_Tier_Local_Authority_to_Upper_Tier_Local_Authority_(April_2019)_Lookup_in_England_and_Wales.csv")
lowerToRegionDf = pd.read_csv("data/Local_Authority_District_to_Region_(April_2019)_Lookup_in_England.csv")

In [4]:
print(ltlasDf.shape, countriesDf.shape, regionsDf.shape, utlasDf.shape, lowerToUpperDf.shape)

(24455, 10) (136, 10) (1076, 10) (13653, 10) (339, 5)


In [5]:
ltlasDf.specimenDate = pd.to_datetime(ltlasDf.specimenDate)
countriesDf.specimenDate = pd.to_datetime(countriesDf.specimenDate)
regionsDf.specimenDate = pd.to_datetime(regionsDf.specimenDate)
utlasDf.specimenDate = pd.to_datetime(utlasDf.specimenDate)

In [6]:
ltlasDf = ltlasDf[['areaCode', 'areaName', 'specimenDate', 'dailyLabConfirmedCases', 'totalLabConfirmedCases']]

In [7]:
df1 = ltlasDf.drop_duplicates(subset=['areaCode','areaName'])[['areaCode','areaName']]
dtDf = pd.DataFrame(pd.date_range(ltlasDf.specimenDate.min(),ltlasDf.specimenDate.max(),freq='1 D'), columns=['specimenDate'])

df1['key'] = 0
dtDf['key'] = 0

df1 = df1.merge(dtDf, how='outer').drop(columns=['key'],axis=1)
ltlasDf = df1.merge(ltlasDf, how='left', on=['areaCode','areaName','specimenDate']).fillna(0)

In [8]:
ltlasDf = pd.merge(left=ltlasDf, 
        right=lowerToUpperDf,
        how="left", 
        left_on="areaCode",
        right_on="LTLA19CD")
ltlasDf = ltlasDf.drop(['LTLA19CD','LTLA19NM','FID'], axis=1)

In [9]:
ltlasDf = ltlasDf.rename(columns={"UTLA19CD" : "upperRegionCode", 'UTLA19NM' : "upperRegionName"})

In [10]:
ltlasDf = pd.merge(left=ltlasDf,
    right=ltlasDf.groupby(['specimenDate', 'upperRegionCode'])['dailyLabConfirmedCases'].sum().reset_index(),
    how="left",
    left_on=['specimenDate','upperRegionCode'],
    right_on=['specimenDate','upperRegionCode'],
    suffixes =["","UpperRegion"])

In [11]:
ltlasDf = pd.merge(left=ltlasDf, 
        right=lowerToRegionDf,
        how="left", 
        left_on="areaCode",
        right_on="LAD19CD")
ltlasDf = ltlasDf.drop(['LAD19CD','LAD19NM','FID'], axis=1)
ltlasDf = ltlasDf.rename(columns={"RGN19CD" : "regionCode", 'RGN19NM' : "regionName"})

In [12]:
ltlasDf = pd.merge(left=ltlasDf,
    right=ltlasDf.groupby(['specimenDate', 'regionCode'])['dailyLabConfirmedCases'].sum().reset_index(),
    how="left",
    left_on=['specimenDate','regionCode'],
    right_on=['specimenDate','regionCode'],
    suffixes =["","Region"])

In [13]:
ltlasDf.dailyLabConfirmedCases = ltlasDf.dailyLabConfirmedCases.fillna(0)

In [14]:
ltlasDf['areaMovingAverage7'] = ltlasDf.groupby('areaCode')['dailyLabConfirmedCases'].transform(lambda x: x.rolling(7, 1).mean())
ltlasDf['upperRegionMovingAverage7'] = ltlasDf.groupby('upperRegionCode')['dailyLabConfirmedCasesUpperRegion'].transform(lambda x: x.rolling(7, 1).mean())
ltlasDf['regionMovingAverage7'] = ltlasDf.groupby('regionCode')['dailyLabConfirmedCasesRegion'].transform(lambda x: x.rolling(7, 1).mean())

In [57]:
ltlasSumDf = ltlasDf.groupby('areaCode')['dailyLabConfirmedCases'].sum().reset_index()
tmp = ltlasDf[ltlasDf.specimenDate > (ltlasDf.specimenDate.max() - timedelta(days=30))].groupby('areaCode')['dailyLabConfirmedCases'].sum()
ltlasSumDf['last30dCases'] = ltlasSumDf['areaCode'].map(tmp)

tmp = ltlasDf[(ltlasDf.specimenDate >= (ltlasDf.specimenDate.max() - timedelta(days=17))) & (ltlasDf.specimenDate <= (ltlasDf.specimenDate.max() - timedelta(days=4)))].groupby('areaCode')['dailyLabConfirmedCases'].sum()

tmp1 = ltlasDf[(ltlasDf.specimenDate >= (ltlasDf.specimenDate.max() - timedelta(days=31))) & (ltlasDf.specimenDate <= (ltlasDf.specimenDate.max() - timedelta(days=18)))].groupby('areaCode')['dailyLabConfirmedCases'].sum()

ltlasSumDf['rFirst14'] = ltlasSumDf['areaCode'].map(tmp)
ltlasSumDf['rSecond14'] = ltlasSumDf['areaCode'].map(tmp1)
ltlasSumDf['rBasic'] = np.round(ltlasSumDf['rFirst14'] /  ltlasSumDf['rSecond14'],2)

ltlasSumDf.dailyLabConfirmedCases = ltlasSumDf.dailyLabConfirmedCases.astype('int')
ltlasSumDf.last30dCases = ltlasSumDf.last30dCases.astype('int')
ltlasSumDf.rFirst14 = ltlasSumDf.rFirst14.astype('int')
ltlasSumDf.rSecond14= ltlasSumDf.rSecond14.astype('int')
ltlasSumDf['rBasic'] = ltlasSumDf['rBasic'].fillna(0)
ltlasSumDf['rBasic'] = ltlasSumDf['rBasic'].replace(np.inf, ltlasSumDf['rFirst14'])

In [65]:
tmp = lowerToUpperDf[['LTLA19CD','LTLA19NM']].drop_duplicates()
tmp = tmp.rename(columns={'LTLA19CD': 'areaCode','LTLA19NM': 'area_name'})
ltlastop10last30d = pd.merge(
left=ltlasSumDf,
right=tmp,
how='left')

In [66]:
ltlastop10last30d = ltlastop10last30d.sort_values(by='last30dCases', ascending=False).head(10)[['area_name','last30dCases']]

Unnamed: 0,area_name,last30dCases
249,Manchester,173
50,Bedford,162
15,Leicester,156
254,Tameside,154
270,Birmingham,153
263,Doncaster,152
277,Bradford,141
45,Cheshire East,138
265,Sheffield,135
46,Cheshire West and Chester,134


Unnamed: 0,areaCode,dailyLabConfirmedCases,last30dCases,rFirst14,rSecond14,rBasic,area_name
0,E06000001,352,29,10,20,0.50,Hartlepool
1,E06000002,697,27,8,19,0.42,Middlesbrough
2,E06000003,430,17,1,23,0.04,Redcar and Cleveland
3,E06000004,647,64,28,47,0.60,Stockton-on-Tees
4,E06000005,409,33,10,27,0.37,Darlington
...,...,...,...,...,...,...,...
311,E09000029,768,28,10,20,0.50,Sutton
312,E09000030,649,18,10,8,1.25,Tower Hamlets
313,E09000031,777,14,5,9,0.56,Waltham Forest
314,E09000032,1009,14,6,6,1.00,Wandsworth


In [53]:
ltlasSumDf.areaCode.map(tmp)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [29]:
ltlasSumDf.areaCode.map(tmp)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [17]:
ltlasSumDf.sort_values(by='last30dCases', ascending=False)

Unnamed: 0,areaCode,dailyLabConfirmedCases,last30dCases,rFirst14,rSecond14,rBasic
249,E08000003,1727,173,87,98,0.89
50,E06000055,801,162,92,67,1.37
15,E06000016,1028,156,75,67,1.12
254,E08000008,866,154,78,82,0.95
270,E08000025,3347,153,52,111,0.47
...,...,...,...,...,...,...
84,E07000046,36,0,0,0,0.00
108,E07000083,182,0,0,1,0.00
228,E07000224,165,0,0,1,0.00
85,E07000047,53,0,0,0,0.00


In [17]:
print(ltlasDf.specimenDate.max())
print(ltlasDf.specimenDate.max() - timedelta(days=17), ltlasDf.specimenDate.max() - timedelta(days=4))
print(ltlasDf.specimenDate.max() - timedelta(days=31), ltlasDf.specimenDate.max() - timedelta(days=18))

2020-06-21 00:00:00
2020-06-04 00:00:00 2020-06-17 00:00:00
2020-05-21 00:00:00 2020-06-03 00:00:00


In [18]:
today : 22
first : 4-17
second : 21-3

In [19]:
ltlasDf = ltlasDf.rename(
    columns={
        "dailyLabConfirmedCases" : "dcLower",
        "totalLabConfirmedCases" : "tcLower",
        "upperRegionCode" : "urCode",
        "upperRegionName" : "urName",
        "dailyLabConfirmedCasesUpperRegion" : "dcUpper",
        "regionCode" : "rCode",
        "regionName" : "rName",
        "dailyLabConfirmedCasesRegion" : "dcRegion",
        "areaMovingAverage7" : "ma7Lower",
        "upperRegionMovingAverage7" : "ma7Upper",
        "regionMovingAverage7" : "ma7Region"
    }
)

In [20]:
ltlasDf.to_json(path_or_buf="data/ltlas.json", orient="records", date_format='iso')

In [27]:
ltlasDf[ltlasDf.areaName.str.len()==35]

Unnamed: 0,areaCode,areaName,specimenDate,dcLower,tcLower,urCode,urName,dcUpper,rCode,rName,dcRegion,ma7Lower,ma7Upper,ma7Region
37728,E06000058,"Bournemouth, Christchurch and Poole",2020-01-30,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,0.0,0.000000,0.000000,2.714286
37729,E06000058,"Bournemouth, Christchurch and Poole",2020-01-31,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,0.0,0.000000,0.000000,1.857143
37730,E06000058,"Bournemouth, Christchurch and Poole",2020-02-01,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,0.0,0.000000,0.000000,1.428571
37731,E06000058,"Bournemouth, Christchurch and Poole",2020-02-02,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,0.0,0.000000,0.000000,1.000000
37732,E06000058,"Bournemouth, Christchurch and Poole",2020-02-03,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,2.0,0.000000,0.000000,0.857143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37867,E06000058,"Bournemouth, Christchurch and Poole",2020-06-17,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,3.0,0.285714,0.285714,5.000000
37868,E06000058,"Bournemouth, Christchurch and Poole",2020-06-18,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,3.0,0.285714,0.285714,4.285714
37869,E06000058,"Bournemouth, Christchurch and Poole",2020-06-19,1.0,491.0,E06000058,"Bournemouth, Christchurch and Poole",1.0,E12000009,South West,3.0,0.285714,0.285714,4.142857
37870,E06000058,"Bournemouth, Christchurch and Poole",2020-06-20,0.0,0.0,E06000058,"Bournemouth, Christchurch and Poole",0.0,E12000009,South West,3.0,0.142857,0.142857,4.142857
