# Scotland Local Authority District - Notebook

In [13]:
import pandas as pd
import numpy as np
import zipfile
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import datetime
import urllib.request, urllib.error
import os.path
import datetime

## 1. Covid-19 Death Total by Local Authority

In [14]:
# Relative paths to CSV files under data/sco_covid_data/custom_data/.
# None of these four names is referenced by the cells visible in this part of the notebook.
all_deaths_path = "data/sco_covid_data/custom_data/all_deaths_lad_18.csv"
covid_deaths_path = "data/sco_covid_data/custom_data/covid_deaths_lad_18.csv"
loc_deaths_all_path = "data/sco_covid_data/custom_data/deaths_by_location_all_lad_18.csv"
loc_deaths_corona_path = "data/sco_covid_data/custom_data/deaths_by_location_corona_lad_18.csv"
# Base URL; the download helpers append "covid-deaths-data-week-<N>.zip" to it.
# NOTE(review): the URL contains a doubled slash ("files//statistics") - confirm this is intentional.
path = "https://www.nrscotland.gov.uk/files//statistics/covid19/"

In [15]:
# Maps each local authority district code (used for the output CSV file names) to the
# authority name (used as the row label when selecting from the weekly deaths table).
lad_dict = {"S12000033":"Aberdeen City","S12000034":"Aberdeenshire","S12000041":"Angus","S12000035":"Argyll and Bute","S12000036":"City of Edinburgh","S12000005":"Clackmannanshire","S12000006":"Dumfries and Galloway","S12000042":"Dundee City","S12000008":"East Ayrshire","S12000045":"East Dunbartonshire","S12000010":"East Lothian","S12000011":"East Renfrewshire","S12000014":"Falkirk","S12000015":"Fife","S12000046":"Glasgow City","S12000017":"Highland","S12000018":"Inverclyde","S12000019":"Midlothian","S12000020":"Moray","S12000013":"Na h-Eileanan Siar","S12000021":"North Ayrshire","S12000044":"North Lanarkshire","S12000023":"Orkney Islands","S12000024":"Perth and Kinross","S12000038":"Renfrewshire","S12000026":"Scottish Borders","S12000027":"Shetland Islands","S12000028":"South Ayrshire","S12000029":"South Lanarkshire","S12000030":"Stirling","S12000039":"West Dunbartonshire","S12000040":"West Lothian"}

In [16]:
'''SUCESS: CODE GOOD TO GO'''
def get_week_num():
    """Return the ISO-8601 week number of today's date."""
    # isocalendar() yields (ISO year, ISO week, ISO weekday); only the week is needed.
    _iso_year, iso_week, _iso_weekday = datetime.date.today().isocalendar()
    return iso_week

In [17]:
def convert_dates(old_date):
    """Convert a date string such as '16-Mar-20' to ISO form ('2020-03-16').

    The input is parsed with the pattern '%d-%b-%y' (day, abbreviated month
    name, two-digit year) and re-rendered as '%Y-%m-%d'.
    """
    parsed = datetime.datetime.strptime(old_date, '%d-%b-%y')
    return parsed.strftime("%Y-%m-%d")

In [18]:
'''SUCCESS: CODE GOOD TO GO'''
def get_total_num_covid_deaths_per_week(path, week_num=None):
    """Download one weekly release and return COVID deaths per local authority.

    Parameters
    ----------
    path : str
        Base URL; the archive name ``covid-deaths-data-week-<N>.zip`` is
        appended to it.
    week_num : int, optional
        Release week to fetch. When omitted, the previous ISO week
        (``get_week_num() - 1``) is used, which is the only behaviour the
        function had before this parameter was added.

    Returns
    -------
    pandas.DataFrame or None
        The 32 rows of "Table 1 - COVID deaths" starting at the row whose
        'Unnamed: 1' value is 'Aberdeen City', restricted to columns
        ``1:week_num + 2`` and indexed by 'Unnamed: 1'. ``None`` is returned
        when the response status is not 200.

    Raises
    ------
    urllib.error.HTTPError
        Propagated from ``urlopen`` when the archive cannot be fetched.
    IndexError
        If no row labelled 'Aberdeen City' is present in the table.
    """
    if week_num is None:
        week_num = get_week_num() - 1
    url = path + "covid-deaths-data-week-%d.zip" % week_num

    # The context manager releases the HTTP connection even if reading fails.
    with urllib.request.urlopen(url) as conn:
        if conn.getcode() != 200:
            # Made explicit: the caller receives None rather than a frame.
            return None
        payload = conn.read()

    with ZipFile(BytesIO(payload)) as archive:
        data_table_path = 'covid-deaths-data-week-%d_Table 1 - COVID deaths.csv' % week_num
        print(data_table_path)
        with archive.open(data_table_path) as table:
            df_covid_deaths = pd.read_csv(table, header=3, encoding='unicode-escape')

    # The per-authority rows are located by label rather than by a fixed offset.
    idx_start = np.where(df_covid_deaths['Unnamed: 1'] == 'Aberdeen City')[0][0]
    idx_end = idx_start + 32  # number of rows kept, starting at 'Aberdeen City'
    df_covid_deaths_trimmed = df_covid_deaths.iloc[idx_start:idx_end, 1:week_num + 2]

    return df_covid_deaths_trimmed.set_index('Unnamed: 1')

In [19]:
# Fetch the weekly COVID deaths table (network access required) and preview its first rows.
# The helper returns None when the response status is not 200, in which case .head() would fail.
df_covid_deaths_per_week = get_total_num_covid_deaths_per_week(path)
df_covid_deaths_per_week.head()

covid-deaths-data-week-19_Table 1 - COVID deaths.csv


Unnamed: 0_level_0,30-Dec-19,6-Jan-20,13-Jan-20,20-Jan-20,27-Jan-20,3-Feb-20,10-Feb-20,17-Feb-20,24-Feb-20,2-Mar-20,9-Mar-20,16-Mar-20,23-Mar-20,30-Mar-20,6-Apr-20,13-Apr-20,20-Apr-20,27-Apr-20,4-May-20
Unnamed: 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Aberdeen City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,12.0,18.0,15.0,15.0,15.0
Aberdeenshire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,4.0,21.0,25.0,23.0,13.0,11.0
Angus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,8.0,12.0,19.0,7.0,5.0
Argyll and Bute,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,7.0,12.0,14.0,13.0,3.0
City of Edinburgh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,17.0,61.0,64.0,71.0,61.0,43.0


In [1]:
def get_total_deaths_for_lad(df_covid_deaths_per_week, lad_name):
    """Return one authority's weekly deaths from its first non-zero week onwards.

    Parameters
    ----------
    df_covid_deaths_per_week : pandas.DataFrame
        Frame indexed by authority name whose column labels are week dates
        formatted like '16-Mar-20'.
    lad_name : str
        Row label of the authority to extract.

    Returns
    -------
    pandas.DataFrame
        Columns 'date' (ISO 'YYYY-MM-DD' strings) and 'value', keeping the
        positional integer index of the weeks. Rows before the first week
        whose value differs from 0 are dropped. If every value is 0, no row
        is dropped, because ``idxmax`` then returns the first label.

    Raises
    ------
    KeyError
        If ``lad_name`` is not in the frame's index.
    """
    df_lad = (
        df_covid_deaths_per_week.loc[[lad_name]]
        .transpose()
        .reset_index(drop=False)
        .rename(columns={'index': 'date', lad_name: 'value'})
    )

    # Select the column by name: positional access such as idxmax()[1] on a
    # label-indexed Series is deprecated in recent pandas releases.
    first_death_idx = df_lad['value'].ne(0).idxmax()

    # Vectorised equivalent of parsing each label with '%d-%b-%y' and
    # re-formatting it as '%Y-%m-%d'.
    df_lad['date'] = (
        pd.to_datetime(df_lad['date'], format='%d-%b-%y').dt.strftime('%Y-%m-%d')
    )

    return df_lad.loc[first_death_idx:]

In [2]:
def populate_total_num_covid_deaths_per_week(df_covid_deaths_per_week, lad_dict, root_path):
    """Write one CSV of weekly COVID deaths for every authority in ``lad_dict``.

    Parameters
    ----------
    df_covid_deaths_per_week : pandas.DataFrame
        Weekly deaths table, passed unchanged to ``get_total_deaths_for_lad``.
    lad_dict : dict
        Maps an authority code (used as the file stem) to the authority name
        (used to select the row).
    root_path : str
        Appended to the current working directory by plain string
        concatenation, so it is expected to start and end with a path
        separator, e.g. '/data/lad/total_covid_deaths/'.

    Side effects
    ------------
    Creates the target directory if it is missing, prints each destination
    path, and writes ``<cwd><root_path><code>.csv`` for every entry.
    """
    # Concatenation is kept on purpose: os.path.join would discard the working
    # directory whenever root_path starts with a separator.
    prefix = os.getcwd() + root_path

    target_dir = os.path.dirname(prefix)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    for lad_code, lad_name in lad_dict.items():
        df_lad = get_total_deaths_for_lad(df_covid_deaths_per_week, lad_name)
        destination = prefix + f"{lad_code}.csv"
        print(destination)
        df_lad.to_csv(destination)

In [3]:
# Write one CSV per authority under <cwd>/data/lad/total_covid_deaths/.
# Requires df_covid_deaths_per_week to exist already; the recorded output below is a NameError from running this cell before it was defined.
populate_total_num_covid_deaths_per_week(df_covid_deaths_per_week, lad_dict, '/data/lad/total_covid_deaths/')

NameError: name 'df_covid_deaths_per_week' is not defined

In [5]:
'''NEED TO REFACTOR THIS'''
def _download_release(path, week):
    """Fetch the zip archive for release ``week`` and return it as an in-memory ZipFile."""
    url = path + "covid-deaths-data-week-%d.zip" % week
    with urllib.request.urlopen(url) as conn:
        return ZipFile(BytesIO(conn.read()))


def _read_release_table(archive, week, table_name):
    """Read the CSV member ``table_name`` of release ``week`` from an open ZipFile."""
    member = 'covid-deaths-data-week-%d_%s.csv' % (week, table_name)
    with archive.open(member) as handle:
        return pd.read_csv(handle, header=3, encoding='unicode-escape')


def get_covid_deaths_df(path):
    """Download the latest weekly release, falling back one week on an HTTP error.

    Parameters
    ----------
    path : str
        Base URL; the archive name ``covid-deaths-data-week-<N>.zip`` is
        appended to it.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame), or pandas.DataFrame
        * Release for ``get_week_num() - 1`` available: the tuple
          ``(all_deaths, covid_deaths)``, sliced at fixed row positions
          (50:82 and 48:80) with the default integer index.
        * That download raises ``HTTPError``: a single COVID deaths frame
          from the ``get_week_num() - 2`` release, holding the 32 rows that
          start at 'Aberdeen City' and indexed by 'Unnamed: 1'.

    NOTE(review): the two branches return different shapes. Both are kept
    unchanged so existing callers keep working, but the notebook cell that
    calls ``.head()`` on the result only works with the fallback shape.
    Decide on one return shape.

    Raises
    ------
    urllib.error.HTTPError
        If the fallback download fails as well.
    """
    current_week = get_week_num()

    # Try to get the most recent data.
    try:
        week = current_week - 1
        with _download_release(path, week) as archive:
            # Member names follow the requested week. They used to be fixed to
            # 'week-18', which only matched when the requested week was 18.
            df_covid_deaths = _read_release_table(archive, week, 'Table 1 - COVID deaths')
            df_all_deaths = _read_release_table(archive, week, 'Table 2 - All deaths')

        # NOTE(review): fixed row offsets - presumably the per-authority rows
        # of each table; confirm against the current file layout.
        df_covid_deaths_trimmed = df_covid_deaths.iloc[48:80, 1:current_week + 1]
        df_all_deaths_trimmed = df_all_deaths.iloc[50:82, 1:current_week + 1]

        return df_all_deaths_trimmed, df_covid_deaths_trimmed

    # If the latest release cannot be fetched, use the one from the week before.
    except urllib.error.HTTPError:
        week = current_week - 2
        with _download_release(path, week) as archive:
            df_covid_deaths = _read_release_table(archive, week, 'Table 1 - COVID deaths')

        idx_start = np.where(df_covid_deaths['Unnamed: 1'] == 'Aberdeen City')[0][0]
        idx_end = idx_start + 32  # number of rows kept, starting at 'Aberdeen City'
        df_covid_deaths_trimmed = df_covid_deaths.iloc[idx_start:idx_end, 1:current_week]

        return df_covid_deaths_trimmed.set_index('Unnamed: 1')

In [61]:
# Fetch the weekly COVID deaths table via get_covid_deaths_df and preview it.
# NOTE(review): .head() only works when get_covid_deaths_df returns a single DataFrame
# (its HTTPError fallback branch); its primary branch returns a tuple of two frames.
df_covid_deaths_per_week = get_covid_deaths_df(path)
df_covid_deaths_per_week.head()

NameError: name 'get_covid_deaths_df' is not defined

Unnamed: 0_level_0,30-Dec-19,6-Jan-20,13-Jan-20,20-Jan-20,27-Jan-20,3-Feb-20,10-Feb-20,17-Feb-20,24-Feb-20,2-Mar-20,9-Mar-20,16-Mar-20,23-Mar-20,30-Mar-20,6-Apr-20,13-Apr-20,20-Apr-20,27-Apr-20
Unnamed: 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Aberdeen City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,12.0,18.0,15.0,15.0
Aberdeenshire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,4.0,21.0,25.0,23.0,13.0
Angus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,8.0,12.0,19.0,7.0
Argyll and Bute,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,7.0,12.0,14.0,13.0
City of Edinburgh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,17.0,61.0,64.0,71.0,61.0
Clackmannanshire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,3.0,4.0,4.0,11.0
Dumfries and Galloway,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,9.0,10.0,3.0,6.0
Dundee City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,21.0,29.0,27.0,22.0
East Ayrshire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,10.0,12.0,11.0,10.0
East Dunbartonshire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,9.0,12.0,18.0,20.0


In [31]:
# Sample date in the '%d-%b-%y' form used by the table's column labels, for trying out convert_dates.
old_date = "16-Mar-20"

In [32]:
# Spot check of the date conversion; the recorded result is '2020-03-16'.
convert_dates(old_date)

'2020-03-16'

In [33]:
# Extract the weekly series for a single authority as a worked example.
df_aberdeen_total_covid = get_total_deaths_for_lad(df_covid_deaths_per_week, 'Aberdeen City')

In [34]:
# Display the extracted frame (columns 'date' and 'value').
df_aberdeen_total_covid

Unnamed: 1,date,value
11,2020-03-16,1.0
12,2020-03-23,0.0
13,2020-03-30,2.0
14,2020-04-06,12.0
15,2020-04-13,18.0
16,2020-04-20,15.0
17,2020-04-27,15.0


In [72]:
# Write the example frame to a CSV file in the current working directory.
df_aberdeen_total_covid.to_csv('aberdeen_total_covid.csv')

In [73]:
# Index label of the first row whose 'value' differs from 0. The column is selected
# by name because positional access (idxmax()[1]) on a label-indexed Series is deprecated.
df_aberdeen_total_covid['value'].ne(0).idxmax()

11

In [37]:
# Local authority district code -> authority name.
# NOTE(review): this appears to repeat the lad_dict defined near the top of the notebook - confirm and keep a single definition.
lad_dict = {"S12000033":"Aberdeen City","S12000034":"Aberdeenshire","S12000041":"Angus","S12000035":"Argyll and Bute","S12000036":"City of Edinburgh","S12000005":"Clackmannanshire","S12000006":"Dumfries and Galloway","S12000042":"Dundee City","S12000008":"East Ayrshire","S12000045":"East Dunbartonshire","S12000010":"East Lothian","S12000011":"East Renfrewshire","S12000014":"Falkirk","S12000015":"Fife","S12000046":"Glasgow City","S12000017":"Highland","S12000018":"Inverclyde","S12000019":"Midlothian","S12000020":"Moray","S12000013":"Na h-Eileanan Siar","S12000021":"North Ayrshire","S12000044":"North Lanarkshire","S12000023":"Orkney Islands","S12000024":"Perth and Kinross","S12000038":"Renfrewshire","S12000026":"Scottish Borders","S12000027":"Shetland Islands","S12000028":"South Ayrshire","S12000029":"South Lanarkshire","S12000030":"Stirling","S12000039":"West Dunbartonshire","S12000040":"West Lothian"}

In [38]:
# Look up the authority name for one code; the recorded result is 'Aberdeen City'.
lad_dict.get("S12000033")

'Aberdeen City'

In [40]:
# NOTE(review): populate_covid_lad_death_data is not defined in the visible cells. The visible
# function taking the same three arguments is populate_total_num_covid_deaths_per_week - confirm
# whether this call should use that name so the notebook runs on a fresh kernel.
populate_covid_lad_death_data(df_covid_deaths_per_week, lad_dict, '/data/lad/total_covid_deaths/')

/data/lad/total_covid_deaths/S12000033.csv
/data/lad/total_covid_deaths/S12000034.csv
/data/lad/total_covid_deaths/S12000041.csv
/data/lad/total_covid_deaths/S12000035.csv
/data/lad/total_covid_deaths/S12000036.csv
/data/lad/total_covid_deaths/S12000005.csv
/data/lad/total_covid_deaths/S12000006.csv
/data/lad/total_covid_deaths/S12000042.csv
/data/lad/total_covid_deaths/S12000008.csv
/data/lad/total_covid_deaths/S12000045.csv
/data/lad/total_covid_deaths/S12000010.csv
/data/lad/total_covid_deaths/S12000011.csv
/data/lad/total_covid_deaths/S12000014.csv
/data/lad/total_covid_deaths/S12000015.csv
/data/lad/total_covid_deaths/S12000046.csv
/data/lad/total_covid_deaths/S12000017.csv
/data/lad/total_covid_deaths/S12000018.csv
/data/lad/total_covid_deaths/S12000019.csv
/data/lad/total_covid_deaths/S12000020.csv
/data/lad/total_covid_deaths/S12000013.csv
/data/lad/total_covid_deaths/S12000021.csv
/data/lad/total_covid_deaths/S12000044.csv
/data/lad/total_covid_deaths/S12000023.csv
/data/lad/t