<h1>Creating Geography lookup tables</h1>

This file creates a postcode location lookup from NSPL_May_2022_UK.<br>
The output can be joined to tables that are read in at postal area/district/sector level. <br>

In [None]:
# Add the project root to the import path so that the local fintrans_toolbox
# package imported below can be resolved.
# NOTE(review): presumably fintrans_toolbox sits directly under /home/jupyter - confirm.
project_path = "/home/jupyter"
import sys
sys.path.append(project_path)

import numpy as np
import pandas as pd

# bq provides read_bq_table_sql, used to run SQL and return the result as a dataframe
from fintrans_toolbox.src import bq_utils as bq
from google.cloud import bigquery

In [None]:
# BigQuery client that is passed to bq.read_bq_table_sql when reading the NSPL table.
# No arguments are given, so project and credentials are whatever the client
# resolves by default - NOTE(review): confirm this matches the notebook environment.
client = bigquery.Client()

In [None]:
#Read the distinct postcode-level fields needed for the lookup from the NSPL reference table
sql_nspl = """SELECT DISTINCT pcds, doterm, rgn, ctry, ru11ind, imd
FROM ons-fintrans-data-prod.fintrans_reference.NSPL_MAY_2022_UK"""

df_nspl = bq.read_bq_table_sql(client, sql_nspl)

#Drop postcodes that terminated before 2019: keep rows whose termination date
#(doterm) is 201901 or later, plus rows with no termination date at all
terminated_2019_onwards = df_nspl["doterm"] >= 201901
never_terminated = df_nspl["doterm"].isna()
df_nspl = df_nspl[terminated_2019_onwards | never_terminated]

In [None]:
#Calculate postal area, district and sector from the formatted postcode (pcds)

#Split on the first space only: the part before it is the district, the rest is
#the remainder of the postcode. n is passed by keyword because pandas 2.x no
#longer accepts it as a positional argument.
df_nspl[['postal_district','postal_sector']] = df_nspl["pcds"].str.split(" ", n=1, expand=True)

#Sector = district + " " + first character of the remainder, e.g. "AB1 2CD" -> "AB1 2"
df_nspl['postal_sector'] = df_nspl['postal_sector'].str.strip().str[:1]
df_nspl['postal_sector'] = df_nspl['postal_district'] + " " + df_nspl['postal_sector']

#Area = first two characters of the sector with any digits removed,
#e.g. "AB1 2" -> "AB" and "B1 2" -> "B". Raw string avoids an invalid "\d" escape.
df_nspl['postal_area'] = df_nspl['postal_sector'].str[:2].str.replace(r'\d+', '', regex=True)

#remove whitespace
df_nspl['postal_area'] = df_nspl['postal_area'].str.strip()
df_nspl['postal_sector'] = df_nspl['postal_sector'].str.strip()
df_nspl['postal_district'] = df_nspl['postal_district'].str.strip()

In [None]:
#Region lookup: one row per rgn code with its display name.
#Written as code -> name pairs so each code sits next to its label.
region_names = {
    'E12000001': 'North East',
    'E12000002': 'North West',
    'E12000003': 'Yorkshire and The Humber',
    'E12000004': 'East Midlands',
    'E12000005': 'West Midlands',
    'E12000006': 'East of England',
    'E12000007': 'London',
    'E12000008': 'South East',
    'E12000009': 'South West',
    'W99999999': 'Wales',
    'S99999999': 'Scotland',
    'N99999999': 'Northern Ireland',
    'L99999999': 'Channel Islands',
    'M99999999': 'Isle of Man',
}
#Column-oriented form: 'rgn' is the join key, 'region' is the label
r = {'rgn': list(region_names.keys()), 'region': list(region_names.values())}
df_region = pd.DataFrame(data=r)

In [None]:
#Country lookup: one row per ctry code with its display name.
#Written as code -> name pairs so each code sits next to its label.
country_names = {
    'E92000001': 'England',
    'N92000002': 'Northern Ireland',
    'S92000003': 'Scotland',
    'W92000004': 'Wales',
    'L93000001': 'Channel Islands',
    'M83000003': 'Isle of Man',
}
#Column-oriented form: 'ctry' is the join key, 'country' is the label
c = {'ctry': list(country_names.keys()), 'country': list(country_names.values())}
df_country = pd.DataFrame(data=c)

In [None]:
#Attach the region and country names to each postcode.
#Left joins keep every postcode row; codes missing from a lookup get a null name.
df_nspl = (
    df_nspl
    .merge(df_region, how="left", on="rgn")
    .merge(df_country, how="left", on="ctry")
)

In [None]:
#Keep only the lookup columns, in output order, then order the rows by postcode
output_columns = [
    'postal_area',
    'postal_district',
    'postal_sector',
    'pcds',
    'region',
    'country',
    'ru11ind',
    'imd',
]
df_nspl = df_nspl[output_columns].sort_values(by='pcds')

In [None]:
#df_nspl.to_csv("~/fintrans_toolbox/data/NSPL_location_lookup.csv", index=False)

In [None]:
#Save the lookup dataframe as a BigQuery table.
#if_exists='replace' overwrites the destination table when it already exists;
#change it to 'fail' or 'append' if the existing table must not be overwritten.
destination_table = 'ons-fintrans-analysis-prod.fin_wip_notebook.nspl_postal_level_lookup'
df_nspl.to_gbq(
    destination_table,
    project_id=None,
    chunksize=None,
    reauth=False,
    if_exists='replace',
    auth_local_webserver=True,
    table_schema=None,
    location=None,
    progress_bar=True,
    credentials=None,
)