<h1>Creating Geography lookup tables</h1>

This file creates a postcode look up using NSPL.<br>
This output can be joined to tables read in using area/district/sector. <br>
Where a postcode area, district or sector crosses regional boundaries, a scaling factor is applied. 
This is to be used when calculating the number of cardholders, spend etc.

In [None]:
# Directory appended to the module search path so that the fintrans_toolbox
# imports further down can be resolved.
# NOTE(review): presumably the directory that contains the fintrans_toolbox
# checkout on the notebook host — confirm before running elsewhere.
project_path = "/home/jupyter"
import sys
sys.path.append(project_path)

import numpy as np
import pandas as pd

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t
from google.cloud import bigquery

In [None]:
# BigQuery client shared by the cells below. No project or credentials are
# passed explicitly here.
client = bigquery.Client()

In [None]:
# Query returning every column of the NSPL postal-level lookup table.
sql_nspl = (
    "SELECT *\n"
    "FROM ons-fintrans-analysis-prod.fin_wip_notebook.nspl_postal_level_lookup"
)

# Run the query through the toolbox helper and keep the result as df_nspl,
# which the later cells enrich and aggregate.
df_nspl = bq.read_bq_table_sql(client, sql_nspl)

In [None]:
# Show the loaded NSPL lookup as the cell output for a quick visual check.
df_nspl

In [None]:
# Create Region Lookup
# One (rgn code, region name) pair per row, in the order the lookup is built.
# The nine E12... codes are the English regions; the ...99999999 codes stand
# in for the countries / crown dependencies that have no English region.
region_pairs = [
    ('E12000001', 'North East'),
    ('E12000002', 'North West'),
    ('E12000003', 'Yorkshire and The Humber'),
    ('E12000004', 'East Midlands'),
    ('E12000005', 'West Midlands'),
    ('E12000006', 'East of England'),
    ('E12000007', 'London'),
    ('E12000008', 'South East'),
    ('E12000009', 'South West'),
    ('W99999999', 'Wales'),
    ('S99999999', 'Scotland'),
    ('N99999999', 'Northern Ireland'),
    ('L99999999', 'Channel Islands'),
    ('M99999999', 'Isle of Man'),
]

# Column-oriented dict: 'rgn' is the join key, 'region' the readable name.
r = {
    'rgn': [code for code, _ in region_pairs],
    'region': [name for _, name in region_pairs],
}
df_region = pd.DataFrame(data=r)

In [None]:
# Create Country Lookup
# One (ctry code, country name) pair per row, in the order the lookup is built.
country_pairs = [
    ('E92000001', 'England'),
    ('N92000002', 'Northern Ireland'),
    ('S92000003', 'Scotland'),
    ('W92000004', 'Wales'),
    ('L93000001', 'Channel Islands'),
    ('M83000003', 'Isle of Man'),
]

# Column-oriented dict: 'ctry' is the join key, 'country' the readable name.
c = {
    'ctry': [code for code, _ in country_pairs],
    'country': [name for _, name in country_pairs],
}
df_country = pd.DataFrame(data=c)

In [None]:
# Merge Lookups
# Attach the readable region and country names to every NSPL row. Left joins
# keep all NSPL rows; codes missing from a lookup leave NaN in the name column.
df_nspl = (
    df_nspl
    .merge(df_region, how="left", on="rgn")
    .merge(df_country, how="left", on="ctry")
)

In [None]:
# POSTAL AREA
# 1. Count the pcds entries in each postal_area / ctry / region combination.
# 2. area_sf = that count divided by the total count for the same postal_area,
#    i.e. the share of the area's counted postcodes in that ctry/region.
# 3. Keep only the key columns and the scaling factor.
df_postal_area = (
    df_nspl[['pcds', 'postal_area', 'region', 'ctry']]
    .groupby(['postal_area', 'ctry', 'region'], as_index=False)
    .count()
    .assign(
        area_sf=lambda counts: counts['pcds']
        / counts.groupby('postal_area')['pcds'].transform('sum')
    )
    .loc[:, ['postal_area', 'area_sf', 'region', 'ctry']]
)

In [None]:
# POSTAL DISTRICT
# 1. Count the pcds entries in each postal_district / ctry / region combination.
# 2. district_sf = that count divided by the total count for the same
#    postal_district, i.e. the share of the district's counted postcodes in
#    that ctry/region.
# 3. Keep only the key columns and the scaling factor.
df_postal_district = (
    df_nspl[['pcds', 'postal_district', 'region', 'ctry']]
    .groupby(['postal_district', 'ctry', 'region'], as_index=False)
    .count()
    .assign(
        district_sf=lambda counts: counts['pcds']
        / counts.groupby('postal_district')['pcds'].transform('sum')
    )
    .loc[:, ['postal_district', 'district_sf', 'region', 'ctry']]
)

In [None]:
# POSTAL SECTOR
# 1. Count the pcds entries in each postal_sector / ctry / region combination.
# 2. sector_sf = that count divided by the total count for the same
#    postal_sector, i.e. the share of the sector's counted postcodes in that
#    ctry/region.
# 3. Keep only the key columns and the scaling factor.
df_postal_sector = (
    df_nspl[['pcds', 'postal_sector', 'region', 'ctry']]
    .groupby(['postal_sector', 'ctry', 'region'], as_index=False)
    .count()
    .assign(
        sector_sf=lambda counts: counts['pcds']
        / counts.groupby('postal_sector')['pcds'].transform('sum')
    )
    .loc[:, ['postal_sector', 'sector_sf', 'region', 'ctry']]
)

In [None]:
# ALL
# Build one row per distinct area / district / sector / region / ctry
# combination (rows without a region are discarded), then attach the area,
# district and sector scaling factors with left joins on the matching keys.
df_postal_all = (
    df_nspl[['postal_area', 'postal_district', 'postal_sector', 'region', 'ctry']]
    .dropna(subset=['region'])
    .drop_duplicates()
    .merge(df_postal_area, how="left", on=['postal_area', 'region', 'ctry'])
    .merge(df_postal_district, how="left", on=['postal_district', 'region', 'ctry'])
    .merge(df_postal_sector, how="left", on=['postal_sector', 'region', 'ctry'])
)

In [None]:
#df_postal_all.to_csv("~/fintrans_toolbox/data/NSPL_location_level_lookup.csv", index=False)

In [None]:
# Write the combined lookup to a BigQuery table.
# With if_exists='fail' the existing table is not overwritten; set
# if_exists='replace' to replace it.
# NOTE(review): DataFrame.to_gbq is deprecated in recent pandas releases in
# favour of pandas_gbq.to_gbq — confirm against the installed pandas version.
df_postal_all.to_gbq(
    'ons-fintrans-analysis-prod.fin_wip_notebook.nspl_postal_level_scaling_factors',
    project_id=None,
    chunksize=None,
    reauth=False,
    if_exists='fail',
    auth_local_webserver=True,
    table_schema=None,
    location=None,
    progress_bar=True,
    credentials=None,
)