In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
import math

import pandas as pd
import pandas_gbq
import numpy as np

# Put the parent of the working directory on sys.path so that the
# project-local packages imported below can be resolved.
current_path = os.path.abspath(os.curdir)
sys.path.append(os.path.dirname(current_path))

from data_access.data_factory import DataFactory as factory
from data_access import prep_pipeline as pp
from utils import data as dt
from utils import config as cf

In [None]:
# Read the 'static' dataset through the prep-pipeline helper.
# NOTE(review): static_df is not referenced by any later cell visible here.
static_df = pp.read_data('static')

In [None]:
# Load the 'static_vars' dataset via the project DataFactory; this is the
# source frame the fake unit-test table is sampled from.
static_vars = factory.get('static_vars').create_dataframe()

In [None]:
# Build a 1000-row synthetic stand-in for static_vars to use as a unit-test
# fixture: region columns are sampled together as whole rows, every other
# column is sampled independently so values no longer line up with the row
# they came from.
n_fake_rows = 1000

# One shared, seeded RandomState makes the fixture reproducible on a fresh
# kernel. Passing the same generator object (rather than the same integer seed)
# to each .sample() call advances its state, so each column still receives a
# different draw instead of the identical set of rows.
rng = np.random.RandomState(42)

region_cols = ['LSOA11CD', 'LSOA11NM', 'MSOA11CD', 'MSOA11NM', 'LTLA20CD', 'LTLA20NM',
       'UTLA20CD', 'UTLA20NM', 'RGN19CD', 'RGN19NM']

# NOTE(review): region rows are drawn from the unfiltered frame, whereas the
# remaining columns are drawn only from rows whose LSOA11CD starts with 'E' -
# confirm this asymmetry is intended.
static_vars_fake = (
    static_vars[region_cols]
    .sample(n=n_fake_rows, random_state=rng)
    .reset_index(drop=True)
)

static_vars_filt = static_vars[static_vars['LSOA11CD'].str.startswith('E')]

for col in [col for col in static_vars_filt.columns if col not in region_cols]:
    # reset_index so the shuffled values align positionally with the fake rows
    static_vars_fake[col] = (
        static_vars_filt[col]
        .sample(n=n_fake_rows, random_state=rng)
        .reset_index(drop=True)
    )

In [None]:
# Upload the fake static table to BigQuery. DataFrame.to_gbq is deprecated
# (pandas 2.2) in favour of calling pandas_gbq directly. if_exists='fail' is
# the library default, spelled out here: the upload errors rather than
# overwriting when the destination table already exists.
pandas_gbq.to_gbq(
    static_vars_fake,
    'wip.unit_test_static_vars',
    project_id = 'ons-hotspot-prod',
    if_exists='fail',
)

In [None]:
# Load the 'mid_year_lsoa' dataset via the project DataFactory.
mid_year_lsoa = factory.get('mid_year_lsoa').create_dataframe()

In [None]:
# This dataset is publicly available, so there is no need to fudge the join
# in any way; just preview the first rows.
mid_year_lsoa.head()

In [None]:
# Distinct LSOA codes present in the fake static table; later cells reuse
# this array to relabel and filter the other fixtures.
lsoa_list = static_vars_fake['LSOA11CD'].unique()

# Keep only the mid-year rows belonging to those LSOAs.
mid_year_in_sample = mid_year_lsoa['LSOA11CD'].isin(lsoa_list)
mid_year_lsoa = mid_year_lsoa.loc[mid_year_in_sample]

In [None]:
# Upload the filtered mid-year table to BigQuery. DataFrame.to_gbq is
# deprecated (pandas 2.2) in favour of calling pandas_gbq directly.
# if_exists='fail' is the library default, spelled out here: the upload errors
# rather than overwriting when the destination table already exists.
pandas_gbq.to_gbq(
    mid_year_lsoa,
    'wip.unit_test_mid_year_lsoa',
    project_id = 'ons-hotspot-prod',
    if_exists='fail',
)

In [None]:
# Load the 'mobility_clusters_processed' dataset via the project DataFactory.
mobility_clusters_processed = factory.get('mobility_clusters_processed').create_dataframe()

In [None]:
# Draw one mobility row per fake LSOA and relabel it with that LSOA code.
# The sample size is tied to len(lsoa_list) (instead of a second hard-coded
# 1000) so the positional column assignment below can never hit a length
# mismatch; the fixed seed makes the fixture reproducible.
mob = (
    mobility_clusters_processed
    .sample(n=len(lsoa_list), random_state=42)
    .reset_index(drop=True)
)

# Overwrite the real LSOA codes with the fake sample's codes (by position).
mob['LSOA11CD'] = lsoa_list

# Preview only the first rows rather than rendering the whole frame.
mob.head()

In [None]:
# Upload the relabelled mobility sample to BigQuery. DataFrame.to_gbq is
# deprecated (pandas 2.2) in favour of calling pandas_gbq directly.
# if_exists='fail' is the library default, spelled out here: the upload errors
# rather than overwriting when the destination table already exists.
pandas_gbq.to_gbq(
    mob,
    'wip.unit_test_mobility_clusters',
    project_id = 'ons-hotspot-prod',
    if_exists='fail',
)

In [None]:
# Load the 'flow_to_work' dataset via the project DataFactory.
flow_to_work = factory.get('flow_to_work').create_dataframe()

In [None]:
# Draw one flow-to-work row per fake LSOA and relabel it with that LSOA code.
# The sample size follows len(lsoa_list) so the positional assignment below
# cannot fail on a length mismatch, the seed makes the draw reproducible, and
# the index is reset to match how the mobility sample is prepared.
flow = (
    flow_to_work
    .sample(n=len(lsoa_list), random_state=42)
    .reset_index(drop=True)
)

# Overwrite the real LSOA codes with the fake sample's codes (by position).
flow['LSOA11CD'] = lsoa_list

In [None]:
# Upload the relabelled flow-to-work sample to BigQuery. DataFrame.to_gbq is
# deprecated (pandas 2.2) in favour of calling pandas_gbq directly.
# if_exists='fail' is the library default, spelled out here: the upload errors
# rather than overwriting when the destination table already exists.
pandas_gbq.to_gbq(
    flow,
    'wip.unit_test_flow_to_work',
    project_id = 'ons-hotspot-prod',
    if_exists='fail',
)

In [None]:
# Load the 'LSOA_2011' dataset via the project DataFactory.
# This dataset is also publicly available.
lsoa_2011 = factory.get('LSOA_2011').create_dataframe()

In [None]:
# Restrict the LSOA 2011 table to the LSOAs present in the fake sample.
lsoa_2011_in_sample = lsoa_2011['LSOA11CD'].isin(lsoa_list)
lsoa_2011 = lsoa_2011.loc[lsoa_2011_in_sample]

In [None]:
# Upload the filtered LSOA 2011 table to BigQuery. DataFrame.to_gbq is
# deprecated (pandas 2.2) in favour of calling pandas_gbq directly.
# if_exists='fail' is the library default, spelled out here: the upload errors
# rather than overwriting when the destination table already exists.
pandas_gbq.to_gbq(
    lsoa_2011,
    'wip.unit_test_lsoa_2011',
    project_id = 'ons-hotspot-prod',
    if_exists='fail',
)

In [None]:
# Rebuild the combined unit-test frame locally by outer-joining every fixture
# table onto the fake static table, keyed on LSOA11CD.
table_list = [mid_year_lsoa,
        mob,
        flow,
        lsoa_2011]

df_final = static_vars_fake.copy()

for table in table_list:
    # Bring across only the join key plus columns df_final does not have yet,
    # so the left-hand version of any shared column wins. Suffixing shared
    # columns with '_drop' and removing them afterwards gives the same result
    # for a single overlap, but when the same column is shared by two of the
    # joined tables the second merge would create a duplicate '<col>_drop'
    # name, which pandas 2.x rejects with a MergeError.
    new_cols = [col for col in table.columns
                if col == 'LSOA11CD' or col not in df_final.columns]
    df_final = df_final.merge(table[new_cols], on='LSOA11CD', how='outer')

In [None]:
# Remove every column whose name carries the '_drop' suffix used to tag the
# right-hand duplicates during the merges.
drop_cols = [name for name in df_final.columns if name.endswith('_drop')]
df_final = df_final.drop(columns=drop_cols)

# Keep only rows whose LSOA code begins with 'E'
# (presumably the England codes - confirm).
code_starts_with_e = df_final['LSOA11CD'].str.startswith('E')
df_final = df_final.loc[code_starts_with_e]

In [None]:
# Load the stored 'unit_test_static' dataset via the project DataFactory; it
# is the reference the locally rebuilt df_final is compared against below.
df_final2 = factory.get('unit_test_static').create_dataframe()

In [None]:
# Put both frames into a comparable form: order rows by LSOA code, discard
# the old index, and turn the geometry column into plain strings.
df_final = (
    df_final
    .sort_values(by='LSOA11CD')
    .reset_index(drop=True)
    .assign(geometry=lambda frame: frame['geometry'].astype(str))
)

df_final2 = (
    df_final2
    .sort_values(by='LSOA11CD')
    .reset_index(drop=True)
    .assign(geometry=lambda frame: frame['geometry'].astype(str))
)


In [None]:
# Verify that the locally rebuilt frame matches the stored reference frame;
# raises AssertionError on any difference (with the defaults this includes
# dtypes, column order and index).
pd.testing.assert_frame_equal(df_final, df_final2)