The referendum passed on June 23, 2016, with 52% approval rate in the vote. The official withdrawal process began at March 29, 2017.

In [1]:
import re
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, brunnermunzel

In [2]:
jobs = pd.read_csv('data/job_listings.csv')
employment = pd.read_csv('data/employment_by_occupation.csv')
labor = pd.read_csv('data/labor_market_statistics.csv')
immigrant = pd.read_csv('data/immigrant_statistics.csv')
london_stocks = pd.read_csv('data/LSE_historical_data.csv')
uk_bills = pd.read_csv('data/UK_bill_data.csv')

# legends
immigrant_legend = pd.read_csv('data/code_legends/immigrant_legend.csv')
labor_legend = pd.read_csv('data/code_legends/labor_market_stats_legend.csv')
onet_legend = pd.read_csv('data/code_legends/onet_code_legend.csv')
soc_legend = pd.read_csv('data/code_legends/soc_code_legend.csv')

  interactivity=interactivity, compiler=compiler, result=result)


### Immigrant by Region

In [3]:
immigrant = immigrant.sort_values(['Year', 'Quarter'])

In [4]:
col_list = list(immigrant)
print(col_list[43])  # noting b/c it has non-applicable dates
col_list.remove('Year')
col_list.remove('Quarter')
# for col in col_list:
#     res = immigrant[col].map(lambda s: re.sub(r'[a-zA-Z,]', '', str(s)))
#     immigrant[col] = res.map(lambda s: '0' if len(s) == 0 else s)
immigrant_total = immigrant.fillna(0).astype({col: 'int64' for col in col_list}, )

ACS-BSS


In [5]:
aggregate_regions = [
    'AFN',
    'AFS',
    'AMN',
    'ACS',
    'ACE',
    'AEA',
    'ASO',
    'ASE',
    'EU1',
    'EU2',
    'EU8',
    'EUO',
    'ERO',
    'MIE',
    'OCE',
    'OTH',
]
immi_agg_df = immigrant_total[['Year', 'Quarter', *aggregate_regions]]

In [6]:
immigrant['Total'] = immi_agg_df[aggregate_regions].sum(axis=1)

In [7]:
after_ref_idx = 38  # index for first quarter after referendum passes
after_owd_idx = 41  # index for first quarter after official withdrawl process begins

In [8]:
total_immigration = immigrant['Total']
before_ref = total_immigration[:after_ref_idx]
after_ref = total_immigration[after_ref_idx:]

ttest_ind(before_ref, after_ref)

Ttest_indResult(statistic=2.526587649848202, pvalue=0.015103438133696223)

In [9]:
print('Before referedum mean:', np.mean(before_ref))
print('After referedum mean:', np.mean(after_ref))

Before referedum mean: 44156.18421052631
After referedum mean: 35095.88888888889


In [10]:
# there is a significant drop in immigration numbers in Q4 2013 - Q1 2014
significant_peak_idx = 28
ttest_ind(total_immigration[significant_drop_idx:after_ref_idx], total_immigration[after_ref_idx:])

NameError: name 'significant_drop_idx' is not defined

In [None]:
col_list = list(immigrant)
col_list.remove('Year')
col_list.remove('Quarter')

alpha = 0.05
num_tests = 4 * len(col_list)
alpha_star = alpha / num_tests

# not a modular function lol
def ttest_for_drop_immigrant(sep_idx, alpha):
    signif_with_peak_regions = []
    signif_without_peak_regions = []
    for col in col_list:
        before = immigrant[col][:sep_idx].dropna()
        before_without_peak = immigrant[col][significant_peak_idx:sep_idx].dropna()
        after = immigrant[col][sep_idx:].dropna()
        
        ttest_res = ttest_ind(before, after)
        if ttest_res.pvalue < alpha:
            signif_with_peak_regions.append(col)
        
        ttest_res = ttest_ind(before_without_peak, after)
        if ttest_res.pvalue < alpha:
            signif_without_peak_regions.append(col)
    
    return signif_with_peak_regions, signif_without_peak_regions

signif_with_peak_regions_immigrant_ref, signif_without_peak_regions_immigrant_ref = ttest_for_drop_immigrant(after_ref_idx, alpha_star)
signif_with_peak_regions_immigrant_owd, signif_without_peak_regions_immigrant_owd = ttest_for_drop_immigrant(after_owd_idx, alpha_star)

print(len(signif_with_peak_regions_immigrant_ref), signif_with_peak_regions_immigrant_ref)
print(len(signif_without_peak_regions_immigrant_ref), signif_without_peak_regions_immigrant_ref)

print(len(signif_with_peak_regions_immigrant_owd), signif_with_peak_regions_immigrant_owd)
print(len(signif_without_peak_regions_immigrant_owd), signif_without_peak_regions_immigrant_owd)

In [None]:
set(signif_without_peak_regions_immigrant_ref) & set(signif_without_peak_regions_immigrant_owd)

### Labor Market Statistics

In [None]:
stat_prefixes = ['AGO', 'AIW', 'YCB']
exact_stats = []
# exact_stats = ['YCBF', 'YCBL']
relevant_labor_stats = [st for st in list(labor) if any([st.startswith(prefix) for prefix in stat_prefixes]) or any([st == stat for stat in exact_stats])]
print(relevant_labor_stats)
print(len(relevant_labor_stats))

In [None]:
labor_quarterly = labor[labor['Month'].map(lambda m: m.startswith('Q'))].reset_index(drop=True)

In [None]:
# reduce to 2007-2018 time range
labor_quarterly = labor_quarterly[24:]

In [None]:
alpha = 0.05
num_tests = 4 * len(relevant_labor_stats)
alpha_star = alpha / num_tests

# not a modular function lol
def ttest_for_drop_labor(sep_idx, alpha):
    signif_with_peak_regions = []
    signif_without_peak_regions = []
    for col in relevant_labor_stats:
        before = labor_quarterly[col][:sep_idx].dropna()
        before_without_peak = labor_quarterly[col][significant_peak_idx:sep_idx].dropna()
        after = labor_quarterly[col][sep_idx:].dropna()
        
        ttest_res = ttest_ind(before, after)
        if ttest_res.pvalue < alpha:
            signif_with_peak_regions.append(col)
        
        ttest_res = ttest_ind(before_without_peak, after)
        if ttest_res.pvalue < alpha:
            signif_without_peak_regions.append(col)
    
    return signif_with_peak_regions, signif_without_peak_regions

signif_with_peak_regions_labor_ref, signif_without_peak_regions_labor_ref = ttest_for_drop_labor(after_ref_idx, alpha_star)
signif_with_peak_regions_labor_owd, signif_without_peak_regions_labor_owd = ttest_for_drop_labor(after_owd_idx, alpha_star)

print(len(signif_with_peak_regions_labor_ref), signif_with_peak_regions_labor_ref)
print(len(signif_without_peak_regions_labor_ref), signif_without_peak_regions_labor_ref)

print(len(signif_with_peak_regions_labor_owd), signif_with_peak_regions_labor_owd)
print(len(signif_without_peak_regions_labor_owd), signif_without_peak_regions_labor_owd)

In [None]:
set(signif_with_peak_regions_labor_ref) & set(signif_with_peak_regions_labor_owd)

In [None]:
set(signif_without_peak_regions_labor_ref) & set(signif_without_peak_regions_labor_owd)