# Travel and Tourism Reform Project

### Documentation

**Dataframes:** 
- df_qcontcust_2009_2019 -> data for all years (2009-2019)
- df_qcontcust_2009 to df_qcontcust_2019 -> one dataframe per year, each filtered from df_qcontcust_2009_2019

**Dictionaries**
- flow_dict -> maps flow codes to labels (air/sea, arrival/departure, foreign/UK); used for all years
- Purpose_value_map_0919 -> Purpose of visit mapping for the years 2009 to 2019
- Purpose_value_map_22 -> Purpose of visit mapping for 2022
- Nationality_value_map_0919 -> mapping for Nationality of respondent - NEW CODES (2009-2019)
- Nationality_value_map_22 -> mapping for Nationality of respondent - NEW CODES (2022)

***


Importing Packages

In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import statsmodels.api as sm
from statsmodels.sandbox.stats.multicomp import multipletests

import scipy.stats as ss
from scipy.stats import kruskal
from scipy.stats import mannwhitneyu
from scipy.stats import chi2_contingency

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.utils.class_weight import compute_class_weight
from scikit_posthocs import posthoc_dunn

from itertools import product

from imblearn.over_sampling import RandomOverSampler

from tabulate import tabulate

Loading Data

In [3]:
# Root folder that holds the extracted UKDA study archives. Every table below
# is read from "<DATA_ROOT><study folder>\tab\<file>", so this is the only
# value to change when running the notebook on another machine.
DATA_ROOT = "C:\\Users\\medasud\\Downloads\\"


def _read_tab(study_dir, filename):
    """Read one tab-separated table from the ``tab`` subfolder of a study archive.

    study_dir -- name of the study folder directly under DATA_ROOT
    filename  -- name of the ``.tab`` file inside that folder's ``tab`` directory
    Returns the table as a pandas DataFrame.
    """
    return pd.read_csv(DATA_ROOT + study_dir + "\\tab\\" + filename, delimiter='\t')


def _rows_for_year(year):
    """Return the rows of df_qcontcust_2009_2019 whose 'Year' equals *year*.

    The result is an explicit copy so that later cells can add columns to a
    per-year frame without pandas raising SettingWithCopyWarning.
    """
    return df_qcontcust_2009_2019[df_qcontcust_2009_2019['Year'] == year].copy()


# --- 2009-2019 combined table and its per-year subsets ---
df_qcontcust_2009_2019 = _read_tab("2013-UKDA-7380-tab", "qcontcust_2009_2019.tab")

df_qcontcust_2009 = _rows_for_year(2009)
df_qcontcust_2010 = _rows_for_year(2010)
df_qcontcust_2011 = _rows_for_year(2011)
df_qcontcust_2012 = _rows_for_year(2012)
df_qcontcust_2013 = _rows_for_year(2013)
df_qcontcust_2014 = _rows_for_year(2014)
df_qcontcust_2015 = _rows_for_year(2015)
df_qcontcust_2016 = _rows_for_year(2016)
df_qcontcust_2017 = _rows_for_year(2017)
df_qcontcust_2018 = _rows_for_year(2018)
df_qcontcust_2019 = _rows_for_year(2019)

df_qreg_2013 = _read_tab("2013-UKDA-7380-tab", "qreg_2013.tab")

# --- 2021 study (alcohol and qcontcust tables are currently not loaded) ---
df_airmiles_2021 = _read_tab("2021-UKDA-9040-tab", "airmiles2021.tab")
#df_alcohol_2021 = _read_tab("2021-UKDA-9040-tab", "alcohol_2021.tab")
df_qreg_2021 = _read_tab("2021-UKDA-9040-tab", "qreg_2021.tab")
#df_qcontcust_2021 = _read_tab("2021-UKDA-9040-tab", "qcontcust2021.tab")

# --- 2020 study, Q1 files (alcohol and qcontcust tables are currently not loaded) ---
df_airmiles_2020 = _read_tab("2020-UKDA-8661-tab", "airmiles_q12020.tab")
#df_alcohol_2020 = _read_tab("2020-UKDA-8661-tab", "alcohol_q12020.tab")
df_qreg_2020 = _read_tab("2020-UKDA-8661-tab", "qreg_q12020.tab")
# df_qcontcust_2020 = _read_tab("2020-UKDA-8661-tab", "qcontcust_q12020.tab")


In [None]:
# Lookup from the numeric 'Flow' code to a readable label. Each label combines
# transport mode (Air/Sea), direction (Departure/Arrival) and Foreign/UK.
flow_dict = {
    1.0: "Air Departure Foreign",
    2.0: "Air Departure UK",
    3.0: "Air Arrival Foreign",
    4.0: "Air Arrival UK",
    5.0: "Sea Departure Foreign",
    6.0: "Sea Departure UK",
    7.0: "Sea Arrival Foreign",
    8.0: "Sea Arrival UK",
}

# Add the label column by building a new frame with assign() rather than
# writing into the existing one: the per-year frame is obtained by filtering
# the combined table, and an in-place column write on such a frame triggers
# SettingWithCopyWarning. replace() leaves any code missing from flow_dict
# unchanged instead of turning it into NaN.
df_qcontcust_2013 = df_qcontcust_2013.assign(
    Flow_Label=df_qcontcust_2013['Flow'].replace(flow_dict)
)