# Adding activity chains to synthetic populations 

The purpose of this notebook is to match each individual in the synthetic population to a respondent from the [National Travel Survey (NTS)](https://beta.ukdataservice.ac.uk/datacatalogue/studies/study?id=5340). 

### Methods

We use statistical matching, as described in [An unconstrained statistical matching algorithm for combining individual and household level geo-specific census and survey data](https://doi.org/10.1016/j.compenvurbsys.2016.11.003). 

In [2]:
import pandas as pd


## Step 1: Decide on matching variables  

We need to identify the sociodemographic characteristics that we will match on. Let's see what variables exist in (a) the NTS, and (b) our synthetic population.

### 1a: Exploring the NTS

In [4]:
# Paths to the NTS tab-separated tables (2002-2022 release), relative to this
# notebook: one file each for households, individuals and trips.
households = "../data/nts/UKDA-5340-tab/tab/household_eul_2002-2022.tab"
individuals = "../data/nts/UKDA-5340-tab/tab/indev_eul_2002-2022.tab"
trips = "../data/nts/UKDA-5340-tab/tab/trip_eul_2002-2022.tab"

In [5]:
# Survey year to analyse; the NTS tables are filtered on their `SurveyYear`
# column using this value.
year = 2019

In [6]:
# `households` starts out as the file path (a str) and is rebound below to the
# loaded DataFrame. Keep the path under its own name so that re-running this
# cell does not hand a DataFrame to `pd.read_csv`, which would raise.
if isinstance(households, str):
    households_path = households

# The household table is tab-separated. Every survey year in the file is
# loaded here; filtering to a single year happens in a later cell.
households = pd.read_csv(
    households_path,
    sep="\t",
)

households

Unnamed: 0,HouseholdID,PSUID,TWSDay,TWSMonth,TWSMonth_B01ID,TWSYear,TWSDate,TWSWeek,TWSWeekday_B01ID,TWEDay,...,HHIncQDS2005Eng_B01ID,HHIncQIS2006_B01ID,HHIncQDS2006_B01ID,HHIncQIS2006Eng_B01ID,HHIncQDS2006Eng_B01ID,HHIncQIS2007_B01ID,HHIncQDS2007_B01ID,HHIncQIS2007Eng_B01ID,HHIncQDS2007Eng_B01ID,SurveyYear
0,2002000260,2002000021,16,1,1,2002,1/16/2002 0:00:00,3,3,22,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
1,2002008705,2002000674,16,12,12,2002,12/16/2002 0:00:00,50,1,22,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
2,2002005959,2002000467,7,10,10,2002,10/7/2002 0:00:00,40,1,13,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
3,2002005961,2002000467,5,10,10,2002,10/5/2002 0:00:00,40,6,11,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
4,2002008815,2002000681,27,12,12,2001,12/27/2001 0:00:00,52,4,2,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161879,2022004365,2022000756,13,1,1,2023,1/13/2023 0:00:00,2,5,19,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022
161880,2022004366,2022000756,28,1,1,2023,1/28/2023 0:00:00,4,6,3,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022
161881,2022004367,2022000756,26,1,1,2023,1/26/2023 0:00:00,4,4,1,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022
161882,2022004368,2022000756,10,2,2,2023,2/10/2023 0:00:00,6,5,16,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022


In [15]:
# Preview only the households whose survey year matches the selected `year`.
households.loc[households["SurveyYear"].eq(year)]

Unnamed: 0,HouseholdID,PSUID,TWSDay,TWSMonth,TWSMonth_B01ID,TWSYear,TWSDate,TWSWeek,TWSWeekday_B01ID,TWEDay,...,HHIncQDS2005Eng_B01ID,HHIncQIS2006_B01ID,HHIncQDS2006_B01ID,HHIncQIS2006Eng_B01ID,HHIncQDS2006Eng_B01ID,HHIncQIS2007_B01ID,HHIncQDS2007_B01ID,HHIncQIS2007Eng_B01ID,HHIncQDS2007Eng_B01ID,SurveyYear
142954,2019001895,2019000208,4,5,5,2019,5/4/2019 0:00:00,18,6,10,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
142955,2019002676,2019000295,20,6,6,2019,6/20/2019 0:00:00,25,4,26,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
142956,2019001891,2019000207,20,5,5,2019,5/20/2019 0:00:00,20,1,26,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
142957,2019002687,2019000297,15,6,6,2019,6/15/2019 0:00:00,24,6,21,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
142958,2019001913,2019000209,24,5,5,2019,5/24/2019 0:00:00,21,5,30,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149738,2019003877,2019000426,5,9,9,2019,9/5/2019 0:00:00,36,4,11,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
149739,2019003871,2019000425,9,9,9,2019,9/9/2019 0:00:00,36,1,15,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
149740,2019003895,2019000427,22,8,8,2019,8/22/2019 0:00:00,34,4,28,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019
149741,2019006069,2019000671,19,12,12,2019,12/19/2019 0:00:00,51,4,25,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2019


In [6]:
# Display the full (unfiltered) households table; the year filter above did not
# modify it, so all survey years are still present.
households

Unnamed: 0,HouseholdID,PSUID,TWSDay,TWSMonth,TWSMonth_B01ID,TWSYear,TWSDate,TWSWeek,TWSWeekday_B01ID,TWEDay,...,HHIncQDS2005Eng_B01ID,HHIncQIS2006_B01ID,HHIncQDS2006_B01ID,HHIncQIS2006Eng_B01ID,HHIncQDS2006Eng_B01ID,HHIncQIS2007_B01ID,HHIncQDS2007_B01ID,HHIncQIS2007Eng_B01ID,HHIncQDS2007Eng_B01ID,SurveyYear
0,2002000260,2002000021,16,1,1,2002,1/16/2002 0:00:00,3,3,22,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
1,2002008705,2002000674,16,12,12,2002,12/16/2002 0:00:00,50,1,22,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
2,2002005959,2002000467,7,10,10,2002,10/7/2002 0:00:00,40,1,13,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
3,2002005961,2002000467,5,10,10,2002,10/5/2002 0:00:00,40,6,11,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
4,2002008815,2002000681,27,12,12,2001,12/27/2001 0:00:00,52,4,2,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161879,2022004365,2022000756,13,1,1,2023,1/13/2023 0:00:00,2,5,19,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022
161880,2022004366,2022000756,28,1,1,2023,1/28/2023 0:00:00,4,6,3,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022
161881,2022004367,2022000756,26,1,1,2023,1/26/2023 0:00:00,4,4,1,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022
161882,2022004368,2022000756,10,2,2,2023,2/10/2023 0:00:00,6,5,16,...,-10,-10,-10,-10,-10,-10,-10,-10,-10,2022
