In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os
from copulas.multivariate import GaussianMultivariate
import numpy as np
from itertools import combinations
import importlib
import logging
import itertools
import ast


In [2]:
# The data directory is resolved against the current working directory, so the
# notebook has to be started from the folder that contains "Data".
Data_folder = f"{os.getcwd()}/Data"

Data_folder

'/home/trapfishscott/Cambridge24.25/Energy_thesis/Data'

In [43]:
# Load the pickled car DataFrame from the data folder.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# that were produced by a trusted source.
car_df = pd.read_pickle(f"{Data_folder}/df_car.pkl")

In [45]:
# Build one row per (individual, travel day) holding the list of that day's
# trip types:
#   1. keep the first TripType recorded for each journey sequence number,
#   2. collect those values into a list per individual and travel day
#      (groupby sorts its keys by default, so the list follows JourSeq order),
#   3. turn the group keys back into ordinary columns.
x = (
    car_df.groupby(["IndividualID", "TravDay", "JourSeq"])[["TripType"]]
    .first()
    .groupby(["IndividualID", "TravDay"])[["TripType"]]
    .agg(list)
    .reset_index()
)

In [6]:
# Number of trips made on each (individual, travel day) = length of the trip list.
x["SumTrips"] = x["TripType"].apply(len)

In [7]:
# Build a complete grid with one row per individual for each of travel days 1-7,
# so that days without any recorded trip are represented explicitly.
person_ids = x["IndividualID"].unique()

days = np.tile(np.arange(1, 8), len(person_ids))
individuals = [person for person in person_ids for _ in range(7)]

new_df = pd.DataFrame({"IndividualID": individuals, "TravDay": days})

# Left-join the observed trip lists onto the grid; grid rows with no match come
# back as NaN and are replaced by 0 in both TripType and SumTrips.
merged_df = new_df.merge(x, how="left", on=["IndividualID", "TravDay"]).fillna(0)

# Store every sequence as its string representation (presumably so the
# list-valued sequences can be compared and counted later; lists are unhashable).
merged_df["TripType"] = merged_df["TripType"].apply(str)


In [8]:


# Report how many distinct per-day trip sequences exist (days without trips
# all share the single "0" sequence).
# Single quotes are used inside the replacement field because reusing the
# enclosing double quote there is a SyntaxError on Python versions before 3.12.
print(f"There are {len(merged_df['TripType'].unique())} unique trip sequences.")


There are 5921 unique trip sequences.


In [9]:
# Show the completed individual-by-day grid (the notebook renders a truncated view).
merged_df

Unnamed: 0,IndividualID,TravDay,TripType,SumTrips
0,2002000001,1,0,0.0
1,2002000001,2,0,0.0
2,2002000001,3,0,0.0
3,2002000001,4,0,0.0
4,2002000001,5,0,0.0
...,...,...,...,...
1211583,2023017379,3,"[(3, 1), (1, 3), (3, 2), (2, 3)]",4.0
1211584,2023017379,4,"[(3, 2), (2, 3)]",2.0
1211585,2023017379,5,0,0.0
1211586,2023017379,6,"[(3, 1), (1, 3)]",2.0


In [10]:
# For every daily trip count (0, 1, 2, ... trips), compute the relative
# frequency of each distinct trip sequence observed with that count.
# normalize=True makes the proportions sum to 1 within each SumTrips group.
sequences_by_count = merged_df[["SumTrips", "TripType"]].groupby("SumTrips")

trip_probs = sequences_by_count.value_counts(normalize=True).reset_index()

In [25]:
# Convert the stringified sequences back into Python objects (a list of tuples,
# or 0 for days without trips).
# Only string values are parsed: ast.literal_eval raises ValueError when it is
# handed an already-parsed list or int, so without this guard the cell would
# fail when it is executed a second time on the same frame.
trip_probs["TripType"] = trip_probs["TripType"].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [26]:
# Inspect the per-trip-count sequence proportions after parsing TripType.
trip_probs

Unnamed: 0,SumTrips,TripType,proportion
0,0.0,0,1.000000
1,1.0,"[(3, 2)]",0.359880
2,1.0,"[(2, 3)]",0.358808
3,1.0,"[(3, 1)]",0.137585
4,1.0,"[(1, 3)]",0.094877
...,...,...,...
5916,20.0,"[(3, 2), (2, 3), (3, 2), (2, 2), (2, 3), (3, 2...",0.333333
5917,21.0,"[(3, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2...",0.333333
5918,21.0,"[(3, 2), (2, 2), (2, 2), (2, 2), (2, 3), (3, 2...",0.333333
5919,21.0,"[(3, 2), (2, 3), (3, 2), (2, 2), (2, 2), (2, 3...",0.333333


In [66]:
# Lookup table keyed "trip_length_<n>", where each value is a two-item list:
# [candidate trip sequences, their proportions], aligned by position.
pop_seq_weights = {}

# sort=False yields the groups in order of first appearance, which is the same
# order as iterating over the unique SumTrips values.
for trip_length, by_trip_length in trip_probs.groupby("SumTrips", sort=False):
    print(by_trip_length.head())
    population = [*by_trip_length["TripType"]]
    weights = [*by_trip_length["proportion"]]
    pop_seq_weights[f"trip_length_{int(trip_length)}"] = [population, weights]



   SumTrips TripType  proportion
0       0.0        0         1.0
   SumTrips  TripType  proportion
1       1.0  [(3, 2)]    0.359880
2       1.0  [(2, 3)]    0.358808
3       1.0  [(3, 1)]    0.137585
4       1.0  [(1, 3)]    0.094877
5       1.0  [(2, 2)]    0.043675
    SumTrips          TripType  proportion
8        2.0  [(3, 2), (2, 3)]    0.583721
9        2.0  [(3, 1), (1, 3)]    0.357063
10       2.0  [(2, 2), (2, 2)]    0.013905
11       2.0  [(1, 3), (3, 1)]    0.013148
12       2.0  [(2, 2), (2, 3)]    0.007467
    SumTrips                  TripType  proportion
50       3.0  [(3, 2), (2, 2), (2, 3)]    0.452526
51       3.0  [(3, 1), (1, 2), (2, 3)]    0.208922
52       3.0  [(3, 2), (2, 1), (1, 3)]    0.091577
53       3.0  [(3, 2), (2, 3), (3, 2)]    0.040018
54       3.0  [(2, 3), (3, 2), (2, 3)]    0.037630
     SumTrips                          TripType  proportion
206       4.0  [(3, 2), (2, 3), (3, 2), (2, 3)]    0.480470
207       4.0  [(3, 1), (1, 3), (3, 2), (2, 3)

In [42]:
# Index 1 of an entry is its list of proportions; show those for two-trip days.
pop_seq_weights["trip_length_2"][1]

[0.5837208681750808,
 0.35706332282925907,
 0.013905162522045213,
 0.0131482869625759,
 0.007466868500149192,
 0.00654745876605025,
 0.004451495678289073,
 0.0030056692890464095,
 0.0021978501823051227,
 0.002040167774082349,
 0.001855800650621875,
 0.0009533720989469241,
 0.0009048544348783784,
 0.0006113225672636765,
 0.0005167131223300122,
 0.00038814131254836604,
 0.00026684715237700166,
 0.00026442126917357436,
 0.00020134830588446488,
 5.579531367882762e-05,
 4.8517664068545755e-05,
 4.8517664068545755e-05,
 4.366589766169118e-05,
 3.638824805140932e-05,
 3.396236484798203e-05,
 3.153648164455474e-05,
 2.9110598441127454e-05,
 2.6684715237700166e-05,
 2.4258832034272877e-05,
 1.9407065627418304e-05,
 1.9407065627418304e-05,
 9.703532813709152e-06,
 9.703532813709152e-06,
 9.703532813709152e-06,
 9.703532813709152e-06,
 7.2776496102818635e-06,
 7.2776496102818635e-06,
 4.851766406854576e-06,
 4.851766406854576e-06,
 4.851766406854576e-06,
 2.425883203427288e-06,
 2.425883203427288

In [63]:
# Preview the first five entries: key, first listed sequence and its proportion.
for position, (key, (sequences, proportions)) in enumerate(pop_seq_weights.items()):
    if position > 4:
        continue
    print(key, sequences[0], proportions[0])

trip_length_0 0 1.0
trip_length_1 [(3, 2)] 0.3598797699947726
trip_length_2 [(3, 2), (2, 3)] 0.5837208681750808
trip_length_3 [(3, 2), (2, 2), (2, 3)] 0.45252596816400226
trip_length_4 [(3, 2), (2, 3), (3, 2), (2, 3)] 0.4804698049806865


In [61]:
# Location codes used in the TripType tuples, read as (origin, destination):
#   1 = work
#   2 = other
#   3 = home
mapping = {
    (2, 3): "other->home",
    (3, 2): "home->other",
    (3, 1): "home->work",
    (2, 2): "other->other",
    (1, 3): "work->home",
    (1, 2): "work->other",
    (2, 1): "other->work",
}

# Overall share of each single trip type across all car trips, with a readable
# label next to each code pair (pairs missing from `mapping` become NaN).
r = (
    car_df["TripType"]
    .value_counts(normalize=True)
    .reset_index()
    .assign(TripType_mapped=lambda shares: shares["TripType"].map(mapping))
)

r

Unnamed: 0,TripType,proportion,TripType_mapped
0,"(2, 3)",0.32467,other->home
1,"(3, 2)",0.318645,home->other
2,"(3, 1)",0.106809,home->work
3,"(2, 2)",0.104834,other->other
4,"(1, 3)",0.099178,work->home
5,"(1, 2)",0.026362,work->other
6,"(2, 1)",0.019503,other->work
