In [1]:
import os, time, json
import subprocess
import pandas as pd
import numpy as np
from collections import Counter

# Raw HTS Data

In [2]:
# Directory holding the raw HTS CSV files. The original machine-specific
# location stays the default so existing runs are unaffected; set the
# HTS_DATA_DIR environment variable to run the notebook elsewhere without
# editing this cell.
data_dir = os.environ.get('HTS_DATA_DIR', 'D:/L3/data/raw_data/HTS_data')
hh_dpath = os.path.join(data_dir, 'NHTS_households_RA.csv')
persons_dpath = os.path.join(data_dir, 'NHTS_persons_RA.csv')
trips_dpath = os.path.join(data_dir, 'NHTS_trips_RA.csv')

# One DataFrame per survey table: households, persons, trips.
hh_df = pd.read_csv(hh_dpath)
persons_df = pd.read_csv(persons_dpath)
trips_df = pd.read_csv(trips_dpath)

# Trip Purposes

In [3]:
# Tally how often each trip purpose occurs and keep the result as a plain
# dict ordered from most to least frequent. most_common() sorts by descending
# count and leaves equal counts in first-seen order.
purpose_counter = dict(Counter(trips_df['Trip_Purpose'].tolist()).most_common())
for purpose, count in purpose_counter.items():
    print(f'{purpose}: {count}')

Go Home: 442
Commuting: 218
Go to School: 70
Shopping: 52
Entertainment: 48
Pick up: 37
Others: 36
Visiting Friends: 13
Business: 12
Go Back to Workplace / School: 12
Go to Hospital: 6


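# Generate and Save Motif

Each person's motif is a 24-slot daily sequence (`hour_0` … `hour_23`) of one-letter activity codes. Every hour defaults to `H` (home); each trip, taken in `Trip_ID` order, then overwrites all hours from its `From_Time` hour onward with the code for its purpose (`W` work, `H` home, `C` school, `S` shop, `R` recreation, `E` health, `V` visit, `P` pick-up/drop-off).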

In [4]:
# Trip column whose hour marks the point from which a trip's activity
# replaces whatever the motif held before.
use_time = 'From_Time'

# Trip purpose -> one-letter activity code written into the hourly motif:
# W = Work, H = Home, C = School, S = Shop, R = Recreation,
# E = Health, V = Visit, P = Pick-up-Drop-off.
_PURPOSE_TO_ALPHABET = {
    'Commuting': 'W',
    'Go Back to Workplace / School': 'W',
    'Business': 'W',
    'Go Home': 'H',
    'Go to School': 'C',
    'Shopping': 'S',
    'Entertainment': 'R',
    'Others': 'R',
    'Go to Hospital': 'E',
    'Visiting Friends': 'V',
    'Pick up': 'P',
}


def _build_motif(person_id, trips, n_hours=24):
    """Build one person's hourly activity motif.

    Every hour starts as 'H'. Trips are applied in the order given; each one
    overwrites every slot from its own hour through the last slot with the
    code of its purpose, so a later trip overrides the tail of an earlier one.

    Args:
        person_id: Value stored under the 'PID' key.
        trips: Iterable of (time, purpose) pairs. ``time`` is a string whose
            first ':'-separated field is the hour, e.g. '07:30' or '07:30:00'.
        n_hours: Number of hourly slots in the motif.

    Returns:
        dict with 'PID' plus the keys 'hour_0' .. 'hour_{n_hours - 1}'.

    Raises:
        ValueError: If a purpose has no entry in ``_PURPOSE_TO_ALPHABET``, or
            if the hour field of a time is not an integer.
    """
    motif = {'PID': person_id}
    motif.update({f'hour_{hour}': 'H' for hour in range(n_hours)})
    for trip_time, purpose in trips:
        # Only the hour is used; reading just the first field also accepts
        # times that carry a seconds component.
        trip_hour = int(trip_time.split(':')[0])
        if purpose not in _PURPOSE_TO_ALPHABET:
            raise ValueError(f'Unrecognized trip purpose: {purpose!r}')
        alphabet = _PURPOSE_TO_ALPHABET[purpose]
        motif.update({f'hour_{hour}': alphabet
                      for hour in range(trip_hour, n_hours)})
    return motif


# Split the trips table once instead of re-filtering the whole frame for every
# person. Each group holds exactly the rows the per-person boolean filter
# would select, in the same order.
trips_by_person = {
    pid: group for pid, group in trips_df.groupby('Person_ID', sort=False)
}

motif_records = []
person_ids = list(set(trips_df['Person_ID']))
for person_id in person_ids:
    person_trips_df = trips_by_person.get(person_id)
    if person_trips_df is None:
        # An id with no matching group (presumably only a missing/NaN
        # Person_ID, which groupby drops) keeps the all-'H' default.
        person_trips = []
    else:
        person_trips_df = person_trips_df.sort_values('Trip_ID')
        person_trips = zip(person_trips_df[use_time],
                           person_trips_df['Trip_Purpose'])
    motif_records.append(_build_motif(person_id, person_trips))
motif_df = pd.DataFrame(motif_records)

In [5]:
# Write the motifs to motif.csv in the working directory without the
# positional index column, then preview the first rows as the cell output.
motif_df.to_csv(path_or_buf='motif.csv', index=False)
motif_df.head(n=5)

Unnamed: 0,PID,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,...,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23
0,4403040080060710083,H,H,H,H,H,H,H,C,C,...,C,C,C,H,H,H,H,H,H,H
1,4403040080070080162,H,H,H,H,H,H,H,W,W,...,W,W,W,H,H,H,H,H,H,H
2,4403040080070080122,H,H,H,H,H,H,H,H,H,...,W,W,W,H,H,H,H,H,H,H
3,4403040080110360033,H,H,H,H,H,H,H,S,S,...,H,H,H,H,H,H,H,H,H,H
4,4403040080060010054,H,H,H,H,H,H,H,C,C,...,C,C,H,H,H,H,H,H,H,H
