In [1]:
import os
from pathlib import Path

import polars as pl

from foundata import cmap, nhts, verify
from foundata.utils import check_overlap, filter_time_consistent, table_joiner

In [2]:
# Root directory of the per-survey dictionary configs. The path is relative,
# so it resolves against the kernel's current working directory.
configs_root = Path("configs")

# CMAP

In [None]:
# Dictionary configs passed to the CMAP household, person and trip loaders.
hh_config = configs_root / "cmap" / "hh_dictionary.yaml"
person_config = configs_root / "cmap" / "person_dictionary.yaml"
trip_config = configs_root / "cmap" / "trip_dictionary.yaml"

# Location of the raw CMAP files. The parent data directory can be overridden
# with the FOUNDATA_DATA_DIR environment variable so the notebook is not tied
# to one machine; when the variable is unset the original location is used.
data_root = (
    Path(os.environ.get("FOUNDATA_DATA_DIR", "/home/fred/Data/foundata"))
    / "CMAP"
)

In [None]:
# Load CMAP households from data_root using the household dictionary config,
# then preview the result.
hhs = cmap.load_households(data_root, hh_config)
hhs.head()

In [None]:
# Load CMAP persons from data_root using the person dictionary config,
# then preview the result.
persons = cmap.load_persons(data_root, person_config)
persons.head()

In [None]:
# Join households and persons on the household id ("hid").
attributes = table_joiner(hhs, persons, on="hid")

# Tag every row with constant country and source labels for this survey.
attributes = attributes.with_columns(
    country=pl.lit("usa"),
    source=pl.lit("cmap"),
)

In [None]:
# Build the rurality mapping in two steps: the rurality table is loaded from
# the configs directory, then handed to the location loader together with the
# raw data directory.
rurality = cmap.load_rurality(configs_root)
rurality_mapping = cmap.load_locations(data_root, rurality_table=rurality)
# Load the CMAP trips, passing the mapping through to the trip loader.
trips = cmap.load_trips(
    data_root, trip_config, rurality_mapping=rurality_mapping
)

In [None]:
# Filter both tables together and rebind the names to the filtered results.
# NOTE: because the names are overwritten, re-running this cell operates on
# the already-filtered tables; re-run the load cells above to start fresh.
attributes, trips = filter_time_consistent(attributes, trips)

In [None]:
# Preview the filtered attributes table.
attributes.head()

In [None]:
# Preview the filtered trips table.
trips.head()

In [None]:
# Check for person ids ("pid") that are missing between attributes and trips.
# A person who is missing from trips is assumed to have stayed at home.
check_overlap(attributes, trips, on="pid")

In [None]:
# Run the column verification on the final CMAP attributes and trips tables.
verify.columns(attributes, trips)

# NHTS

In [3]:
# Dictionary configs passed to the NHTS household, person and trip loaders.
# These rebind the config names used by the CMAP section above.
hh_config = configs_root / "nhts" / "hh_dictionary.yaml"
person_config = configs_root / "nhts" / "person_dictionary.yaml"
trip_config = configs_root / "nhts" / "trip_dictionary.yaml"

# Location of the raw NHTS files. The parent data directory can be overridden
# with the FOUNDATA_DATA_DIR environment variable so the notebook is not tied
# to one machine; when the variable is unset the original location is used.
data_root = (
    Path(os.environ.get("FOUNDATA_DATA_DIR", "/home/fred/Data/foundata"))
    / "NHTS"
)

In [5]:
# Load NHTS households and persons, then join them on the household id.
# NOTE(review): unlike the CMAP section, no country/source columns are added
# here - presumably the NHTS loaders or configs supply them; confirm.
hhs = nhts.load_households(data_root, hh_config)
persons = nhts.load_persons(data_root, person_config)
attributes = table_joiner(hhs, persons, on="hid")

All 'hid' keys in table_b are present in table_a
All 'hid' keys in table_a are present in table_b


In [6]:
# Load NHTS trips from data_root using the trip dictionary config.
trips = nhts.load_trips(data_root, trip_config)

In [7]:
# Filter both tables together and rebind the names to the filtered results;
# the recorded output below reports what was removed for time inconsistency.
# NOTE: because the names are overwritten, re-running this cell operates on
# the already-filtered tables; re-run the load cells above to start fresh.
attributes, trips = filter_time_consistent(attributes, trips)

Total trips: 2750389, Total plans: 600866, from 716376 attributes
Removed 18 trips or 4 plans and 4 attributes due to time inconsistency


In [8]:
# Check for person ids ("pid") that are missing between attributes and trips.
# A person who is missing from trips is assumed to have stayed at home.
check_overlap(attributes, trips, on="pid")



In [9]:
# Run the column verification on the final NHTS attributes and trips tables
# (returned True in the recorded run).
verify.columns(attributes, trips)

True

# NTS

In [None]:
# Dictionary configs for the NTS household, person and trip tables.
# These rebind the config names used by the earlier sections.
hh_config = configs_root / "nts" / "hh_dictionary.yaml"
person_config = configs_root / "nts" / "person_dictionary.yaml"
trip_config = configs_root / "nts" / "trip_dictionary.yaml"

# Location of the raw NTS files. The parent data directory can be overridden
# with the FOUNDATA_DATA_DIR environment variable so the notebook is not tied
# to one machine; when the variable is unset the original location is used.
data_root = (
    Path(os.environ.get("FOUNDATA_DATA_DIR", "/home/fred/Data/foundata"))
    / "NTS"
)

In [None]:
# Load, join and time-filter the NTS tables in a single cell.
# NOTE(review): this section is headed NTS and points at the NTS configs and
# data directory, but every loader called here comes from the `nhts` module.
# This looks like a copy-paste from the NHTS section - confirm whether a
# dedicated NTS loader should be used instead.
hhs = nhts.load_households(data_root, hh_config)
persons = nhts.load_persons(data_root, person_config)
attributes = table_joiner(hhs, persons, on="hid")
trips = nhts.load_trips(data_root, trip_config)
# Rebinds both names to the filtered results.
attributes, trips = filter_time_consistent(attributes, trips)