In [None]:
from pathlib import Path

import polars as pl

from foundata import cmap, nhts, nts, qhts, verify, vista
from foundata.utils import (
    check_overlap,
    filter_time_consistent,
    load_yaml_config,
    table_joiner,
)

In [None]:
# Root directory of the per-survey configuration files; a relative path, so it
# is resolved against the notebook's working directory.
configs_root = Path("configs")

# LTDS

_TODO: this section has no loading code yet._

# VISTA

In [None]:
# Column dictionaries (YAML) describing the VISTA household, person and trip tables.
hh_config_path = configs_root / "vista" / "hh_dictionary.yaml"
person_config_path = configs_root / "vista" / "person_dictionary.yaml"
trip_config_path = configs_root / "vista" / "trip_dictionary.yaml"

# Raw VISTA survey files (machine-specific absolute path).
data_root = Path("/home/fred/Data/foundata/VISTA")

# Load every dictionary through its *_path variable so the paths are defined
# in exactly one place (hh_config_path was previously assigned but unused).
hh_config = load_yaml_config(hh_config_path)
person_config = load_yaml_config(person_config_path)
trips_config = load_yaml_config(trip_config_path)

# Survey waves to load.
years = ["2012-2020", "2022-2023", "2023-2024"]

attributes, trips = vista.load_years(
    years=years,
    data_root=data_root,
    hh_config=hh_config,
    person_config=person_config,
    trips_config=trips_config,
)

# Apply the time-consistency filter to both tables, keyed on pid.
attributes, trips = filter_time_consistent(attributes, trips, on="pid")

# Check for pids missing from either table; persons missing from trips are
# assumed to have stayed at home.
check_overlap(attributes, trips, on="pid")

verify.columns(attributes, trips)

# QHTS

In [None]:
# All QHTS configuration files live under one directory.
qhts_configs = configs_root / "qhts"
hh_config_path = qhts_configs / "hh_dictionary.yaml"
person_config_path = qhts_configs / "person_dictionary.yaml"
trip_config_path = qhts_configs / "trip_dictionary.yaml"

# Raw QHTS survey files (machine-specific absolute path).
data_root = Path("/home/fred/Data/foundata/QHTS")

# Survey waves to load; narrow the list (e.g. to just "2022-25") to load a
# single wave.
years = ["2019-22", "2022-25"]

hh_config = load_yaml_config(hh_config_path)
person_config = load_yaml_config(person_config_path)
trips_config = load_yaml_config(trip_config_path)

# Zone mapping built from the SA1 correspondence file, passed to the loader.
zone_mapping = qhts.load_zone_mapping(qhts_configs / "sa1-correspondence-file.csv")

attributes, trips = qhts.load_years(
    years=years,
    data_root=data_root,
    zones_mapping=zone_mapping,
    hh_config=hh_config,
    person_config=person_config,
    trips_config=trips_config,
)

# Apply the time-consistency filter to both tables, keyed on pid.
attributes, trips = filter_time_consistent(attributes, trips, on="pid")

# Check for pids missing from either table; persons missing from trips are
# assumed to have stayed at home.
check_overlap(attributes, trips, on="pid")

verify.columns(attributes, trips)

# Preview the first rows of the trips table.
trips.head()

# CMAP

In [None]:
# The CMAP loaders receive the dictionary paths directly (no load_yaml_config here).
cmap_configs = configs_root / "cmap"
hh_config = cmap_configs / "hh_dictionary.yaml"
person_config = cmap_configs / "person_dictionary.yaml"
trip_config = cmap_configs / "trip_dictionary.yaml"

# Raw CMAP survey files (machine-specific absolute path).
data_root = Path("/home/fred/Data/foundata/CMAP")

# Rurality table, used for both home locations and trip locations below.
rurality = cmap.load_rurality(configs_root)

# Households, left-joined with their home locations on hid.
hhs = cmap.load_households(data_root, hh_config)
hh_locations = cmap.load_home_locations(data_root, rurality_table=rurality)
hhs = hhs.join(hh_locations, how="left", on="hid")

persons = cmap.load_persons(data_root, person_config)

# Join households and persons on hid, then tag every row with its origin.
attributes = table_joiner(hhs, persons, on="hid")
attributes = attributes.with_columns(
    country=pl.lit("usa"),
    source=pl.lit("cmap"),
)

rurality_mapping = cmap.load_locations(data_root, rurality_table=rurality)
trips = cmap.load_trips(data_root, trip_config, rurality_mapping=rurality_mapping)

# NOTE(review): unlike the VISTA/QHTS/NTS cells, no `on=` is passed here, so
# the helper's default key applies; confirm that default is "pid".
attributes, trips = filter_time_consistent(attributes, trips)

# Check for pids missing from either table; persons missing from trips are
# assumed to have stayed at home.
check_overlap(attributes, trips, on="pid")

verify.columns(attributes, trips)

# NHTS

In [None]:
# The NHTS loaders receive the dictionary paths directly.
nhts_configs = configs_root / "nhts"
hh_config = nhts_configs / "hh_dictionary.yaml"
person_config = nhts_configs / "person_dictionary.yaml"
trip_config = nhts_configs / "trip_dictionary.yaml"

# Raw NHTS survey files (machine-specific absolute path).
data_root = Path("/home/fred/Data/foundata/NHTS")

# Person attributes: households joined with persons on hid.
hhs = nhts.load_households(data_root, hh_config)
persons = nhts.load_persons(data_root, person_config)
attributes = table_joiner(hhs, persons, on="hid")

trips = nhts.load_trips(data_root, trip_config)

# NOTE(review): unlike the VISTA/QHTS/NTS cells, no `on=` is passed here, so
# the helper's default key applies; confirm that default is "pid".
attributes, trips = filter_time_consistent(attributes, trips)

# Check for pids missing from either table; persons missing from trips are
# assumed to have stayed at home.
check_overlap(attributes, trips, on="pid")

verify.columns(attributes, trips)

# NTS

In [None]:
# The NTS loaders receive the dictionary paths directly; NTS also has a
# separate day-level table with its own dictionary.
nts_configs = configs_root / "nts"
hh_config = nts_configs / "hh_dictionary.yaml"
person_config = nts_configs / "person_dictionary.yaml"
trip_config = nts_configs / "trip_dictionary.yaml"
days_config = nts_configs / "day_dictionary.yaml"

# Raw NTS survey files (machine-specific absolute path).
data_root = Path("/home/fred/Data/foundata/NTS")

# Person attributes: households joined with persons on hid.
hhs = nts.load_households(data_root, hh_config)
persons = nts.load_persons(data_root, person_config)
attributes = table_joiner(hhs, persons, on="hid")

# Trips joined with the day table on did.
trips = nts.load_trips(data_root, trip_config)
days = nts.load_days(data_root, days_config)
trips = table_joiner(trips, days, on="did")

# Split by day; note the helper returns (trips, attributes), in that order.
trips, attributes = nts.split_days(
    trips,
    attributes,
    on_split="pdid",
    on_base="pid",
)

# Apply the time-consistency filter to both tables, keyed on pid.
attributes, trips = filter_time_consistent(attributes, trips, on="pid")

# Check for pids missing from either table; persons missing from trips are
# assumed to have stayed at home.
check_overlap(attributes, trips, on="pid")

verify.columns(attributes, trips)