In [38]:
import polars as pl
import json

from pathlib import Path

In [39]:
cmap_root = Path("~/Data/foundata/cmap")

hhs = (
    pl.read_csv(cmap_root / "household.csv", ignore_errors=True)
    .fill_nan(-9)
    .fill_null(-9)
)
persons = (
    pl.read_csv(cmap_root / "person.csv", ignore_errors=True).fill_nan(-9).fill_null(-9)
)
trips = (
    pl.read_csv(cmap_root / "gps_place.csv", ignore_errors=True)
    .fill_nan(-9)
    .fill_null(-9)
)
vehicles = (
    pl.read_csv(cmap_root / "vehicle.csv", ignore_errors=True)
    .fill_nan(-9)
    .fill_null(-9)
)

In [40]:
hhs.tail()

sampno,travdate,travday,hhsize,hhveh,transprob,transprob_o,hlive,restylast,restylast_o,restylast_zip,hhinc2,homeown,homeown_o,futuresurvey,incendonate,resty,resty_o,hhinc,download,source,source_o,hhtrips,phase,wthhfin
i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64
70100991,"""2018-10-04""",5,3,3,1,-1,2,2,-1,60827,-1,3,-1,1,0,1,-1,2,1,2,-1,9,0,647.0892
70100992,"""2018-09-25""",3,3,3,1,-1,34,-1,-1,-1,-1,2,-1,1,0,2,-1,8,2,97,-9,4,0,305.085
70100993,"""2018-09-10""",2,2,3,3,-1,25,-1,-1,-1,-1,1,-1,1,-7,1,-1,7,2,7,-1,8,0,615.7184
70100998,"""2019-04-23""",3,1,0,97,-9,2,3,-1,60624,-1,3,-1,2,0,3,-1,2,2,97,-9,6,0,74.5194
70100999,"""2019-04-24""",4,1,1,3,-1,35,-1,-1,-1,-1,3,-1,1,0,1,-1,1,2,97,-9,5,0,313.6208


In [41]:
# extract year and month
hhs = hhs.with_columns(
    pl.col("travdate").str.split("-").list.get(0).str.to_integer().alias("year"),
    pl.col("travdate").str.split("-").list.get(1).str.to_integer().alias("month"),
)

# map
with open("cmap/hh_dictionary.json") as f:
    hh_mapper = json.load(f)
hh_mapper = {c: {int(k): v for k, v in d.items()} for c, d in hh_mapper.items()}

hhs = hhs.with_columns(
    pl.col(col).replace(mapping, default=None).fill_null(pl.col(col))
    for col, mapping in hh_mapper.items()
)

# check for numerics
string_cols = [
    col for col in hhs.columns
    if hhs[col].dtype == pl.String
]
can_integer_cols = [
    col for col in string_cols
    if hhs[col].str.to_integer(strict=False).null_count() == 0
]
can_float_cols = [
    col for col in string_cols
    if hhs[col].cast(pl.Float64, strict=False).null_count() == 0
]

hhs = hhs.with_columns([
    pl.col(can_integer_cols).cast(pl.Int64)
])

hhs


(Deprecated in version 1.0.0)
  pl.col(col).replace(mapping, default=None).fill_null(pl.col(col))


sampno,travdate,travday,hhsize,hhveh,transprob,transprob_o,hlive,restylast,restylast_o,restylast_zip,hhinc2,homeown,homeown_o,futuresurvey,incendonate,resty,resty_o,hhinc,download,source,source_o,hhtrips,phase,wthhfin,year,month
i64,str,str,i64,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,i64,str,f64,i64,i64
20000083,"""2017-10-11""","""Wednesday""",1,1,"""Not ascertained""","""Appropriate skip""","""2""","""An apartment or condo,""","""Appropriate skip""","""60194""","""Appropriate skip""","""Rent""","""Appropriate skip""","""Yes""","""Not ascertained""","""An apartment or condo,""","""Appropriate skip""","""$60,000 to $74,999""","""Receive in the mail""","""Appropriate skip""","""Appropriate skip""",9,"""Pilot 1""",155.2391,2017,10
20000136,"""2017-10-23""","""Monday""",3,1,"""Not ascertained""","""Appropriate skip""","""1""","""Single-family attached house (…","""Appropriate skip""","""2116""","""Appropriate skip""","""Rent""","""Appropriate skip""","""Yes""","""Not ascertained""","""An apartment or condo,""","""Appropriate skip""","""$100,000 to $149,999""","""Receive in the mail""","""Appropriate skip""","""Appropriate skip""",4,"""Pilot 1""",319.6422,2017,10
20000228,"""2017-10-01""","""Monday""",1,1,"""Not ascertained""","""Appropriate skip""","""10""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Own""","""Appropriate skip""","""No""","""Not ascertained""","""Single-family detached house,""","""Appropriate skip""","""$150,000 or more""","""Receive in the mail""","""Appropriate skip""","""Appropriate skip""",9,"""Pilot 1""",540.4961,2017,10
20000248,"""2017-09-21""","""Thursday""",3,1,"""Not ascertained""","""Appropriate skip""","""26""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Occupied without payment of re…","""Appropriate skip""","""Yes""","""Not ascertained""","""An apartment or condo,""","""Appropriate skip""","""Less than $15,000""","""Receive in the mail""","""Appropriate skip""","""Appropriate skip""",4,"""Pilot 1""",662.6029,2017,9
20000300,"""2017-09-13""","""Wednesday""",1,0,"""Not ascertained""","""Appropriate skip""","""8""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Rent""","""Appropriate skip""","""Yes""","""Not ascertained""","""An apartment or condo,""","""Appropriate skip""","""$50,000 to $59,999""","""Print""","""Appropriate skip""","""Appropriate skip""",2,"""Pilot 1""",68.701,2017,9
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
70100991,"""2018-10-04""","""Thursday""",3,3,"""Too much traffic""","""Appropriate skip""","""2""","""Single-family attached house (…","""Appropriate skip""","""60827""","""Appropriate skip""","""Rent""","""Appropriate skip""","""Yes""","""Send the gift to my household""","""Single-family detached house,""","""Appropriate skip""","""$15,000 to $24,999""","""Print""","""Facebook""","""Appropriate skip""",9,"""Main""",647.0892,2018,10
70100992,"""2018-09-25""","""Tuesday""",3,3,"""Too much traffic""","""Appropriate skip""","""34""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Own with mortgage""","""Appropriate skip""","""Yes""","""Send the gift to my household""","""Single-family attached house (…","""Appropriate skip""","""$75,000 to $99,999""","""Receive in the mail""","""Other""","""Not ascertained""",4,"""Main""",305.085,2018,9
70100993,"""2018-09-10""","""Monday""",2,3,"""Roads and bridges are in poor …","""Appropriate skip""","""25""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Own without mortgage""","""Appropriate skip""","""Yes""","""I prefer not to answer""","""Single-family detached house,""","""Appropriate skip""","""$60,000 to $74,999""","""Receive in the mail""","""Partner emails""","""Appropriate skip""",8,"""Main""",615.7184,2018,9
70100998,"""2019-04-23""","""Tuesday""",1,0,"""Other:""","""-9""","""2""","""An apartment or condo,""","""Appropriate skip""","""60624""","""Appropriate skip""","""Rent""","""Appropriate skip""","""No""","""Send the gift to my household""","""An apartment or condo,""","""Appropriate skip""","""$15,000 to $24,999""","""Receive in the mail""","""Other""","""Not ascertained""",6,"""Main""",74.5194,2019,4


In [42]:
# map
with open("cmap/person_dictionary.json") as f:
    person_mapper = json.load(f)
person_mapper = {c: {int(k): v for k, v in d.items()} for c, d in person_mapper.items()}

persons = persons.with_columns(
    pl.col(col).replace(mapping, default=None).fill_null(pl.col(col))
    for col, mapping in person_mapper.items()
)

# check for numerics
string_cols = [
    col for col in persons.columns
    if persons[col].dtype == pl.String
]
can_integer_cols = [
    col for col in string_cols
    if persons[col].str.to_integer(strict=False).null_count() == 0
]
can_float_cols = [
    col for col in string_cols
    if persons[col].cast(pl.Float64, strict=False).null_count() == 0
]

persons = persons.with_columns([
    pl.col(can_integer_cols).cast(pl.Int64)
])

persons


(Deprecated in version 1.0.0)
  pl.col(col).replace(mapping, default=None).fill_null(pl.col(col))


sampno,perno,date_completed,retmode_final,retmode,proxy,proxyperno,age,aage,age18,sex,relate,relate_o,lic,hisp,hisp_o,race,race_o,smrtphn,emply_ask,jobs,wkstat,wkstat_o,volun_freq,wplace,wmode,wmode_o,wparkride,pervh,wrkhrs,wtrav,occup,occup_o,indus,indus_o,emply_transit,emply_transit_o,…,nogowhy2_4,nogowhy2_5,nogowhy2_rf,nogowhy2_dk,traveldatause,traveldatause_pilot,traveldatadevice,traveldatadevice_1,traveldatadevice_2,traveldatadevice_3,traveldatadevice_se,traveldatadevice_rf,traveldatadevice_dk,traveldatadevice_o,traveldatamode,traveldatamode_1,traveldatamode_2,traveldatamode_3,traveldatamode_4,traveldatamode_5,traveldatamode_se,traveldatamode_rf,traveldatamode_dk,traveldatamode_o,dtype,dtype_1,dtype_2,dtype_3,dtype_4,dtype_5,dtype_6,dtype_se,dtype_rf,dtype_dk,qc_trip_person,pertrips,wtperfin
i64,i64,str,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,i64,str,f64
20000083,1,"""2017-10-12""","""WEB""","""Smartphone App & Web""","""No""",1,"""45""","""Appropriate skip""","""Appropriate skip""","""Male""","""Self""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""White ""","""Appropriate skip""","""Yes""","""Yes""","""1""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Same work place every day""","""Auto / van / truck (as the dri…","""Appropriate skip""","""Appropriate skip""","""No""","""42""","""5 days a week""","""Business and Financial Operati…","""Appropriate skip""","""44-45""","""Appropriate skip""","""Something else""","""UNKNOWN SUBSIDY PERCENT""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Rarely""","""2;3""","""Appropriate skip""","""Smartphones""","""Navigation devices""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""1;2;3""","""Driving""","""Public Transportation - Bus Sc…","""Public Transportation - Train …","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",1,"""9""",155.2391
20000136,1,"""2017-10-24""","""WEB""","""Smartphone App & Web""","""No""",1,"""33""","""Appropriate skip""","""Appropriate skip""","""Female""","""Self""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""White ""","""Appropriate skip""","""Yes""","""Yes""","""1""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Same work place every day""","""Auto / van / truck (as the dri…","""Appropriate skip""","""Appropriate skip""","""No""","""20""","""2 days a week""","""Healthcare Practitioners and T…","""Appropriate skip""","""Professional, Scientific, and …","""Appropriate skip""","""No subsidy offered""","""Appropriate skip""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Daily""","""Smartphones""","""Appropriate skip""","""Smartphones""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""1;2""","""Driving""","""Public Transportation - Bus Sc…","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",0,"""2""",319.6422
20000136,2,"""2017-10-24""","""WEB""","""Web Only""","""No""",2,"""34""","""Appropriate skip""","""Appropriate skip""","""Male""","""Spouse/Unmarried partner""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""White ""","""Appropriate skip""","""No""","""Yes""","""1""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Same work place every day""","""Walk""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""100""","""7 days a week""","""Healthcare Practitioners and T…","""Appropriate skip""","""Health Care and Social Assista…","""Appropriate skip""","""I don't know""","""Appropriate skip""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Weekly""","""Smartphones""","""Appropriate skip""","""Smartphones""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""1;2""","""Driving""","""Public Transportation - Bus Sc…","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",0,"""2""",319.6422
20000136,3,"""2017-10-24""","""-1""","""-1""","""Yes""",1,"""1""","""Appropriate skip""","""Appropriate skip""","""Male""","""Son/Daughter""","""Appropriate skip""","""Appropriate skip""","""No""","""Appropriate skip""","""White ""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""-1""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",0,"""Appropriate skip""",319.6422
20000228,1,"""2017-10-02""","""WEB""","""Smartphone App & Web""","""No""",1,"""50""","""Appropriate skip""","""Appropriate skip""","""Female""","""Self""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""Multiracial""","""Appropriate skip""","""Yes""","""Yes""","""1""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Work from home""","""[$I_DO] not travel to work""","""Appropriate skip""","""Appropriate skip""","""No""","""35""","""Appropriate skip""","""Business and Financial Operati…","""Appropriate skip""","""Professional, Scientific, and …","""Appropriate skip""","""No subsidy offered""","""Appropriate skip""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Rarely""","""Smartphones""","""Appropriate skip""","""Smartphones""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Public Transportation - Bus Sc…","""Appropriate skip""","""Public Transportation - Bus Sc…","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",1,"""9""",540.4961
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
70100992,3,"""2018-09-27""","""WEB""","""Web Only""","""No""",3,"""26""","""Appropriate skip""","""Appropriate skip""","""Male""","""Son/Daughter""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""White ""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""Unemployed but looking for wor…","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",…,"""Appropriate skip""","""I did none of these""","""Appropriate skip""","""Appropriate skip""","""No""","""-1""","""1;2;97""","""Message signs""","""Smartphones""","""Appropriate skip""","""Other""","""Appropriate skip""","""Appropriate skip""","""Radio""","""None""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""None""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",0,"""0""",305.085
70100993,1,"""2018-09-11""","""WEB""","""Web Only""","""No""",1,"""66""","""Appropriate skip""","""Appropriate skip""","""Male""","""Self""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""African American, Black""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""Retired ""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Yes""","""-1""","""Other""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Other""","""Appropriate skip""","""Appropriate skip""","""radio""","""Driving""","""Driving""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",0,"""6""",615.7184
70100993,2,"""2018-09-11""","""WEB""","""Web Only""","""Yes""",1,"""53""","""Appropriate skip""","""Appropriate skip""","""Female""","""Spouse/Unmarried partner""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""African American, Black""","""Appropriate skip""","""Yes""","""Yes""","""1""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Same work place every day""","""Auto / van / truck (as the dri…","""Appropriate skip""","""Appropriate skip""","""No""","""42""","""5 days a week""","""Community and Social Service O…","""Appropriate skip""","""Health Care and Social Assista…","""Appropriate skip""","""No subsidy offered""","""Appropriate skip""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Yes""","""-1""","""1;2""","""Message signs""","""Smartphones""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Driving""","""Driving""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",0,"""2""",615.7184
70100998,1,"""2019-04-24""","""WEB""","""Web and CATI""","""No""",1,"""64""","""Appropriate skip""","""Appropriate skip""","""Female""","""Self""","""Appropriate skip""","""No""","""No""","""Appropriate skip""","""African American, Black""","""Appropriate skip""","""Yes""","""No""","""Appropriate skip""","""Disabled non-worker""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",…,"""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""No""","""-1""","""I don't know""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""I don't know""","""Not ascertained""","""2;3""","""Appropriate skip""","""Public Transportation - Bus Sc…","""Public Transportation - Train …","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Not ascertained""","""Cane or Walker""","""Appropriate skip""","""Appropriate skip""","""Cane or Walker""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""","""Appropriate skip""",1,"""6""",74.5194


In [43]:
trips.head()

sampno,perno,dayno,placeno,locno,arrtime,deptime,travtime,actdur,distance,mode,mode_o,mode_imputed,vehno,tpurp,tpurp_o,tpurp2,tpurp2_o,tpurp_imputed,cravl,paypk,pkamt,pkbas,pkbas_o,prkty,prkty_o,payf,payf_o,fare,trip_appt,trip_appt_why,trip_appt_why_1,trip_appt_why_2,trip_appt_why_3,trip_appt_why_4,trip_appt_why_5,trip_appt_why_6,trip_appt_why_se,trip_appt_why_rf,trip_appt_why_dk,trip_appt_why_o,trip_appt_why2,trip_appt_why2_1,trip_appt_why2_2,trip_appt_why2_3,trip_appt_why2_4,trip_appt_why2_5,trip_appt_why2_se,trip_appt_why2_rf,trip_appt_why2_dk,trip_appt_why2_o,initiated_by,perno_1,perno_2,perno_3,perno_4,perno_5,perno_6,perno_7,perno_8,perno_9,perno_10,perno_11,perno_12,companions,hhcount,nonhhcount,hhparty,party
i64,i64,i64,i64,i64,str,str,i64,i64,f64,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
20000228,1,2,1,10405,"""2017-10-02 03:00:00""","""2017-10-02 05:24:23""",0,144,-9.0,202,-1,-9,-9,1,"""-1""",0,-1,4,-1,-9,-1,-1,-1,3,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
20000228,1,2,2,10000,"""2017-10-02 05:26:16""","""2017-10-02 12:06:32""",2,400,0.81,-9,-1,-9,-1,-9,"""-1""",-9,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9
20000228,1,2,3,2000024,"""2017-10-02 12:40:20""","""2017-10-02 13:23:41""",34,43,6.35,-9,-1,1,-1,-9,"""-1""",-9,-1,5,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9
20000228,1,2,4,10401,"""2017-10-02 13:29:30""","""2017-10-02 13:37:28""",6,8,1.47,-9,-1,1,-1,-9,"""-1""",-9,-1,5,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9
20000228,1,2,5,2000026,"""2017-10-02 13:52:33""","""2017-10-02 14:01:41""",15,9,0.14,-9,-1,2,-1,-9,"""-1""",-9,-1,5,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9


In [50]:
# map
with open("cmap/trip_dictionary.json") as f:
    trip_mapper = json.load(f)
trip_mapper = {c: {int(k): v for k, v in d.items()} for c, d in trip_mapper.items()}

trips = trips.with_columns(
    pl.col(col).replace_strict(mapping, default=None).fill_null(pl.col(col))
    for col, mapping in trip_mapper.items()
)

# check for numerics
string_cols = [
    col for col in trips.columns
    if trips[col].dtype == pl.String
]
can_integer_cols = [
    col for col in string_cols
    if trips[col].str.to_integer(strict=False).null_count() == 0
]
can_float_cols = [
    col for col in string_cols
    if trips[col].cast(pl.Float64, strict=False).null_count() == 0
]

trips = trips.with_columns([
    pl.col(can_integer_cols).cast(pl.Int64)
])

trips

sampno,perno,dayno,placeno,locno,arrtime,deptime,travtime,actdur,distance,mode,mode_o,mode_imputed,vehno,tpurp,tpurp_o,tpurp2,tpurp2_o,tpurp_imputed,cravl,paypk,pkamt,pkbas,pkbas_o,prkty,prkty_o,payf,payf_o,fare,trip_appt,trip_appt_why,trip_appt_why_1,trip_appt_why_2,trip_appt_why_3,trip_appt_why_4,trip_appt_why_5,trip_appt_why_6,trip_appt_why_se,trip_appt_why_rf,trip_appt_why_dk,trip_appt_why_o,trip_appt_why2,trip_appt_why2_1,trip_appt_why2_2,trip_appt_why2_3,trip_appt_why2_4,trip_appt_why2_5,trip_appt_why2_se,trip_appt_why2_rf,trip_appt_why2_dk,trip_appt_why2_o,initiated_by,perno_1,perno_2,perno_3,perno_4,perno_5,perno_6,perno_7,perno_8,perno_9,perno_10,perno_11,perno_12,companions,hhcount,nonhhcount,hhparty,party
i64,i64,i64,i64,i64,str,str,i64,i64,f64,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
20000228,1,2,1,10405,"""2017-10-02 03:00:00""","""2017-10-02 05:24:23""",0,144,-9.0,202,-1,-9,-9,1,"""-1""",0,-1,4,-1,-9,-1,-1,-1,3,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
20000228,1,2,2,10000,"""2017-10-02 05:26:16""","""2017-10-02 12:06:32""",2,400,0.81,-9,-1,-9,-1,-9,"""-1""",-9,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9
20000228,1,2,3,2000024,"""2017-10-02 12:40:20""","""2017-10-02 13:23:41""",34,43,6.35,-9,-1,1,-1,-9,"""-1""",-9,-1,5,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9
20000228,1,2,4,10401,"""2017-10-02 13:29:30""","""2017-10-02 13:37:28""",6,8,1.47,-9,-1,1,-1,-9,"""-1""",-9,-1,5,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9
20000228,1,2,5,2000026,"""2017-10-02 13:52:33""","""2017-10-02 14:01:41""",15,9,0.14,-9,-1,2,-1,-9,"""-1""",-9,-1,5,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-9,-9,-9,-9,-9
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
70100900,1,2,1,2000001,"""2019-02-01 03:30:00""","""2019-02-01 04:00:00""",30,30,9.8,202,-1,-9,-9,26,"""-1""",0,-1,4,-1,2,-1,-1,-1,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
70100900,1,2,2,2000001,"""2019-02-01 04:15:00""","""2019-02-01 04:20:00""",15,5,0.0,202,-1,-9,-9,26,"""-1""",0,-1,4,-1,2,-1,-1,-1,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-9,-9,-9,2
70100900,1,2,3,2000001,"""2019-02-01 04:22:00""","""2019-02-01 04:25:00""",2,3,0.0,202,-1,-9,-9,26,"""-1""",0,-1,4,-1,2,-1,-1,-1,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-9,-9,-9,2
70100908,1,3,1,3000002,"""2019-01-20 16:43:06""","""2019-01-20 21:11:33""",2776,268,19.61,202,-1,-9,-9,4,"""-1""",0,-1,1,-1,2,-1,-1,-1,97,-9,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


In [51]:
len(trips)

98034

In [None]:
# import json

# maps = pl.read_csv(cmap_root / "data_dictionary.csv")

# for table, name in zip(
#     ["HOUSEHOLD", "PERSON", "GPS_DAY", "VEHICLE"],
#     ["hh", "person", "trip", "veh"]
# ):
#     data = maps.filter(pl.col("TABLE") == table)
#     mapper = {}
#     for i, frame in data.group_by("NAME"):
#         column = str(i[0]).lower()
#         mapper[column] = {}
#         keys = frame.select(pl.col("VALUE")).rows()
#         values = frame.select(pl.col("LABEL")).rows()
#         for k, v in zip(keys, values):
#             mapper[column][int(k[0])] = str(v[0])

#     with open(f"cmap/{name}_dictionary.json", "w", encoding="utf-8") as f:
#         json.dump(mapper, f, ensure_ascii=False, indent=4)