In [1]:
import geopandas as gpd
import pandas as pd
from calendar import SATURDAY

from data.constants import (DNC_START, DNC_END)
from data.datemath import from_ymd

In [2]:
# Input locations. Train, bus and both Uber extracts are read from ../data/raw;
# bike rides are read from the interim "v3" geoparquet.
train_rides_in = "../data/raw/train_rides.csv"
bus_rides_in = "../data/raw/bus_rides.csv"
bike_rides_in = "../data/interim/bike_rides_v3.geoparquet"
uber_tract_rides_in = "../data/raw/uber_tract_rides.parquet"
uber_comm_rides_in = "../data/raw/uber_comm_rides.parquet"

# Output locations. Everything is written under ../data/interim, keeping each
# input's file format; the bike rides file is written as "v4".
train_rides_out = "../data/interim/train_rides.csv"
bus_rides_out = "../data/interim/bus_rides.csv"
bike_rides_out = "../data/interim/bike_rides_v4.geoparquet"
uber_tract_rides_out = "../data/interim/uber_tract_rides.parquet"
uber_comm_rides_out = "../data/interim/uber_comm_rides.parquet"

# Pipeline in

In [3]:
# CSV inputs, read with pandas (train first, then bus).
train_rides, bus_rides = map(pd.read_csv, (train_rides_in, bus_rides_in))

# The bike rides geoparquet is read through geopandas.
bike_rides = gpd.read_parquet(bike_rides_in)

# Parquet inputs, read with pandas (tract-level first, then community-level).
uber_tract_rides, uber_comm_rides = map(
    pd.read_parquet, (uber_tract_rides_in, uber_comm_rides_in)
)

# Time-Varying Features

## DNC

In [4]:
def label_dnc(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame equal to ``df`` plus a boolean ``DNC`` column.

    ``DNC`` is True for rows whose ``date`` lies within
    ``[DNC_START, DNC_END]`` (both bounds inclusive) and False otherwise.
    The input frame is not modified.

    Args:
        df: Frame with a ``date`` column that can be compared against
            ``DNC_START`` / ``DNC_END``.

    Returns:
        A frame of the same type as ``df`` with ``DNC`` added (or replaced).
    """
    # Series.between is inclusive on both ends by default, i.e. the same test
    # as (date >= DNC_START) & (date <= DNC_END).
    return df.assign(DNC=df['date'].between(DNC_START, DNC_END))


# One shared definition of the DNC window for every ride table.
train_rides = train_rides.pipe(label_dnc)
bus_rides = bus_rides.pipe(label_dnc)
bike_rides = bike_rides.pipe(label_dnc)
uber_tract_rides = uber_tract_rides.pipe(label_dnc)
uber_comm_rides = uber_comm_rides.pipe(label_dnc)

## Daytype

In [5]:
def label_daytype(df: pd.DataFrame):
    """Return a new frame equal to ``df`` plus day-of-week features.

    Added columns:
        dotw: ``from_ymd(date).weekday()`` evaluated for every row's ``date``.
        is_weekend: True where ``dotw >= calendar.SATURDAY`` (i.e. >= 5).

    NOTE(review): the weekend test presumes ``from_ymd`` returns a date-like
    object whose ``weekday()`` numbers Monday as 0 -- confirm in data.datemath.
    """

    def _weekday_of(ymd):
        # Parse one date value and take its weekday number.
        return from_ymd(ymd).weekday()

    def _falls_on_weekend(frame):
        # Called by assign() with the frame that already contains `dotw`.
        return frame.dotw >= SATURDAY

    weekday_numbers = df.date.apply(_weekday_of)
    return df.assign(dotw=weekday_numbers, is_weekend=_falls_on_weekend)

In [6]:
# Add the `dotw` / `is_weekend` columns to every ride table.
train_rides = label_daytype(train_rides)
bike_rides = label_daytype(bike_rides)
bus_rides = label_daytype(bus_rides)
uber_tract_rides = label_daytype(uber_tract_rides)
uber_comm_rides = label_daytype(uber_comm_rides)

# Pipeline out

In [7]:
# CSV outputs (train, then bus), written without the index.
for frame, destination in (
    (train_rides, train_rides_out),
    (bus_rides, bus_rides_out),
):
    frame.to_csv(destination, index=False)

# Parquet outputs (bike, Uber tract, Uber community), written without the index.
for frame, destination in (
    (bike_rides, bike_rides_out),
    (uber_tract_rides, uber_tract_rides_out),
    (uber_comm_rides, uber_comm_rides_out),
):
    frame.to_parquet(destination, index=False)