In [None]:
# (Re)load IPython's autoreload extension and enable mode 2, so edits to
# imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Move the working directory up one level.
# NOTE(review): presumably this makes the project root the working directory
# so that the `src` package imports below resolve -- confirm.
# NOTE(review): a relative `cd` is not idempotent; re-running this cell moves
# up one more directory each time.
%cd ..

In [None]:
import pandas as pd
from src.paths import TRANSFORMED_DATA_DIR

In [None]:
# Read the transformed tabular dataset (parquet file) into a DataFrame.
df = pd.read_parquet(
    path=TRANSFORMED_DATA_DIR / 'tabular_data.parquet',
)

In [None]:
# Peek at the first five rows.
df.head(n=5)

In [None]:
from datetime import datetime
from src.data_split import train_test_split

# Split features/target with the project helper, using 2022-06-01 00:00:00
# as the cutoff date.
# NOTE(review): presumably rows before the cutoff become the training set and
# the rest the test set -- confirm in src.data_split.
X_train, y_train, X_test, y_test = train_test_split(
    df,
    target_column_name='target_rides_next_hour',
    cutoff_date=datetime(year=2022, month=6, day=1),
)

In [None]:
# Show the dimensions of each split as `<name>.shape=<tuple>`, one per line.
print(
    f'{X_train.shape=}',
    f'{y_train.shape=}',
    f'{X_test.shape=}',
    f'{y_test.shape=}',
    sep='\n',
)

In [None]:
import numpy as np

class BaselineModelPreviousHour:
    """
    Naive baseline: prediction = actual demand observed in the last hour.
    """
    def fit(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
        """
        No-op. This baseline has nothing to learn; the method exists only
        so that the class exposes the usual fit/predict interface of
        standard ML frameworks.
        """
        pass

    def predict(self, X_test: pd.DataFrame) -> pd.Series:
        """
        Predict that the rides next hour will be exactly the same
        as the previous hour.

        Args:
            X_test: feature table; must contain a
                'rides_previous_1_hour' column.

        Returns:
            The 'rides_previous_1_hour' column of X_test as a pandas
            Series (not a numpy array), indexed like X_test.

        Raises:
            KeyError: if 'rides_previous_1_hour' is not a column of X_test.
        """
        return X_test['rides_previous_1_hour']

In [None]:
# Build the previous-hour baseline and predict on the test features
# (fit is skipped here; it is a no-op for this model).
model = BaselineModelPreviousHour()
predictions = model.predict(X_test=X_test)

In [None]:
# Display the current value of `predictions` as the cell output.
predictions

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
# Mean absolute error between the test targets and the current `predictions`.
test_mae = mean_absolute_error(
    y_true=y_test,
    y_pred=predictions,
)

In [None]:
# Print the error as `test_mae=<value>`, formatted to four decimal places.
print(f'{test_mae=:.4f}')

In [None]:
class BaselineModelPreviousWeek:
    """
    Naive baseline: prediction = actual demand observed at t - 7 days
    (i.e. 7 * 24 = 168 hours earlier).
    """
    def fit(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
        """
        No-op. This baseline has nothing to learn; the method exists only
        so that the class exposes the usual fit/predict interface.
        """
        pass

    def predict(self, X_test: pd.DataFrame) -> pd.Series:
        """
        Predict that the rides next hour will equal the value stored in
        the 168-hour (7-day) lag column.

        Args:
            X_test: feature table; must contain a
                'rides_previous_168_hour' column.

        Returns:
            The 'rides_previous_168_hour' column of X_test as a pandas
            Series (not a numpy array), indexed like X_test.

        Raises:
            KeyError: if 'rides_previous_168_hour' is not a column of X_test.
        """
        # 7*24 is spelled out so the "one week in hours" intent stays visible.
        return X_test[f'rides_previous_{7*24}_hour']

In [None]:
# Build the previous-week baseline and predict on the test features
# (fit is skipped here; it is a no-op for this model).
model = BaselineModelPreviousWeek()
predictions = model.predict(X_test=X_test)

In [None]:
# Mean absolute error between the test targets and the current `predictions`.
test_mae = mean_absolute_error(
    y_true=y_test,
    y_pred=predictions,
)

In [None]:
# Print the error as `test_mae=<value>`, formatted to four decimal places.
print(f'{test_mae=:.4f}')

In [None]:
class BaselineModelLast4Weeks:
    """
    Prediction = average of the actual demand observed at t - 7 days,
    t - 14 days, t - 21 days and t - 28 days.
    """
    def fit(self, X_train: pd.DataFrame, y_train: pd.Series):
        """
        No-op: there is nothing to fit for this baseline.
        """

    def predict(self, X_test: pd.DataFrame) -> pd.Series:
        """
        Return, for every row of X_test, one quarter of the sum of the
        four weekly lag columns (168, 336, 504 and 672 hours back).
        """
        hours_per_week = 7 * 24
        lag_columns = [
            f'rides_previous_{weeks_back * hours_per_week}_hour'
            for weeks_back in range(1, 5)
        ]

        # Accumulate left to right, starting from the most recent week, so
        # the additions happen in the same order as a hand-written a+b+c+d.
        running_total = X_test[lag_columns[0]]
        for column in lag_columns[1:]:
            running_total = running_total + X_test[column]

        return 0.25 * running_total

In [None]:
# Build the last-4-weeks baseline and predict on the test features
# (fit is skipped here; it is a no-op for this model).
model = BaselineModelLast4Weeks()
predictions = model.predict(X_test=X_test)

In [None]:
# Mean absolute error between the test targets and the current `predictions`.
test_mae = mean_absolute_error(
    y_true=y_test,
    y_pred=predictions,
)

In [None]:
# Print the error as `test_mae=<value>`, formatted to four decimal places.
print(f'{test_mae=:.4f}')