In [1]:
# Announce the run; this executes before any of the imports below.
print("Starting forecasting pipeline...")

# =========================
# Imports & Timer
# =========================
import time
import pandas as pd
import numpy as np
import os
import joblib
# NOTE(review): RandomForestRegressor and DecisionTreeRegressor are not
# referenced in the visible cells — confirm they are unused before removing.
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.linear_model import LinearRegression

# Reference point for the runtime report printed at the end of the notebook.
# It is taken after the imports, so import time is not part of that figure.
START = time.perf_counter()


Starting forecasting pipeline...


In [2]:

# =========================
# Constants
# =========================
# Day zero of the linear time index used by the feature builder.
ORIGIN_DATE = pd.Timestamp("2019-01-01")

# Persisted artefacts: the fitted regressor and the feature pipeline.
MODEL_FILE = "model_files/model.pkl"
PIPELINE_FILE = "model_files/pipeline.pkl"

# Dates to forecast (input) and where the forecast is written (output).
INPUT_FILE = "predictions/input.csv"
OUTPUT_FILE = "predictions/output.csv"

# Create the working directories up front; existing ones are left alone.
for _directory in ("model_files", "predictions"):
    os.makedirs(_directory, exist_ok=True)


In [3]:

# =========================
# Feature Engineering
# =========================
def build_time_features(X, origin=None):
    """Turn calendar columns into trend and seasonality features.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain the integer-like columns ``year``, ``month`` and ``day``.
        The frame is not modified.
    origin : pandas.Timestamp, optional
        Date that maps to ``t == 0``. Defaults to the module-level
        ``ORIGIN_DATE`` when omitted.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``X`` with the columns ``t``, ``sin_year``, ``cos_year``,
        ``sin_wom`` and ``cos_wom`` (in that order).

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If a year/month/day combination is not a valid calendar date
        (raised by ``pd.to_datetime``).

    Notes
    -----
    Week-of-month takes the values 1-5, so it is encoded on a 5-step cycle.
    A 4-step cycle would give week 5 (days 29-31) exactly the same sin/cos
    pair as week 1 (days 1-7). Models fitted with a 4-step encoding must be
    retrained before being used with these features.
    """
    if origin is None:
        # Looked up lazily so the default always follows the notebook constant.
        origin = ORIGIN_DATE

    # Number of distinct week-of-month values produced below (1..5).
    weeks_per_month = 5

    # Build date
    dates = pd.to_datetime(
        dict(year=X["year"], month=X["month"], day=X["day"])
    )

    # Time index (days since origin)
    t = (dates - origin).dt.days

    # Yearly seasonality
    year_angle = 2 * np.pi * t / 365.25

    # Week of month (1–4/5): days 1-7 -> 1, 8-14 -> 2, ..., 29-31 -> 5
    week_of_month = ((X["day"] - 1) // 7) + 1

    # Week-of-month seasonality
    wom_angle = 2 * np.pi * week_of_month / weeks_per_month

    return pd.DataFrame(
        {
            "t": t,
            "sin_year": np.sin(year_angle),
            "cos_year": np.cos(year_angle),
            "sin_wom": np.sin(wom_angle),
            "cos_wom": np.cos(wom_angle),
        },
        index=X.index,
    )


In [4]:

# =========================
# Pipeline
# =========================
# Single-step pipeline: wrap the feature builder so it can be fitted,
# applied and persisted through the scikit-learn transformer interface.
time_feature_step = FunctionTransformer(build_time_features)
pipeline = Pipeline(steps=[("time_features", time_feature_step)])


In [5]:

# =========================
# Load Dataset
# =========================
# Read the processed dataset, then discard every row containing a missing value.
df = pd.read_csv("data/processed_Data.csv")
df = df.dropna()

# Target is the price; the calendar columns are the model inputs.
y = df["price"]
X = df[["year", "month", "day"]]


In [6]:

# =========================
# Train Model
# =========================
# Retrain unless BOTH artefacts are already on disk. The inference cell loads
# the model and the pipeline, so a missing pipeline file must trigger a rebuild
# too instead of surfacing later as a load error.
if not (os.path.exists(MODEL_FILE) and os.path.exists(PIPELINE_FILE)):
    print("Training forecasting model...")

    # Derive the time features from the calendar columns.
    X_transformed = pipeline.fit_transform(X)

    model = LinearRegression()
    model.fit(X_transformed, y)

    # Persist both artefacts together so they always come from the same run.
    joblib.dump(model, MODEL_FILE)
    joblib.dump(pipeline, PIPELINE_FILE)

    print("Model trained and saved.")


In [7]:

# =========================
# Inference
# =========================
# Restore the persisted artefacts.
# NOTE: joblib files are pickle-based — only load files this notebook produced.
model = joblib.load(MODEL_FILE)
pipeline = joblib.load(PIPELINE_FILE)

# Dates to forecast, one row per day.
input_df = pd.read_csv(INPUT_FILE)
date_columns = input_df[["year", "month", "day"]]

# Same feature transformation as in training, then predict.
X_future = pipeline.transform(date_columns)
predictions = model.predict(X_future)

# Attach the forecast to the input rows and write everything out.
input_df["predicted_price"] = predictions
input_df.to_csv(OUTPUT_FILE, index=False)

# =========================
# Runtime
# =========================
# Wall-clock time elapsed since START was captured.
elapsed_seconds = time.perf_counter() - START
print(f"Pipeline completed in {elapsed_seconds:.2f}s")
# Report the configured destination so the message stays accurate
# if OUTPUT_FILE is ever changed.
print(f"Forecast saved to {OUTPUT_FILE}")



Pipeline completed in 0.19s
Forecast saved to predictions/output.csv
