In [14]:
%load_ext autoreload
%autoreload 2
import sys
import os
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.metrics import mean_absolute_error
from prophet import Prophet
from src.config import TRANSFORMED_DATA_DIR
from src.data_utils import split_time_series_data
from src.experiment_utils import set_mlflow_tracking
from dotenv import load_dotenv
import mlflow
from mlflow.models.signature import infer_signature

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# Load data
df = pd.read_parquet(TRANSFORMED_DATA_DIR / "tabular_data.parquet")

# Split data
X_train, y_train, X_test, y_test = split_time_series_data(
    df,
    cutoff_date=datetime(2023, 9, 1, 0, 0, 0),
    target_column="target"
)

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Ensure y_train and y_test have a datetime index
if not isinstance(y_train.index, pd.DatetimeIndex):
    y_train.index = pd.date_range(start="2022-01-01", periods=len(y_train), freq="D")
if not isinstance(y_test.index, pd.DatetimeIndex):
    y_test.index = pd.date_range(start=y_train.index[-1] + pd.Timedelta(days=1), 
                                 periods=len(y_test), freq="D")

# Prepare data for Prophet (requires 'ds' and 'y' columns)
train_df = pd.DataFrame({
    'ds': y_train.index,
    'y': y_train.values
})
test_df = pd.DataFrame({
    'ds': y_test.index,
    'y': y_test.values
})

# Train the Prophet model
prophet_model = Prophet(
    yearly_seasonality=True,  # Enable yearly seasonality
    weekly_seasonality=True,  # Enable weekly seasonality
    daily_seasonality=True    # Enable daily seasonality
)
prophet_model.fit(train_df)

# Make future dataframe for predictions
future = prophet_model.make_future_dataframe(periods=len(y_test), freq="D")
forecast = prophet_model.predict(future)

# Extract predictions for the test period
predictions = forecast.tail(len(y_test))['yhat'].values

# Compute Mean Absolute Error (MAE)
test_mae = mean_absolute_error(y_test, predictions)
print(f"Test MAE: {test_mae:.4f}")

# Set up MLflow
load_dotenv()
mlflow = set_mlflow_tracking()

# Custom function to log Prophet model to MLflow
def log_prophet_to_mlflow(model, experiment_name, metric_name, score, test_df):
    with mlflow.start_run():
        # Log hyperparameters
        mlflow.log_param("yearly_seasonality", True)
        mlflow.log_param("weekly_seasonality", True)
        mlflow.log_param("daily_seasonality", True)
        
        # Log metric
        mlflow.log_metric(metric_name, score)
        
        # Use test_df['ds'] as input for signature inference
        input_df = test_df[['ds']]  # Prophet expects a DataFrame with 'ds'
        predictions = model.predict(input_df)['yhat']
        
        # Infer signature
        signature = infer_signature(input_df, predictions)
        
        # Log the model using Prophet flavor
        mlflow.prophet.log_model(model, "model", signature=signature)

# Log the Prophet model
log_prophet_to_mlflow(prophet_model, "Prophet", "mean_absolute_error", test_mae, test_df)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


DEBUG:cmdstanpy:cmd: where.exe tbb.dll
cwd: None
DEBUG:cmdstanpy:TBB already found in load path


(55900, 674)
(55900,)
(31720, 674)
(31720,)


DEBUG:cmdstanpy:input tempfile: C:\Users\Windows\AppData\Local\Temp\tmppgrzcjv2\4t2y9npy.json
DEBUG:cmdstanpy:input tempfile: C:\Users\Windows\AppData\Local\Temp\tmppgrzcjv2\tbhhghih.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['D:\\Code\\PythonTestEnvForPy11\\Lib\\site-packages\\prophet\\stan_model\\prophet_model.bin', 'random', 'seed=55409', 'data', 'file=C:\\Users\\Windows\\AppData\\Local\\Temp\\tmppgrzcjv2\\4t2y9npy.json', 'init=C:\\Users\\Windows\\AppData\\Local\\Temp\\tmppgrzcjv2\\tbhhghih.json', 'output', 'file=C:\\Users\\Windows\\AppData\\Local\\Temp\\tmppgrzcjv2\\prophet_model72mt7knh\\prophet_model-20250304172311.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:23:11 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:23:43 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:src.experiment_utils:MLflow tracking URI and c

Test MAE: 33.1503
🏃 View run rogue-ape-952 at: https://dagshub.com/s3akash/USTAXIMODEL.mlflow/#/experiments/0/runs/1db317670a5849f1af88bf514bdcb1cb
🧪 View experiment at: https://dagshub.com/s3akash/USTAXIMODEL.mlflow/#/experiments/0
