<a href="https://colab.research.google.com/github/abarb2022/Walmart-Recruiting---Store-Sales-Forecasting/blob/main/model_experiment_prophet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install kaggle



In [None]:
# Mount Google Drive in the Colab runtime; the Kaggle API token is copied from
# /content/drive/MyDrive by a later cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
! mkdir ~/.kaggle
!cp /content/drive/MyDrive/ML/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json
! kaggle competitions download -c walmart-recruiting-store-sales-forecasting
! unzip walmart-recruiting-store-sales-forecasting

Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 217MB/s]
Archive:  walmart-recruiting-store-sales-forecasting.zip
  inflating: features.csv.zip        
  inflating: sampleSubmission.csv.zip  
  inflating: stores.csv              
  inflating: test.csv.zip            
  inflating: train.csv.zip           


In [None]:
import pandas as pd
import numpy as np
from prophet import Prophet
import matplotlib.pyplot as plt
import holidays
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
import gc
import joblib
import os
from datetime import datetime
import json
# Display settings: show every column and do not wrap wide frames across
# several lines when a DataFrame is printed.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.expand_frame_repr', False)

In [None]:

# Load the raw competition tables from the files unpacked above; the zipped
# CSVs are passed to pandas as-is.
# NOTE(review): `sample` is not referenced again in the visible part of this
# notebook — confirm whether it is still needed.
stores = pd.read_csv('stores.csv')
train = pd.read_csv("train.csv.zip")
features = pd.read_csv('features.csv.zip')
sample = pd.read_csv('sampleSubmission.csv.zip')
test = pd.read_csv('test.csv.zip')

In [None]:
def merge_sales_context(sales, features, stores):
    """Attach the weekly store features and the store metadata to a sales table.

    Args:
        sales: Frame with at least ``Store``, ``Date`` and ``IsHoliday`` columns.
        features: Per-store weekly features keyed by ``Store``, ``Date`` and
            ``IsHoliday``; ``Date`` must already be datetime.
        stores: Per-store metadata keyed by ``Store``.

    Returns:
        A new frame with one row per row of ``sales`` (left joins).

    Raises:
        pandas.errors.MergeError: if ``features`` or ``stores`` contain
            duplicate keys, which would silently duplicate sales rows.
    """
    merged = sales.assign(Date=pd.to_datetime(sales['Date'])).merge(
        features, on=['Store', 'Date', 'IsHoliday'], how='left', validate='many_to_one'
    )
    return merged.merge(stores, on='Store', how='left', validate='many_to_one')


features = features.assign(Date=pd.to_datetime(features['Date']))
train_df = merge_sales_context(train, features, stores)
test_df = merge_sales_context(test, features, stores)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which appended a stray "None").
print("\n--- Merged Train Data Info ---")
train_df.info()
print("\n--- Merged Test Data Info ---")
test_df.info()

# The raw tables are no longer needed once merged; free them.
# Re-running this cell therefore requires re-running the loading cell first.
del train, test, features, stores
gc.collect()


--- Merged Train Data Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 421570 entries, 0 to 421569
Data columns (total 16 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   Store         421570 non-null  int64         
 1   Dept          421570 non-null  int64         
 2   Date          421570 non-null  datetime64[ns]
 3   Weekly_Sales  421570 non-null  float64       
 4   IsHoliday     421570 non-null  bool          
 5   Temperature   421570 non-null  float64       
 6   Fuel_Price    421570 non-null  float64       
 7   MarkDown1     150681 non-null  float64       
 8   MarkDown2     111248 non-null  float64       
 9   MarkDown3     137091 non-null  float64       
 10  MarkDown4     134967 non-null  float64       
 11  MarkDown5     151432 non-null  float64       
 12  CPI           421570 non-null  float64       
 13  Unemployment  421570 non-null  float64       
 14  Type          421570 non-null  objec

0

In [None]:
class MissingValueImputer(BaseEstimator, TransformerMixin):
    """Fill missing values in the markdown and numeric feature columns.

    Markdown columns receive a ``<col>_was_missing`` 0/1 indicator and their
    gaps are replaced with 0. The other numeric columns are forward-filled and
    then backward-filled in the row order of the frame given to ``transform``
    (no grouping is applied); values that are still missing afterwards fall
    back to the column mean learned in ``fit``.

    Parameters:
        markdown_cols: Columns treated as markdowns. Defaults to
            ``MarkDown1`` .. ``MarkDown5``.
        numerical_cols_to_impute: Columns that are filled by ffill/bfill/mean.
            Defaults to ``Temperature``, ``Fuel_Price``, ``CPI`` and
            ``Unemployment``.

    Attributes:
        means: Mapping ``column -> mean`` computed by the last ``fit`` call.
    """

    def __init__(self, markdown_cols=None, numerical_cols_to_impute=None):
        self.markdown_cols = markdown_cols if markdown_cols is not None else [f'MarkDown{i}' for i in range(1, 6)]
        self.numerical_cols_to_impute = numerical_cols_to_impute if numerical_cols_to_impute is not None else ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
        self.means = {}

    def fit(self, X, y=None):
        """Learn the fallback mean of every configured numeric column present in ``X``."""
        # Rebuilt from scratch so that refitting never keeps means from a
        # previous fit for columns that are absent this time.
        self.means = {
            col: X[col].mean()
            for col in self.numerical_cols_to_impute
            if col in X.columns
        }
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the configured columns imputed."""
        X_copy = X.copy()

        for col in self.markdown_cols:
            if col in X_copy.columns:
                # Record the gap before it is overwritten with 0.
                X_copy[f"{col}_was_missing"] = X_copy[col].isna().astype(int)
                X_copy[col] = X_copy[col].fillna(0)

        for col in self.numerical_cols_to_impute:
            if col in X_copy.columns:
                # Series.ffill()/bfill() replace the deprecated
                # fillna(method='ffill'/'bfill') spelling.
                X_copy[col] = X_copy[col].ffill().bfill()
                # Only an entirely empty column can still hold NaN here.
                if X_copy[col].isnull().any() and col in self.means:
                    X_copy[col] = X_copy[col].fillna(self.means[col])
        return X_copy

In [None]:
class AdvancedDateFeatureExtractor(BaseEstimator, TransformerMixin):
    """Derive calendar, holiday-period, markdown and economic features.

    Parameters:
        date_column: Name of the column holding the observation date.
        include_holidays: Add ``IsHolidayPeriod`` and cast a boolean
            ``IsHoliday`` column to int.
        include_seasonal: Add ``Season``, ``IsWeekend``, ``IsMonthEnd`` and
            ``IsMonthStart``.
        include_lags: Stored for API compatibility; ``transform`` does not use it.

    Attributes:
        total_markdown_mean_: Mean of the summed markdown columns seen in
            ``fit`` (``None`` when no markdown column was present). It is the
            denominator of ``MarkDown_Intensity`` so that every frame
            transformed after fitting is scaled by the same reference value.
    """

    def __init__(self, date_column: str = 'Date', include_holidays: bool = True,
                 include_seasonal: bool = True, include_lags: bool = False):
        self.date_column = date_column
        self.include_holidays = include_holidays
        self.include_seasonal = include_seasonal
        self.include_lags = include_lags

    @staticmethod
    def _markdown_columns(X):
        """Return the raw markdown columns of ``X`` (derived columns excluded)."""
        return [
            col for col in X.columns
            if col.startswith('MarkDown')
            and not col.endswith('_was_missing')
            # Derived by transform(); excluding it keeps a second transform
            # from adding the intensity into Total_MarkDown.
            and col != 'MarkDown_Intensity'
        ]

    def _is_holiday_period(self, date):
        """Return 1 if ``date`` falls in one of the hard-coded holiday windows, else 0.

        Scalar counterpart of the vectorised mask used in ``transform``; kept
        so existing callers of this helper continue to work.
        """
        month, day = date.month, date.day

        if month == 11 and day >= 22:
            return 1
        elif month == 12:
            return 1
        elif month == 1 and day <= 7:
            return 1
        elif month == 9 and day <= 7:
            return 1
        elif month == 5 and day >= 25:
            return 1
        else:
            return 0

    def fit(self, X, y=None):
        """Learn the reference mean used to normalise ``MarkDown_Intensity``."""
        markdown_cols = self._markdown_columns(X)
        self.total_markdown_mean_ = (
            X[markdown_cols].sum(axis=1).mean() if markdown_cols else None
        )
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the derived feature columns added.

        Raises:
            ValueError: if ``date_column`` is not a column of ``X``.
        """
        X_copy = X.copy()

        if self.date_column not in X_copy.columns:
            raise ValueError(f"Date column '{self.date_column}' not found in DataFrame.")

        X_copy[self.date_column] = pd.to_datetime(X_copy[self.date_column])
        dates = X_copy[self.date_column]

        X_copy['Year'] = dates.dt.year
        X_copy['Month'] = dates.dt.month
        X_copy['Day'] = dates.dt.day
        X_copy['DayOfWeek'] = dates.dt.dayofweek
        X_copy['Week'] = dates.dt.isocalendar().week.astype(int)
        X_copy['Quarter'] = dates.dt.quarter
        X_copy['DayOfYear'] = dates.dt.dayofyear

        # Cyclical encodings so that e.g. December and January end up close.
        # NOTE(review): ISO week numbers can reach 53, which maps slightly past
        # a full 52-week cycle — confirm this is acceptable.
        X_copy['Month_sin'] = np.sin(2 * np.pi * X_copy['Month'] / 12)
        X_copy['Month_cos'] = np.cos(2 * np.pi * X_copy['Month'] / 12)
        X_copy['Week_sin'] = np.sin(2 * np.pi * X_copy['Week'] / 52)
        X_copy['Week_cos'] = np.cos(2 * np.pi * X_copy['Week'] / 52)
        X_copy['DayOfWeek_sin'] = np.sin(2 * np.pi * X_copy['DayOfWeek'] / 7)
        X_copy['DayOfWeek_cos'] = np.cos(2 * np.pi * X_copy['DayOfWeek'] / 7)

        if self.include_seasonal:
            X_copy['Season'] = X_copy['Month'].map({12: 0, 1: 0, 2: 0,
                                                   3: 1, 4: 1, 5: 1,
                                                   6: 2, 7: 2, 8: 2,
                                                   9: 3, 10: 3, 11: 3})

            X_copy['IsWeekend'] = (X_copy['DayOfWeek'] >= 5).astype(int)

            X_copy['IsMonthEnd'] = (dates.dt.is_month_end).astype(int)
            X_copy['IsMonthStart'] = (dates.dt.is_month_start).astype(int)

        if self.include_holidays:
            # Vectorised form of _is_holiday_period (same windows), avoiding a
            # Python-level call per row.
            month, day = X_copy['Month'], X_copy['Day']
            in_holiday_period = (
                ((month == 11) & (day >= 22))
                | (month == 12)
                | ((month == 1) & (day <= 7))
                | ((month == 9) & (day <= 7))
                | ((month == 5) & (day >= 25))
            )
            X_copy['IsHolidayPeriod'] = in_holiday_period.astype(int)

            if 'IsHoliday' in X_copy.columns:
                if X_copy['IsHoliday'].dtype == bool:
                    X_copy['IsHoliday'] = X_copy['IsHoliday'].astype(int)

        markdown_cols = self._markdown_columns(X_copy)
        if markdown_cols:
            X_copy['Total_MarkDown'] = X_copy[markdown_cols].sum(axis=1)

            # Prefer the mean learned in fit() so train and validation frames
            # share one scale; fall back to the batch mean when the
            # transformer is used without fitting.
            reference_mean = getattr(self, 'total_markdown_mean_', None)
            if reference_mean is None or pd.isna(reference_mean):
                reference_mean = X_copy['Total_MarkDown'].mean()

            # The small epsilon avoids a division by zero when no markdowns exist.
            X_copy['MarkDown_Intensity'] = X_copy['Total_MarkDown'] / (reference_mean + 1e-8)
            X_copy['HasMarkDown'] = (X_copy['Total_MarkDown'] > 0).astype(int)

        if 'Fuel_Price' in X_copy.columns and 'CPI' in X_copy.columns:
            X_copy['Fuel_CPI_Ratio'] = X_copy['Fuel_Price'] / (X_copy['CPI'] + 1e-8)

        if 'CPI' in X_copy.columns and 'Unemployment' in X_copy.columns:
            X_copy['Economic_Index'] = (X_copy['CPI'] * 0.4 + (100 - X_copy['Unemployment']) * 0.6) / 100

        return X_copy


In [None]:
class DateFeatureExtractor(BaseEstimator, TransformerMixin):
    """Stateless transformer adding basic calendar features from a date column.

    Adds ``Year``, ``Month_sin``, ``Month_cos``, ``Week``, ``Day`` and
    ``DayOfWeek``; a boolean ``IsHoliday`` column is cast to int. The helper
    ``Month`` column is not part of the result.
    """

    def __init__(self, date_column='Date'):
        self.date_column = date_column

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the calendar features added.

        Raises:
            ValueError: if the configured date column is missing.
        """
        date_col = self.date_column
        if date_col not in X.columns:
            raise ValueError(f"Date column '{date_col}' not found in DataFrame.")

        frame = X.copy()
        frame[date_col] = pd.to_datetime(frame[date_col])

        frame['Year'] = frame[date_col].dt.year
        # Month is only an intermediate for the cyclical encoding below.
        frame['Month'] = frame[date_col].dt.month
        month_angle = 2 * np.pi * frame['Month'] / 12
        frame['Month_sin'] = np.sin(month_angle)
        frame['Month_cos'] = np.cos(month_angle)

        frame['Week'] = frame[date_col].dt.isocalendar().week.astype(int)
        frame['Day'] = frame[date_col].dt.day
        frame['DayOfWeek'] = frame[date_col].dt.dayofweek

        has_bool_holiday = 'IsHoliday' in frame.columns and frame['IsHoliday'].dtype == bool
        if has_bool_holiday:
            frame['IsHoliday'] = frame['IsHoliday'].astype(int)

        return frame.drop(columns=["Month"])


In [None]:
class CategoricalFeatureConverter(BaseEstimator, TransformerMixin):
    """Stateless transformer casting selected columns to pandas ``category`` dtype.

    Parameters:
        categorical_cols: Columns to cast; defaults to ``Store``, ``Dept`` and
            ``Type``. Columns absent from the input are ignored.
    """

    def __init__(self, categorical_cols=None):
        if categorical_cols is None:
            categorical_cols = ['Store', 'Dept', 'Type']
        self.categorical_cols = categorical_cols

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the configured columns as ``category``."""
        converted = X.copy()
        present = (col for col in self.categorical_cols if col in converted.columns)
        for col in present:
            converted[col] = converted[col].astype('category')
        return converted

In [None]:
def get_walmart_holidays():
    """Build the holiday table passed to Prophet's ``holidays`` argument.

    Returns:
        DataFrame with a datetime ``ds`` column and a ``holiday`` name column,
        16 rows (4 holidays x 4 years, 2010-2013) on a fresh 0..15 index, in
        the order Super Bowl, Labor Day, Thanksgiving, Christmas.
    """
    # NOTE(review): these look like the week dates on which each holiday is
    # recorded in the sales data rather than the calendar dates of the
    # holidays themselves — confirm against the dataset description.
    holiday_weeks = {
        'Super Bowl': ['2010-02-12', '2011-02-11', '2012-02-10', '2013-02-08'],
        'Labor Day': ['2010-09-10', '2011-09-09', '2012-09-07', '2013-09-06'],
        'Thanksgiving': ['2010-11-26', '2011-11-25', '2012-11-23', '2013-11-29'],
        'Christmas': ['2010-12-31', '2011-12-30', '2012-12-28', '2013-12-27'],
    }

    # Build the frame in one go: concatenating onto an empty frame triggers a
    # pandas FutureWarning about dtype inference and leaves a repeated 0..3 index.
    holiday_dates = pd.DataFrame(
        [(day, name) for name, days in holiday_weeks.items() for day in days],
        columns=['ds', 'holiday'],
    )
    holiday_dates['ds'] = pd.to_datetime(holiday_dates['ds'])
    return holiday_dates

walmart_holidays = get_walmart_holidays()

  holiday_dates = pd.concat([holiday_dates, pd.DataFrame({


In [None]:
!pip install prophet holidays joblib



In [None]:


# Separate the target from the features ('Id' is dropped only if present).
y_train = train_df['Weekly_Sales']
X_train = train_df.drop(columns=['Weekly_Sales', 'Id'], errors='ignore')

# Working copy in chronological order, with the target carried along so that
# it can be split with the same masks as the features.
temp_train_df = (
    X_train
    .assign(Date=pd.to_datetime(train_df['Date']), Weekly_Sales=y_train)
    .sort_values(by='Date')
    .reset_index(drop=True)
)

# Time-based hold-out: rows dated on or after the cutoff form the validation set.
validation_cutoff_date = pd.to_datetime('2012-07-01')
before_cutoff = temp_train_df['Date'] < validation_cutoff_date
from_cutoff = temp_train_df['Date'] >= validation_cutoff_date

X_train_split = temp_train_df[before_cutoff]
y_train_split = temp_train_df.loc[before_cutoff, 'Weekly_Sales']

X_val_split = temp_train_df[from_cutoff]
y_val_split = temp_train_df.loc[from_cutoff, 'Weekly_Sales']

# Only the imputer is active; the other two steps are currently disabled.
preprocessing_pipeline = Pipeline(steps=[
    # ('date_extractor', AdvancedDateFeatureExtractor()),
    ('missing_imputer', MissingValueImputer()),
    # ('cat_converter', CategoricalFeatureConverter()),
])

# Fit on the training period only, then apply the fitted pipeline to validation.
X_train_processed = preprocessing_pipeline.fit_transform(X_train_split)
X_val_processed = preprocessing_pipeline.transform(X_val_split)

# Write the target back positionally (raw array, so no index alignment).
X_train_processed['Weekly_Sales'] = y_train_split.to_numpy()
X_val_processed['Weekly_Sales'] = y_val_split.to_numpy()



def weighted_mean_absolute_error(y_true, y_pred, weights):
    """Return the weighted mean absolute error ``sum(w * |y - yhat|) / sum(w)``.

    Inputs are compared position by position. They are converted to float
    arrays first, so pandas objects with different indexes are not silently
    re-aligned by label and plain lists are accepted.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable to ``y_true``).
        weights: Per-observation weights (array-like, broadcastable to ``y_true``).

    Returns:
        The weighted MAE as a float; ``nan`` when the weights sum to zero
        (which includes empty input).

    Raises:
        ValueError: if the three inputs cannot be broadcast to a common shape.
    """
    y_true, y_pred, weights = np.broadcast_arrays(
        np.asarray(y_true, dtype=float),
        np.asarray(y_pred, dtype=float),
        np.asarray(weights, dtype=float),
    )

    total_weight = np.sum(weights)
    if total_weight == 0:
        # Undefined average; report nan instead of dividing by zero.
        return float('nan')

    return float(np.sum(weights * np.abs(y_true - y_pred)) / total_weight)

# Holiday weeks weigh five times as much as ordinary weeks in the weighted error.
val_weights = np.where(X_val_split['IsHoliday'].eq(1), 5, 1)

# Run bookkeeping, all counters starting at zero.
log_data = dict(
    total_combinations=0,
    models_trained=0,
    models_skipped=0,
    validation_metrics={},
)

  X_copy[col] = X_copy[col].fillna(method='ffill').fillna(method='bfill')
  X_copy[col] = X_copy[col].fillna(method='ffill').fillna(method='bfill')


In [None]:
def get_nearest_dept(store, dept, X_train_processed):
    """Find the *other* department of a store whose number is closest to ``dept``.

    Closeness is measured on the department number; ties are broken in favour
    of the department with the lower mean ``Weekly_Sales``. The queried
    department itself is never returned, since the caller needs a substitute
    for it.

    Args:
        store: Store identifier to look in.
        dept: Department number to find a neighbour for.
        X_train_processed: Frame with ``Store``, ``Dept`` and ``Weekly_Sales``.

    Returns:
        The neighbouring department number, or ``None`` when the store has no
        rows, ``dept`` is not numeric, or the store has no other department.
    """
    store_rows = X_train_processed[X_train_processed['Store'] == store]
    if len(store_rows) == 0:
        return None

    target = pd.to_numeric(dept, errors='coerce')
    if pd.isna(target):
        return None

    dept_means = (
        store_rows
        .assign(Dept=pd.to_numeric(store_rows['Dept'], errors='coerce'))
        .groupby('Dept')['Weekly_Sales']
        .mean()
        .reset_index()
    )

    # Without this filter the queried department would always win with a
    # distance of 0 whenever it has any rows in the store.
    candidates = dept_means[dept_means['Dept'] != target]
    if candidates.empty:
        return None

    candidates = candidates.assign(abs_diff=(candidates['Dept'] - target).abs())
    return candidates.sort_values(['abs_diff', 'Weekly_Sales'])['Dept'].iloc[0]

In [None]:
def _to_prophet_frame(data, regressors, with_target=True):
    """Select the date, optional target and regressor columns under Prophet's names."""
    columns = ['Date'] + (['Weekly_Sales'] if with_target else []) + list(regressors)
    return data[columns].rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'})


def _fit_prophet_model(train_frame, regressors):
    """Fit a Prophet model with the notebook's settings on ``train_frame``."""
    model = Prophet(
        holidays=walmart_holidays,
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,
        seasonality_mode='multiplicative'
    )
    for regressor in regressors:
        model.add_regressor(regressor)
    model.fit(train_frame)
    return model


def _select_combination(frame, store, dept):
    """Return the rows of ``frame`` belonging to one (store, dept) pair."""
    return frame[(frame['Store'] == store) & (frame['Dept'] == dept)]


def _score_on_validation(model, val_data, regressors):
    """Predict ``val_data`` with ``model`` and return actuals, predictions and errors."""
    # NOTE(review): predictions are matched to the actuals by position, which
    # assumes val_data is already in ascending Date order — confirm that the
    # forecast frame keeps the input row order.
    forecast = model.predict(_to_prophet_frame(val_data, regressors, with_target=False))

    val_actual = val_data['Weekly_Sales'].values
    val_pred = forecast['yhat'].values
    # Holiday weeks count five times as much as ordinary weeks.
    val_weights = np.where(val_data['IsHoliday'] == 1, 5, 1)

    return {
        'Date': val_data['Date'].values,
        'Actual': val_actual,
        'Predicted': val_pred,
        'MAE': mean_absolute_error(val_actual, val_pred),
        'WMAE': weighted_mean_absolute_error(val_actual, val_pred, val_weights),
        'IsHoliday': val_data['IsHoliday'].values,
    }


def train_models(X_train_processed, X_val_processed, use_fallback=False, min_train_samples=10):
    """Train one Prophet model per (Store, Dept) pair plus a global model.

    Args:
        X_train_processed: Training rows with ``Store``, ``Dept``, ``Date``,
            ``Weekly_Sales`` and every regressor column listed below.
        X_val_processed: Validation rows with the same columns.
        use_fallback: When True, pairs skipped for lack of training data are
            still validated, using the model of the nearest department of the
            same store (see ``get_nearest_dept``) or, failing that, the global
            model. Defaults to False, i.e. skipped pairs are not validated.
        min_train_samples: Pairs with fewer training rows than this are skipped.

    Returns:
        Tuple ``(models, model_global, val_results, log_data)``:
        ``models`` maps ``(store, dept)`` to its fitted model, ``model_global``
        is the model fitted on all training rows, ``val_results`` is a list of
        per-pair dicts (arrays of dates/actuals/predictions plus MAE and WMAE)
        and ``log_data`` holds the run counters.
    """
    log_data = {
        'total_combinations': 0,
        'models_trained': 0,
        'models_skipped': 0,
        'fallback_used': 0,
        'validation_metrics': {}
    }

    combinations = X_train_processed[['Store', 'Dept']].drop_duplicates()
    log_data['total_combinations'] = len(combinations)
    val_results = []
    models = {}
    skipped_combinations = []

    # NOTE(review): the original comment here read "es washale" (apparently
    # Georgian for "delete this") — confirm whether this regressor list is
    # still wanted.
    extra_regressors = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
                      'IsHoliday', 'Size', 'MarkDown1', 'MarkDown2',
                      'MarkDown3', 'MarkDown4', 'MarkDown5']

    print("\nTraining global fallback model...")
    model_global = _fit_prophet_model(
        _to_prophet_frame(X_train_processed, extra_regressors), extra_regressors
    )
    print("Global fallback model trained.")

    print(f"\nStarting training on {len(X_train_processed):,} samples")
    print(f"Found {len(combinations)} unique store-dept combinations")

    for idx, (store, dept) in enumerate(combinations.itertuples(index=False), 1):
        print(f"\nProcessing {idx}/{len(combinations)} - Store {store}, Dept {dept}")
        print("-" * 50)

        train_data = _select_combination(X_train_processed, store, dept)
        val_data = _select_combination(X_val_processed, store, dept)

        if len(train_data) < min_train_samples:
            print(f"  Skipping - only {len(train_data)} training samples")
            log_data['models_skipped'] += 1
            skipped_combinations.append((store, dept))
            continue

        prophet_train = _to_prophet_frame(train_data, extra_regressors)

        print(f"  Training on {len(prophet_train)} samples...")
        model = _fit_prophet_model(prophet_train, extra_regressors)
        models[(store, dept)] = model
        log_data['models_trained'] += 1

        if len(val_data) > 0:
            scores = _score_on_validation(model, val_data, extra_regressors)
            val_results.append({
                'Store': store,
                'Dept': dept,
                **scores,
                'UsedFallback': False
            })

            print(f"  Validation MAE: {scores['MAE']:.2f}, WMAE: {scores['WMAE']:.2f}")

    if use_fallback:
        for store, dept in skipped_combinations:
            print(f"\nProcessing skipped combination - Store {store}, Dept {dept}")
            val_data = _select_combination(X_val_processed, store, dept)

            if len(val_data) == 0:
                continue

            # Try the nearest department of the same store first.
            nearest_dept = get_nearest_dept(store, dept, X_train_processed)
            if nearest_dept is not None and (store, nearest_dept) in models:
                print(f"  Using nearest neighbor: Store {store}, Dept {nearest_dept}")
                model = models[(store, nearest_dept)]
                fallback_type = "nearest_neighbor"
            else:
                print("  Using global fallback model")
                model = model_global
                fallback_type = "global"

            scores = _score_on_validation(model, val_data, extra_regressors)
            val_results.append({
                'Store': store,
                'Dept': dept,
                **scores,
                'UsedFallback': True,
                'FallbackType': fallback_type
            })

            log_data['fallback_used'] += 1
            print(f"  Fallback Validation MAE: {scores['MAE']:.2f}, WMAE: {scores['WMAE']:.2f}")

    return models, model_global, val_results, log_data


In [None]:
# Fit the global model and one Prophet model per (Store, Dept) pair; also
# returns the per-pair validation results and the run counters.
models, model_global, val_results, log_data = train_models(X_train_processed, X_val_processed)


Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Flatten the per-pair validation results into one row per prediction,
# compute the overall metrics and persist both the rows and the run log.
if val_results:
    # zip() stops at the shortest array, so ragged results are truncated to
    # their common length.
    all_rows = [
        {
            'Store': result['Store'],
            'Dept': result['Dept'],
            'Date': date,
            'Actual': float(actual),
            'Predicted': float(predicted),
            'IsHoliday': bool(holiday_flag),
            'UsedFallback': result.get('UsedFallback', False),
            'FallbackType': result.get('FallbackType', None)
        }
        for result in val_results
        for date, actual, predicted, holiday_flag in zip(
            np.array(result['Date']),
            np.array(result['Actual']),
            np.array(result['Predicted']),
            np.array(result['IsHoliday']),
        )
    ]

    all_val = pd.DataFrame(all_rows)

    # Holiday rows weigh five times as much as ordinary rows.
    all_val['Weight'] = np.where(all_val['IsHoliday'], 5, 1)

    try:
        overall_wmae = weighted_mean_absolute_error(
            all_val['Actual'].astype(float),
            all_val['Predicted'].astype(float),
            all_val['Weight'].astype(float))

        # Unweighted averages of the per-pair scores (every pair counts once).
        avg_mae = np.mean([float(result['MAE']) for result in val_results])
        avg_wmae = np.mean([float(result['WMAE']) for result in val_results])

        # Share of validation rows that received a prediction, and share of
        # those predictions that came from a fallback model.
        coverage = len(all_val) / len(X_val_processed)
        fallback_share = len(all_val[all_val['UsedFallback']]) / len(all_val)

        log_data['validation_metrics'] = {
            'overall_wmae': float(overall_wmae),
            'average_mae': float(avg_mae),
            'average_wmae': float(avg_wmae),
            'num_validated': int(len(all_val)),
            'coverage': float(coverage),
            'fallback_usage': float(fallback_share)
        }

        print("\nOverall Validation Metrics:")
        print(f"- Weighted MAE (WMAE): {overall_wmae:.2f}")
        print(f"- Average MAE: {avg_mae:.2f}")
        print(f"- Average WMAE: {avg_wmae:.2f}")
        print(f"- Coverage: {coverage:.2%}")
        print(f"- Fallback Usage: {fallback_share:.2%}")

        all_val.to_csv('validation_results.csv', index=False)
        print("\nSaved validation results to validation_results.csv")

    except Exception as e:
        # Best effort: record the failure in the log instead of aborting the cell.
        print(f"Error calculating metrics: {str(e)}")
        log_data['validation_metrics'] = {
            'error': str(e),
            'num_validated': len(all_val)
        }

# The log is written even when no validation results exist.
try:
    with open('training_log.json', 'w') as f:
        # default=str stringifies anything json cannot serialise natively.
        json.dump(log_data, f, indent=2, default=str)
    print("\nSaved training logs to training_log.json")
except Exception as e:
    print(f"Error saving logs: {str(e)}")

print("\nTraining and validation complete!")
print(f"- Models trained: {log_data.get('models_trained', 'N/A')}")
print(f"- Models skipped: {log_data.get('models_skipped', 'N/A')}")
print(f"- Fallbacks used: {log_data.get('fallback_used', 'N/A')}")
# all_val only exists when there were validation results to flatten.
if 'all_val' in locals():
    print(f"- Validation coverage: {len(all_val)/len(X_val_processed):.2%}")
else:
    print("N/A")


Overall Validation Metrics:
- Weighted MAE (WMAE): 3040.82
- Average MAE: 2967.36
- Average WMAE: 3043.97
- Coverage: 99.72%
- Fallback Usage: 0.00%

Saved validation results to validation_results.csv

Saved training logs to training_log.json

Training and validation complete!
- Models trained: 3152
- Models skipped: 165
- Fallbacks used: 0
- Validation coverage: 99.72%


In [None]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-3.1.4-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==3.1.4 (from mlflow)
  Downloading mlflow_skinny-3.1.4-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.1.4->mlflow)
  Downloading databricks_sdk-0.61.0-py3-none-any.whl.metadata (39 kB)
Collecting opentelemetry-api<3,>=1.9.0 (from mlflow-skinny==3.1.4->mlflow)
  Downloading opentelemetry_api-1.36.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-sdk<3,>=1.9.0 (from mlflow-skinny==3.1.4->mlflow)
  Downloading opentele

In [None]:
!pip install dagshub

Collecting dagshub
  Downloading dagshub-0.6.2-py3-none-any.whl.metadata (12 kB)
Collecting appdirs>=1.4.4 (from dagshub)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting dacite~=1.6.0 (from dagshub)
  Downloading dacite-1.6.0-py3-none-any.whl.metadata (14 kB)
Collecting gql[requests] (from dagshub)
  Downloading gql-3.5.3-py2.py3-none-any.whl.metadata (9.4 kB)
Collecting dataclasses-json (from dagshub)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting treelib>=1.6.4 (from dagshub)
  Downloading treelib-1.8.0-py3-none-any.whl.metadata (3.3 kB)
Collecting pathvalidate>=3.0.0 (from dagshub)
  Downloading pathvalidate-3.3.1-py3-none-any.whl.metadata (12 kB)
Collecting boto3 (from dagshub)
  Downloading boto3-1.39.17-py3-none-any.whl.metadata (6.7 kB)
Collecting semver (from dagshub)
  Downloading semver-3.0.4-py3-none-any.whl.metadata (6.8 kB)
Collecting dagshub-annotation-converter>=0.1.5 (from dagshub)
  Downloading dagshub_an

In [None]:
import os
from getpass import getpass

import mlflow

# Credentials for the DagsHub-hosted MLflow server are taken from the
# environment, with an interactive prompt as fallback for the token.
# SECURITY: a credential was previously hard-coded in this cell. Treat it as
# compromised: revoke it on DagsHub and issue a new one.
os.environ.setdefault("MLFLOW_TRACKING_USERNAME", "goguaD")
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")

mlflow.set_tracking_uri("https://dagshub.com/goguaD/finalProjectML.mlflow")
mlflow.set_experiment("walmart-sales")

with mlflow.start_run(run_name="Prophet_Model_wo_Fallback") as run:
    for param_name in ("total_combinations", "models_trained", "models_skipped"):
        mlflow.log_param(param_name, log_data.get(param_name))

    # Map keys of log_data['validation_metrics'] to the MLflow metric names.
    metric_names = {
        "overall_wmae": "overall_wmae",
        "average_mae": "average_mae",
        "average_wmae": "average_wmae",
        "coverage": "validation_coverage",
    }
    metrics = log_data.get('validation_metrics', {})
    for source_key, metric_name in metric_names.items():
        # Log only metrics that were actually computed: substituting 0.0 for a
        # missing error metric would look like a perfect score in the tracker.
        if source_key in metrics:
            mlflow.log_metric(metric_name, metrics[source_key])

    print(f"\n✅ MLflow logging complete: Run ID = {run.info.run_id}")


✅ MLflow logging complete: Run ID = 755047df06bb4a049f6682f2c59078c2
🏃 View run Prophet_Model_wo_Fallback at: https://dagshub.com/goguaD/finalProjectML.mlflow/#/experiments/0/runs/755047df06bb4a049f6682f2c59078c2
🧪 View experiment at: https://dagshub.com/goguaD/finalProjectML.mlflow/#/experiments/0
