# Walmart Sales Prediction - Model Training and Evaluation

This notebook trains multiple machine learning models and compares their performance.

In [None]:
# --- Standard library ---
import sys
import warnings

# Make the notebook's parent directory importable; the `src` imports
# below rely on this entry being on sys.path first.
sys.path.append('..')

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# --- Project-local ---
from src.data_preprocessing import WalmartDataPreprocessor
from src.model_training import WalmartModelTrainer
from src.visualization import WalmartVisualizer

# Suppress all warnings from this point on. This runs after the imports,
# so warnings raised while importing are still displayed.
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

## 1. Load and Preprocess Data

In [None]:
# Location of the raw dataset, relative to the notebook's working directory.
RAW_DATA_PATH = '../data/raw/Walmart.csv'

# One preprocessor instance is shared by all preprocessing cells below.
preprocessor = WalmartDataPreprocessor()

# Load the dataset through the preprocessor and report its dimensions.
df = preprocessor.load_data(RAW_DATA_PATH)
print(f"\nDataset shape: {df.shape}")

# Cell output: preview of the first rows.
df.head()

In [None]:
# Print the dataset's info() summary to inspect what was loaded
# (assumes df is a pandas DataFrame — confirm against load_data).
df.info()

In [None]:
# Delegate missing-value handling to the preprocessor; df is rebound to
# whatever it returns, so later cells work on the cleaned data.
df = preprocessor.handle_missing_values(df)

In [None]:
# Run the preprocessor's feature-engineering step and rebind df to its
# result; the shape is printed so the effect on the columns is visible.
df = preprocessor.feature_engineering(df)
print(f"\nDataset shape after feature engineering: {df.shape}")

# Cell output: preview of the first rows after feature engineering.
df.head()

In [None]:
# Columns to treat as categorical. The names are dataset-specific:
# adjust them to match the columns actually present in your data.
categorical_columns = ['Store', 'Holiday_Flag']

# Keep only the configured names that df really contains, so a missing
# column is skipped instead of being handed to the encoder.
existing_categorical = [
    name for name in categorical_columns if name in df.columns
]

# When none of the configured columns exist, nothing is encoded and
# nothing is printed.
if existing_categorical:
    df = preprocessor.encode_categorical_features(df, existing_categorical)
    print(f"Encoded columns: {existing_categorical}")

In [None]:
# Column the models learn to predict; change it if your dataset names
# the target differently.
TARGET_COLUMN = 'Weekly_Sales'

# prepare_data returns four objects, unpacked here as train/test
# features followed by train/test targets.
X_train, X_test, y_train, y_test = preprocessor.prepare_data(
    df,
    TARGET_COLUMN,
)

# List the feature columns the models will be trained on.
print(f"\nFeature columns: {X_train.columns.tolist()}")

## 2. Train Multiple Models

In [None]:
# Create the trainer and let it set up its candidate models; they are
# exposed afterwards through the `trainer.models` mapping.
trainer = WalmartModelTrainer()
trainer.initialize_models()

# Show the names of the models that are about to be trained.
print(f"Models to train: {list(trainer.models.keys())}")

In [None]:
# Hand both the training split and the held-out test split to the
# trainer in a single call covering every model.
trainer.train_all_models(
    X_train,
    y_train,
    X_test,
    y_test,
)

## 3. Compare Model Performance

In [None]:
# Collect the trainer's per-model results as a table; results_df is
# reused below for plotting and for the CSV export.
results_df = trainer.get_results_dataframe()

# Cell output: the comparison table itself.
results_df

In [None]:
# One visualizer, configured with '../figures' as its save directory, is
# shared by all plotting cells that follow.
visualizer = WalmartVisualizer(save_dir='../figures')

# Plot the model comparison from the results table.
visualizer.plot_model_comparison(
    results_df,
    save_name='model_comparison.png',
)

## 4. Analyze Best Model

In [None]:
# Fetch the winning estimator and its name from the trainer
# (presumably set during train_all_models — confirm in src/model_training).
best_model = trainer.best_model
best_model_name = trainer.best_model_name

print(f"Best Model: {best_model_name}")
# Plain string literal: there is nothing to interpolate, so no f-prefix.
print("\nTest Metrics:")

# trainer.results is indexed by model name, then by 'test_metrics'; each
# metric value is printed with four decimal places.
test_metrics = trainer.results[best_model_name]['test_metrics']
for metric, value in test_metrics.items():
    print(f"{metric}: {value:.4f}")

In [None]:
# Predict on the held-out test features; y_pred is reused by the
# residual plot in the next cell.
y_pred = best_model.predict(X_test)

# Compare predicted against actual target values, labelling the plot
# with the best model's name.
plot_title = f'{best_model_name} - Predictions vs Actual'
visualizer.plot_predictions_vs_actual(
    y_test,
    y_pred,
    title=plot_title,
    save_name='predictions_vs_actual.png',
)

In [None]:
# Residual plot for the best model, built from the test targets and the
# predictions computed in the previous cell.
visualizer.plot_residuals(
    y_test,
    y_pred,
    save_name='residuals.png',
)

In [None]:
# Only estimators that expose `feature_importances_` can be plotted
# here; for any other model this cell does nothing.
if hasattr(best_model, 'feature_importances_'):
    # Column names of the training features label the importances.
    feature_names = X_train.columns.tolist()
    visualizer.plot_feature_importance(
        best_model,
        feature_names,
        top_n=15,
        save_name='feature_importance.png',
    )

## 5. Save Model

In [None]:
# Save the best model.
from pathlib import Path

model_path = '../models/best_model.pkl'

# Ensure the target directory exists (e.g. on a fresh checkout).
# NOTE(review): save_best_model is not visible here; if it already creates
# the directory, this call is a harmless no-op.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

trainer.save_best_model(model_path)

In [None]:
# Save the model comparison table as CSV.
from pathlib import Path

results_path = '../results/model_comparison.csv'

# DataFrame.to_csv does not create missing parent directories and raises
# OSError if '../results' is absent, so create it first.
Path(results_path).parent.mkdir(parents=True, exist_ok=True)

results_df.to_csv(results_path, index=False)
print(f"Results saved to {results_path}")

## 6. Making Predictions on New Data

In [None]:
# Example: run the best model on the first 5 rows of the test features.
sample_predictions = best_model.predict(X_test.iloc[:5])

# Matching first 5 actual target values, taken once and reused for both
# the 'Actual' column and the difference.
sample_actuals = y_test.iloc[:5].values

# Side-by-side table; 'Difference' is actual minus predicted.
comparison_df = pd.DataFrame({
    'Actual': sample_actuals,
    'Predicted': sample_predictions,
    'Difference': sample_actuals - sample_predictions,
})

print("Sample Predictions:")
# Cell output: the comparison table.
comparison_df

## Summary

This notebook demonstrated:
1. Data preprocessing and feature engineering
2. Training multiple machine learning models
3. Comparing model performance
4. Analyzing the best model
5. Saving the best model and the comparison results
6. Making predictions

The best model, saved to `../models/best_model.pkl`, can now be loaded and used for production predictions!