# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Read the sales workbook into a DataFrame
file_path = '/content/Data.xlsx'  # Replace with your file path
df = pd.read_excel(file_path)

# Unpivot: keep the identifier columns, turn every other column header into
# a "Date" value and its cell into the matching "Sales" value
id_columns = ["Warehouse id", "Region", "SKU id"]
df_long = pd.melt(df, id_vars=id_columns, var_name="Date", value_name="Sales")

# Parse the former column headers as datetimes
parsed_dates = pd.to_datetime(df_long['Date'])
df_long = df_long.assign(Date=parsed_dates)

# Order rows chronologically within each warehouse/SKU series
series_order = ["Warehouse id", "SKU id", "Date"]
df_long = df_long.sort_values(by=series_order)

# Treat missing sales as zero
filled_sales = df_long['Sales'].fillna(0)
df_long = df_long.assign(Sales=filled_sales)

# Feature Engineering: Add lag and rolling mean features
def create_features(data, lags, rolling_windows):
    """Add per-series lag and rolling-mean columns derived from 'Sales'.

    A series is the set of rows sharing the same ('Warehouse id', 'SKU id')
    pair. Rows of one series must already be in chronological order; rows of
    different series may be interleaved, because every feature is computed
    inside its own group.

    Args:
        data: DataFrame with 'Warehouse id', 'SKU id' and 'Sales' columns.
            It is modified in place (new columns are added).
        lags: Iterable of positive integers; each adds a ``lag_<n>`` column
            holding the 'Sales' value ``n`` rows earlier in the same series.
        rolling_windows: Iterable of positive integers; each adds a
            ``rolling_mean_<w>`` column holding the mean of the ``w`` rows
            preceding the current one in the same series (the current row is
            excluded).

    Returns:
        The same ``data`` object, with the feature columns added. Rows that
        lack enough history hold NaN in the affected columns.
    """
    grouped_sales = data.groupby(['Warehouse id', 'SKU id'])['Sales']

    for lag in lags:
        data[f'lag_{lag}'] = grouped_sales.shift(lag)

    for window in rolling_windows:
        # Shift AND roll inside each group. Rolling over the flat, already
        # shifted column would follow physical row order and mix in values
        # from neighbouring series whenever groups are not contiguous.
        data[f'rolling_mean_{window}'] = grouped_sales.transform(
            lambda sales, w=window: sales.shift(1).rolling(w).mean()
        )

    return data

# Add lag features (1-3 periods back) and rolling means (2- and 3-period)
df_long = create_features(df_long, lags=[1, 2, 3], rolling_windows=[2, 3])

# Drop rows with NaN values (caused by lag/rolling features)
df_long = df_long.dropna()

# Split data into training and testing sets: everything before the forecast
# date is used for training, the forecast date itself is held out
forecast_date = pd.Timestamp('2021-06-01')
train = df_long[df_long['Date'] < forecast_date]
test = df_long[df_long['Date'] == forecast_date]

# Define features and target variable
features = [col for col in df_long.columns if col.startswith(('lag_', 'rolling_mean_'))]
target = 'Sales'

# Group the held-out rows once so each series' test rows are a dict lookup
# instead of a boolean filter over the whole test frame per iteration.
# dict(tuple(...)) is required: GroupBy exposes a `keys` attribute, so a bare
# dict(groupby) would wrongly treat it as a mapping.
group_keys = ['Warehouse id', 'SKU id']
test_groups = dict(tuple(test.groupby(group_keys)))

# Train a separate model for each SKU-Warehouse combination
predictions = []
for (warehouse, sku), group in train.groupby(group_keys):
    # Skip series with nothing to forecast before paying for a model fit
    held_out = test_groups.get((warehouse, sku))
    if held_out is None or held_out.empty:
        continue

    # Work on an explicit copy so adding the forecast column never writes
    # into a view of `test` (avoids SettingWithCopyWarning)
    test_group = held_out.copy()

    X_train = group[features]
    y_train = group[target]
    X_test = test_group[features]

    # Train the model
    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)

    # Predict for June 2021
    test_group['Forecasted Sales'] = model.predict(X_test)
    predictions.append(test_group)

# pd.concat([]) fails with the opaque "No objects to concatenate"; raise the
# same exception type with a message that names the actual cause
if not predictions:
    raise ValueError(
        f"No rows dated {forecast_date:%Y-%m-%d} are available for any "
        "Warehouse/SKU pair that also has training history, so there is "
        "nothing to forecast. Check that the input contains a column for "
        "that date and that its rows survive the dropna() step."
    )

# Combine all predictions
predictions_df = pd.concat(predictions)

# Evaluate using MAPE
def calculate_mape(actual, forecasted):
    """Return the mean absolute percentage error, in percent.

    The percentage error is undefined where the actual value is zero (or
    where either value is missing). Those observations are left out of the
    average rather than turning the whole result into inf/NaN.

    Args:
        actual: Observed values (pandas Series or NumPy array).
        forecasted: Predicted values, same shape/index as ``actual``.

    Returns:
        MAPE as a float percentage over the observations with a defined
        percentage error, or NaN when no such observation exists.
    """
    # Division by zero is expected here and filtered out just below
    with np.errstate(divide='ignore', invalid='ignore'):
        pct_errors = np.asarray(
            np.abs((actual - forecasted) / actual), dtype=float
        )

    defined = np.isfinite(pct_errors)
    if not defined.any():
        return float('nan')
    return float(pct_errors[defined].mean() * 100)

# Compare the forecasts with the actual 'Sales' values
actual_sales = predictions_df['Sales']
forecasted_sales = predictions_df['Forecasted Sales']
mape = calculate_mape(actual_sales, forecasted_sales)

# Write the forecast rows to CSV, leaving out the DataFrame index
output_path = 'June_2021_Forecast.csv'
predictions_df.to_csv(output_path, index=False)

# Report the error metric with two decimals
print(f"MAPE: {mape:.2f}%")

# Recorded cell output: ValueError: No objects to concatenate