# BigMart Sales Prediction - Submission

## 1. Import Libraries & Load Data

In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor

# Load the processed train and test CSVs (paths are relative to this notebook).
train_csv = '../dataset/processed/feat_eng_train.csv'
test_csv = '../dataset/processed/feat_eng_test.csv'
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# Keep the identifier columns of the test rows; they are needed again when
# the submission file is assembled.
id_columns = ['Item_Identifier', 'Outlet_Identifier']
submission_ids = test_df[id_columns]

## 2. Prepare Data
The model is trained on the full training set; no validation split is held out.

In [None]:
# Columns excluded from the feature matrix (dropped only if present).
cols_to_drop = [
    'Item_Identifier',
    'Outlet_Identifier',
    'Item_Type',
    'Outlet_Establishment_Year',
]

# Separate the target from the training features, then remove the excluded
# columns from both the training and the test features.
y = train_df['Item_Outlet_Sales']
X = train_df.drop(columns=['Item_Outlet_Sales']).drop(columns=cols_to_drop, errors='ignore')
test_X = test_df.drop(columns=cols_to_drop, errors='ignore')

# Align the test features with the training features: every training column
# absent from the test frame is added as a constant 0, then the columns are
# selected in training order (columns present only in the test frame are
# discarded by this selection).
missing_cols = set(X.columns).difference(test_X.columns)
for col in missing_cols:
    test_X[col] = 0
test_X = test_X.loc[:, X.columns]

print("Training Data Shape:", X.shape)
print("Test Data Shape:", test_X.shape)

## 3. Train Final Model
XGBoost is used for the final model because it performed best.

In [None]:
# Hyperparameters of the final regressor, kept together for easy tweaking.
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'learning_rate': 0.1,
}

# Fit on all prepared training rows, then predict sales for the test rows.
xgb = XGBRegressor(**xgb_params)
xgb.fit(X, y)
predictions = xgb.predict(test_X)

## 4. Create Submission File

In [None]:
# Assemble the submission: the two identifier columns followed by the
# predicted sales, in the same row order as the test data.
submission = submission_ids[['Item_Identifier', 'Outlet_Identifier']].assign(
    Item_Outlet_Sales=predictions
)

# Write the CSV without the index column.
# NOTE: despite its name, model_path holds the submission file's path.
model_path = '../model_training/submission.csv'
submission.to_csv(model_path, index=False)
print(f"Submission saved to {model_path}")