In [5]:
# STEP 1: Import Libraries

import os
from datetime import datetime

import holidays
import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score
from xgboost import XGBRegressor

In [8]:
# STEP 2: Load Dataset
# ============================================
# Read the raw records from the CSV (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='inventory.csv')
print("Data loaded successfully!")
# NOTE(review): this preview is not the last expression of its cell, and the
# recorded run shows no table for it -- move it to its own cell if it is wanted.
data.head()

# ============================================
# STEP 3: Feature Engineering
# ============================================
# Break the '-'-separated date string into pieces and store the first three
# pieces as the integer columns year, month and day (in that order).
parts = data['date'].str.split('-', n=3, expand=True)
for position, column in enumerate(('year', 'month', 'day')):
    data[column] = parts[position].astype('int')

def weekend_or_weekday(year, month, day):
    """Return 1 if the given calendar date is a Saturday or Sunday, else 0.

    Args:
        year: Calendar year.
        month: Month number (1-12).
        day: Day of the month.

    Returns:
        int: 1 for a weekend date, 0 for Monday through Friday.

    Raises:
        ValueError: If the three values do not form a valid date
            (raised by ``datetime``).
    """
    # datetime.weekday() numbers Monday as 0 through Sunday as 6,
    # so 5 and 6 are the weekend days.
    return int(datetime(year, month, day).weekday() >= 5)

# Weekend flag: 1 for Saturday/Sunday, 0 for Monday-Friday.
# Computed column-wise from the year/month/day columns instead of a row-by-row
# apply(axis=1), which runs a Python-level call for every record.
# (pandas timestamps cover roughly years 1677-2262, ample for sales dates.)
day_of_week = pd.to_datetime(data[['year', 'month', 'day']]).dt.weekday
data['weekend'] = (day_of_week > 4).astype('int64')

# Holiday flag: 1 when the Indian holiday calendar has an entry for the date.
# Each distinct date string is looked up once and the result is mapped back
# onto the rows, so repeated dates do not repeat the lookup.
india_holidays = holidays.country_holidays('IN')
holiday_flags = {d: 1 if india_holidays.get(d) else 0 for d in data['date'].unique()}
data['holiday'] = data['date'].map(holiday_flags)

def which_day(year, month, day):
    """Return the day-of-week index of the given calendar date.

    Args:
        year: Calendar year.
        month: Month number (1-12).
        day: Day of the month.

    Returns:
        int: 0 for Monday through 6 for Sunday (``datetime.weekday()`` numbering).

    Raises:
        ValueError: If the three values do not form a valid date
            (raised by ``datetime``).
    """
    moment = datetime(year, month, day)
    return moment.weekday()

# Day-of-week feature: 0 = Monday ... 6 = Sunday (same numbering as
# datetime.weekday()). Computed column-wise from year/month/day rather than
# with a per-row apply(axis=1), which is slow on large frames.
data['weekday'] = (
    pd.to_datetime(data[['year', 'month', 'day']]).dt.weekday.astype('int64')
)

# The raw date string has been expanded into the columns above, so remove it.
# Rebinding (instead of inplace=True) keeps the statement chain-friendly.
data = data.drop(columns='date')
print("Feature engineering completed!")

Data loaded successfully!
Feature engineering completed!


In [12]:
# STEP 4: Split Data

# Target is the 'sales' column; every remaining column is used as a feature.
y = data['sales']
X = data.drop(columns=['sales'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): rows are shuffled at random even though they are dated records,
# so test rows can precede training rows in time -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [13]:
# STEP 5: Train Models

# Candidate regressors, each constructed with its library-default settings.
models = dict(
    LinearRegression=LinearRegression(),
    Lasso=Lasso(),
    Ridge=Ridge(),
    XGBoost=XGBRegressor(),
)

# 5-fold cross-validation on the training split. No `scoring` argument is
# passed, so each estimator's default scorer is used (R^2 for regressors,
# per the scikit-learn docs); the per-fold values are averaged and printed.
for name, model in models.items():
    fold_scores = cross_val_score(estimator=model, X=X_train, y=y_train, cv=5)
    mean_score = fold_scores.mean()
    print(f"{name} Mean CV Score: {mean_score}")

LinearRegression Mean CV Score: 0.09349267480806296
Lasso Mean CV Score: 0.09221163522348144
Ridge Mean CV Score: 0.0934926749977999
XGBoost Mean CV Score: 0.9278741955757142


In [14]:
# STEP 6: Choose Best Model and Train Fully
# ============================================
# XGBoost is selected by hand here and fitted on the full training split
# (fit() returns the estimator itself, so construction and fitting are chained).
best_model = XGBRegressor().fit(X_train, y_train)

# Evaluate on the held-out split using mean squared error.
pred = best_model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=pred)
print("Final XGBoost Test MSE:", mse)


Final XGBoost Test MSE: 59.44453430175781


In [18]:
# STEP 7: Save Model
# ============================================
# One definition of the artifact location, shared by the dump call and the
# confirmation message so the two cannot drift apart.
model_path = '../sales/model.pkl'

# Writing fails if the parent folder is missing, so make sure it exists first
# (no-op when the folder is already there).
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(best_model, model_path)
print(f"Model saved successfully at {model_path}!")

Model saved successfully at ../sales/model.pkl!


In [19]:
import joblib
import numpy as np
from datetime import datetime
import holidays

# Load saved model
# NOTE: joblib files are pickle-based; only load artifacts from a trusted
# source such as the file written by this notebook.
model = joblib.load('../sales/model.pkl')

# Example input
store = 1
item = 3
date = '2017-03-20'

# Convert date to features (mirrors the training-time feature engineering)
year, month, day = map(int, date.split('-'))
weekday = datetime(year, month, day).weekday()  # 0 = Monday ... 6 = Sunday
weekend = 1 if weekday > 4 else 0               # derived from the single weekday lookup
india_holidays = holidays.country_holidays('IN')
holiday = 1 if india_holidays.get(date) else 0

# The values are passed positionally, so their order must match the columns
# the model was fitted on.
# NOTE(review): assumes the training columns were store, item, year, month,
# day, weekend, holiday, weekday -- confirm against X.columns.
features = np.array([[store, item, year, month, day, weekend, holiday, weekday]])

# Make prediction
# round() on the NumPy scalar returned by predict() did not shorten the printed
# value (the recorded run printed 30.200000762939453), so convert to a Python
# float and let the format spec do the two-decimal rounding.
prediction = float(model.predict(features)[0])
print(f"Predicted sales for store {store}, item {item} on {date}: {prediction:.2f}")


Predicted sales for store 1, item 3 on 2017-03-20: 30.200000762939453
