In [15]:
# Loading and preprocessing data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("https://raw.githubusercontent.com/aadyasharma13/AI-powered-inventory-management-system/refs/heads/main/dataset.csv")

# Convert date columns
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['expiry_date'] = pd.to_datetime(df['expiry_date'])
df.sort_values(by='timestamp', inplace=True)

# Encode categorical column
df['weather'] = LabelEncoder().fit_transform(df['weather'])

# Create lag feature
df['prev_sales'] = df.groupby('product_id')['quantity_sold'].shift(1)
df.dropna(inplace=True)

# Features and target
X = df[['prev_sales', 'price', 'weather']]
y = df['quantity_sold']


In [16]:
# Splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [18]:
# Model Building
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
from math import sqrt
lr_rmse = sqrt(mean_squared_error(y_test, lr.predict(X_test)))

# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_rmse = sqrt(mean_squared_error(y_test, rf.predict(X_test)))

In [19]:
# Long Short Term Memory Model
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Scale only after splitting
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train_scaled = scaler_x.fit_transform(X_train)
X_test_scaled = scaler_x.transform(X_test)

y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

# Reshape for LSTM
X_train_lstm = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_lstm = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# Build and train model
lstm = Sequential([
    LSTM(64, activation='relu', input_shape=(1, X.shape[1])),
    Dense(1)
])
lstm.compile(optimizer=Adam(0.001), loss='mse')
lstm.fit(X_train_lstm, y_train_scaled, epochs=20, verbose=0)

# Predict
y_pred_scaled = lstm.predict(X_test_lstm)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_true = scaler_y.inverse_transform(y_test_scaled)

# RMSE
lstm_rmse = sqrt(mean_squared_error(y_true, y_pred))


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step


In [20]:
# Comparing and selecting best model
print(f"Linear Regression RMSE: {lr_rmse:.2f}")
print(f"Random Forest RMSE: {rf_rmse:.2f}")
print(f"LSTM RMSE: {lstm_rmse:.2f}")

if min([lr_rmse, rf_rmse, lstm_rmse]) == lr_rmse:
    best_model = lr
    model_type = 'linear'
elif min([lr_rmse, rf_rmse, lstm_rmse]) == rf_rmse:
    best_model = rf
    model_type = 'rf'
else:
    best_model = lstm
    model_type = 'lstm'

print(f"✅ Best model selected: {model_type.upper()}")


Linear Regression RMSE: 6.08
Random Forest RMSE: 6.09
LSTM RMSE: 2.28
✅ Best model selected: LSTM


In [21]:
# Forecasting demand
latest = df.groupby('product_id').tail(1).copy()
X_forecast = latest[['prev_sales', 'price', 'weather']]

if model_type == 'lstm':
    X_forecast_scaled = scaler_x.transform(X_forecast)
    X_forecast_reshaped = X_forecast_scaled.reshape((X_forecast_scaled.shape[0], 1, X_forecast_scaled.shape[1]))
    forecast_scaled = best_model.predict(X_forecast_reshaped)
    latest['forecast'] = scaler_y.inverse_transform(forecast_scaled)
else:
    latest['forecast'] = best_model.predict(X_forecast)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 337ms/step


In [22]:
# Inventory Alert
today = pd.to_datetime('today')
latest['alerts'] = ''

for i, row in latest.iterrows():
    alerts = []
    if row['stock_level'] < 10:
        alerts.append('Low Stock')
    if row['stock_level'] > 150:
        alerts.append('Overstocked')
    if (row['expiry_date'] - today).days <= 2:
        alerts.append('Expiring Soon')
    latest.at[i, 'alerts'] = ', '.join(alerts)

latest[['product_id', 'product_name', 'forecast', 'stock_level', 'expiry_date', 'alerts']]


Unnamed: 0,product_id,product_name,forecast,stock_level,expiry_date,alerts
9,P001,Milk,7.146977,8,2025-07-11,"Low Stock, Expiring Soon"
10,P002,Bread,6.460545,5,2025-07-10,"Low Stock, Expiring Soon"
11,P003,Eggs,5.906097,62,2025-07-14,


In [23]:
latest.to_csv('forecast_and_alerts.csv', index=False)
from google.colab import files
files.download('forecast_and_alerts.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>