# =====================================================================================
# <center><h1>Forecasting Spare-Part Inventory</h1></center>
# =====================================================================================

### Description

* This dataset contains vehicle service records with invoice dates, vehicle details, odometer readings, and spare part descriptions, used to analyze and predict maintenance patterns and spare parts demand.


### Import lib & models 

In [79]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

import warnings
warnings.filterwarnings("ignore")


### Loading Dataset & Basic Checks

In [80]:
df=pd.read_csv('Spare_part_inventory.csv')
df.head()

Unnamed: 0,invoice_date,job_card_date,business_partner_name,vehicle_no,vehicle_model,current_km_reading,invoice_line_text
0,30-05-17,30-05-17,shivXXXXXXXXXX,KA03MFXXXX,BAJAJ AVENGER STREET 220,50000,ENGINE OIL
1,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,ENGINE OIL
2,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,POLISH
3,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,CONSUMABLES
4,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,COOLANT OIL


In [81]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28482 entries, 0 to 28481
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   invoice_date           28482 non-null  object
 1   job_card_date          28482 non-null  object
 2   business_partner_name  28482 non-null  object
 3   vehicle_no             28482 non-null  object
 4   vehicle_model          28482 non-null  object
 5   current_km_reading     28482 non-null  int64 
 6   invoice_line_text      28448 non-null  object
dtypes: int64(1), object(6)
memory usage: 1.5+ MB


In [82]:
df.describe(include='O')

Unnamed: 0,invoice_date,job_card_date,business_partner_name,vehicle_no,vehicle_model,invoice_line_text
count,28482,28482,28482,28482,28482,28448
unique,555,553,1010,846,28,502
top,01-12-18,01-12-18,venkXXXXXXXXXX,KA53EVXXXX,BAJAJ PULSAR 150,ENGINE OIL
freq,179,179,424,1313,8633,3802


In [83]:
df["invoice_date"] = pd.to_datetime(df["invoice_date"], dayfirst=True)

df = df.sort_values(["invoice_line_text", "invoice_date"])

df.reset_index(drop=True, inplace=True)

In [84]:
daily_demand = (
    df.groupby(["invoice_line_text", "invoice_date"])
      .size()
      .reset_index(name="daily_usage")
)


In [85]:
daily_demand["avg_usage_7d"] = (
    daily_demand.groupby("invoice_line_text")["daily_usage"]
    .transform(lambda x: x.rolling(7).mean())
)

daily_demand["avg_usage_30d"] = (
    daily_demand.groupby("invoice_line_text")["daily_usage"]
    .transform(lambda x: x.rolling(30).mean())
)

daily_demand["usage_std_30d"] = (
    daily_demand.groupby("invoice_line_text")["daily_usage"]
    .transform(lambda x: x.rolling(30).std())
)

In [86]:
daily_demand["day_of_week"] = daily_demand["invoice_date"].dt.dayofweek
daily_demand["month"] = daily_demand["invoice_date"].dt.month
daily_demand["is_weekend"] = daily_demand["day_of_week"].isin([5,6]).astype(int)

In [87]:
INITIAL_STOCK = 110     
SUPPLIER_LEAD_TIME = 7   


In [88]:
daily_demand["simulated_stock"] = (
    INITIAL_STOCK -
    daily_demand.groupby("invoice_line_text")["daily_usage"].cumsum()
)

daily_demand["simulated_stock"] = daily_demand["simulated_stock"].clip(lower=0)


In [89]:
daily_demand["future_7d_usage"] = (
    daily_demand.groupby("invoice_line_text")["daily_usage"]
    .transform(lambda x: x.shift(-1).rolling(7).sum())
)


In [90]:
np.random.seed(42)

daily_demand["noisy_future_7d_usage"] = (
    daily_demand["future_7d_usage"] *
    np.random.normal(loc=1.0, scale=0.2, size=len(daily_demand))
).clip(lower=0)


In [91]:
daily_demand["stockout_next_7d"] = (
    daily_demand["noisy_future_7d_usage"] > daily_demand["simulated_stock"]
).astype(int)


In [92]:
features = [
    "avg_usage_7d",
    "avg_usage_30d",
    "usage_std_30d",
    "day_of_week",
    "month",
    "is_weekend"
]

daily_demand = daily_demand.dropna().reset_index(drop=True)


In [93]:
daily_demand.head()

Unnamed: 0,invoice_line_text,invoice_date,daily_usage,avg_usage_7d,avg_usage_30d,usage_std_30d,day_of_week,month,is_weekend,simulated_stock,future_7d_usage,noisy_future_7d_usage,stockout_next_7d
0,3M OIL,2017-07-01,3,3.571429,3.1,1.373392,5,7,1,17,21.0,16.557614,0
1,3M OIL,2017-07-03,2,3.0,3.1,1.373392,0,7,0,15,20.0,23.29018,1
2,3M OIL,2017-07-04,3,2.857143,3.033333,1.325697,1,7,0,12,19.0,14.360794,1
3,3M OIL,2017-07-06,4,2.714286,3.066667,1.33735,3,7,0,8,17.0,17.710136,1
4,3M OIL,2017-07-07,2,2.428571,3.066667,1.33735,4,7,0,6,18.0,10.945188,1


In [94]:
split_date = daily_demand["invoice_date"].quantile(0.8)

train_df = daily_demand[daily_demand["invoice_date"] <= split_date]
test_df  = daily_demand[daily_demand["invoice_date"] > split_date]

x_train = train_df[features]
y_train = train_df["stockout_next_7d"]

x_test = test_df[features]
y_test = test_df["stockout_next_7d"]


In [95]:
model = RandomForestClassifier(n_estimators=300,class_weight='balanced',random_state=42)

model.fit(x_train,y_train)

In [98]:
y_pred = model.predict(x_test)
y_prob = model.predict_proba(x_test)[:, 1]

print(classification_report(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_prob))


              precision    recall  f1-score   support

           0       0.87      0.95      0.91       523
           1       0.98      0.93      0.95      1091

    accuracy                           0.94      1614
   macro avg       0.92      0.94      0.93      1614
weighted avg       0.94      0.94      0.94      1614

ROC-AUC: 0.9749418587329323


In [99]:
test_df["stockout_risk_prob"] = y_prob

test_df["jit_reorder_flag"] = (
    (test_df["stockout_risk_prob"] > 0.4) &
    (test_df["simulated_stock"] < 20)
).astype(int)


In [101]:
final_output = test_df[[
    "invoice_date",
    "invoice_line_text",
    "simulated_stock",
    "stockout_risk_prob",
    "jit_reorder_flag"
]]

final_output


Unnamed: 0,invoice_date,invoice_line_text,simulated_stock,stockout_risk_prob,jit_reorder_flag
420,2018-10-24,3M OIL,0,1.000000,1
421,2018-10-25,3M OIL,0,1.000000,1
422,2018-10-26,3M OIL,0,1.000000,1
423,2018-10-27,3M OIL,0,1.000000,1
424,2018-10-28,3M OIL,0,0.996667,1
...,...,...,...,...,...
8174,2018-12-03,WIND SHIELD,73,0.000000,0
8175,2018-12-04,WIND SHIELD,72,0.000000,0
8176,2018-12-05,WIND SHIELD,71,0.000000,0
8177,2018-12-06,WIND SHIELD,70,0.000000,0
