# =====================================================================================
# <center><h1>Forecasting Spare-Part Inventory</h1></center>
# =====================================================================================

### Description

* This dataset contains vehicle service records (invoice dates, vehicle details, odometer readings, and spare-part descriptions). It is used to analyze and predict maintenance patterns and spare-part demand.


### Import libraries and models

In [86]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

import warnings
warnings.filterwarnings("ignore")


### Loading Dataset & Basic Checks

In [87]:
# Read the raw service-invoice export and preview its first rows.
df = pd.read_csv("Spare_part_inventory.csv")
df.head(5)

Unnamed: 0,invoice_date,job_card_date,business_partner_name,vehicle_no,vehicle_model,current_km_reading,invoice_line_text
0,30-05-17,30-05-17,shivXXXXXXXXXX,KA03MFXXXX,BAJAJ AVENGER STREET 220,50000,ENGINE OIL
1,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,ENGINE OIL
2,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,POLISH
3,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,CONSUMABLES
4,02-06-17,31-05-17,KIRAXXXXXXXXXX,KA53ESXXXX,BAJAJ PULSAR NS 200,758,COOLANT OIL


In [47]:
# Summarise column dtypes and non-null counts to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28482 entries, 0 to 28481
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   invoice_date           28482 non-null  object
 1   job_card_date          28482 non-null  object
 2   business_partner_name  28482 non-null  object
 3   vehicle_no             28482 non-null  object
 4   vehicle_model          28482 non-null  object
 5   current_km_reading     28482 non-null  int64 
 6   invoice_line_text      28448 non-null  object
dtypes: int64(1), object(6)
memory usage: 1.5+ MB


In [31]:
# Summary statistics (count / unique / top / freq) for the object-typed columns only.
df.describe(include=[object])

Unnamed: 0,invoice_date,job_card_date,business_partner_name,vehicle_no,vehicle_model,invoice_line_text
count,28482,28482,28482,28482,28482,28448
unique,555,553,1010,846,28,502
top,01-12-18,01-12-18,venkXXXXXXXXXX,KA53EVXXXX,BAJAJ PULSAR 150,ENGINE OIL
freq,179,179,424,1313,8633,3802


In [88]:
# Parse the day-first invoice dates, then order the rows by part and date with a
# fresh 0..n-1 index so later per-part operations see chronological order.
df = (
    df.assign(invoice_date=pd.to_datetime(df["invoice_date"], dayfirst=True))
      .sort_values(by=["invoice_line_text", "invoice_date"])
      .reset_index(drop=True)
)

In [89]:
# One row per (part, invoice date); daily_usage is the number of invoice lines
# recorded for that part on that date.
usage_counts = df.groupby(["invoice_line_text", "invoice_date"]).size()
daily_demand = usage_counts.rename("daily_usage").reset_index()
daily_demand

Unnamed: 0,invoice_line_text,invoice_date,daily_usage
0,10 SIZE NUT,2018-06-23,1
1,10 SIZE NUT,2018-09-10,1
2,12 SIZE BOLT,2018-08-28,1
3,12 SIZE BOLT,2018-09-04,1
4,3M OIL,2017-05-31,2
...,...,...,...
12660,clutch cable,2017-10-11,1
12661,handle weight,2017-08-28,1
12662,handle weight,2017-10-02,1
12663,handle weight,2017-10-07,1


In [90]:
# daily_demand["avg_usage_7d"] = (
#     daily_demand.groupby("invoice_line_text")["daily_usage"]
#     .transform(lambda x: x.rolling(window=7,min_periods=1).mean().shift(1))
# )

# daily_demand["avg_usage_30d"] = (
#     daily_demand.groupby("invoice_line_text")["daily_usage"]
#     .transform(lambda x: x.rolling(window=30,min_periods=1).mean().shift(1))
# )

# daily_demand["usage_std_30d"] = (
#     daily_demand.groupby("invoice_line_text")["daily_usage"]
#     .transform(lambda x: x.rolling(window=30,min_periods=1).std().shift(1)).fillna(0)
# )

In [None]:
# Rolling demand statistics, computed independently for every part.
# NOTE: the windows count rows of daily_demand (dates on which the part was
# invoiced), not calendar days, and each window includes the current row.
usage_by_part = daily_demand.groupby("invoice_line_text")["daily_usage"]

daily_demand["avg_usage_7d"] = usage_by_part.transform(
    lambda usage: usage.rolling(window=7, min_periods=1).mean()
)

daily_demand["avg_usage_30d"] = usage_by_part.transform(
    lambda usage: usage.rolling(window=30, min_periods=1).mean()
)

# A single observation has no standard deviation (NaN); treat it as zero spread.
daily_demand["usage_std_30d"] = usage_by_part.transform(
    lambda usage: usage.rolling(window=30, min_periods=1).std()
).fillna(0)

In [91]:
# Calendar features derived from the invoice date (Monday=0 ... Sunday=6).
invoice_dates = daily_demand["invoice_date"]
daily_demand["day_of_week"] = invoice_dates.dt.weekday
daily_demand["month"] = invoice_dates.dt.month
# Saturday (5) and Sunday (6) are flagged as weekend.
daily_demand["is_weekend"] = (daily_demand["day_of_week"] >= 5).astype(int)

In [92]:
# Simulation parameters.
INITIAL_STOCK = 110  # starting stock level applied to every part
SUPPLIER_LEAD_TIME = 7  # presumably days; not referenced by the cells shown here - TODO confirm


In [93]:
# Running stock per part: start from INITIAL_STOCK, subtract the cumulative
# usage so far, and floor at zero (only consumption is subtracted here).
cumulative_usage = daily_demand.groupby("invoice_line_text")["daily_usage"].cumsum()
daily_demand["simulated_stock"] = (INITIAL_STOCK - cumulative_usage).clip(lower=0)


In [94]:
# daily_demand["future_7d_usage"] = (
#     daily_demand
#     .groupby("invoice_line_text", group_keys=False)
#     .apply(
#         lambda df: df.apply(
#             lambda r: df.loc[
#                 (df["invoice_date"] > r["invoice_date"]) &
#                 (df["invoice_date"] <= r["invoice_date"] + pd.Timedelta(days=7)),
#                 "daily_usage"
#             ].sum(),
#             axis=1
#         )
#     )
# )


# daily_demand["simulated_stock_lag1"] = (
#     daily_demand
#     .groupby("invoice_line_text")["simulated_stock"]
#     .shift(1)
# )


In [39]:
def _future_usage(part_rows: pd.DataFrame, horizon_days: int = 7) -> pd.Series:
    """Sum one part's demand over the calendar days that follow each row.

    For every row, the result is the total ``daily_usage`` of the same part on
    dates strictly after the row's ``invoice_date`` and up to and including
    ``invoice_date + horizon_days``.

    Parameters
    ----------
    part_rows : DataFrame holding a single part's rows, with a datetime
        ``invoice_date`` column and a numeric ``daily_usage`` column.
    horizon_days : length of the look-ahead window in calendar days.

    Returns
    -------
    Series of forward-looking usage totals, indexed like ``part_rows``.
    """
    ordered = part_rows.sort_values("invoice_date")
    dates = ordered["invoice_date"].to_numpy()
    running_total = ordered["daily_usage"].cumsum().to_numpy()
    horizon = np.timedelta64(horizon_days, "D")

    # Position of the last row sharing the current date (window start, exclusive)
    # and of the last row still inside the horizon (window end, inclusive).
    last_same_day = np.searchsorted(dates, dates, side="right") - 1
    last_in_window = np.searchsorted(dates, dates + horizon, side="right") - 1

    return pd.Series(
        running_total[last_in_window] - running_total[last_same_day],
        index=ordered.index,
    )


# The previous `shift(-1).rolling(7).sum()` summed a *trailing* row window
# (rows i-5..i+1) and left the first six rows of every part as NaN, so it was
# not a look-ahead at all. The target must be true future demand by date.
future_usage_by_part = [
    _future_usage(part_rows)
    for _, part_rows in daily_demand.groupby("invoice_line_text")
]
daily_demand["future_7d_usage"] = (
    pd.concat(future_usage_by_part)
    if future_usage_by_part
    else pd.Series(dtype="int64")
)


In [95]:
# Perturb the future demand with multiplicative Gaussian noise (mean 1, sd 0.2);
# the fixed seed keeps the draw reproducible, and negatives are floored at zero.
np.random.seed(42)
noise_factor = np.random.normal(loc=1.0, scale=0.2, size=len(daily_demand))

noisy_usage = daily_demand["future_7d_usage"] * noise_factor
daily_demand["noisy_future_7d_usage"] = noisy_usage.clip(lower=0)


In [96]:
# Binary label: 1 when the (noisy) upcoming demand exceeds the simulated stock.
exceeds_stock = daily_demand["noisy_future_7d_usage"].gt(daily_demand["simulated_stock"])
daily_demand["stockout_next_7d"] = exceeds_stock.astype(int)


In [97]:
# List the engineered columns now present on daily_demand.
daily_demand.columns

Index(['invoice_line_text', 'invoice_date', 'daily_usage', 'avg_usage_7d',
       'avg_usage_30d', 'usage_std_30d', 'day_of_week', 'month', 'is_weekend',
       'simulated_stock', 'future_7d_usage', 'simulated_stock_lag1',
       'noisy_future_7d_usage', 'stockout_next_7d'],
      dtype='object')

In [98]:
# features = [
#     "invoice_line_text",
#     "avg_usage_7d",
#     "avg_usage_30d",
#     "usage_std_30d",
#     "day_of_week",
#     "month",
#     "is_weekend",
#     "simulated_stock_lag1"
# ]


In [65]:
# Model inputs: rolling-demand statistics followed by calendar indicators.
rolling_features = ["avg_usage_7d", "avg_usage_30d", "usage_std_30d"]
calendar_features = ["day_of_week", "month", "is_weekend"]
features = rolling_features + calendar_features

# daily_demand = daily_demand.dropna().reset_index(drop=True)


In [99]:
# Inspect the first 20 rows to sanity-check the engineered features and label.
daily_demand.head(20)

Unnamed: 0,invoice_line_text,invoice_date,daily_usage,avg_usage_7d,avg_usage_30d,usage_std_30d,day_of_week,month,is_weekend,simulated_stock,future_7d_usage,simulated_stock_lag1,noisy_future_7d_usage,stockout_next_7d
0,10 SIZE NUT,2018-06-23,1,,,0.0,5,6,1,109,0,,0.0,0
1,10 SIZE NUT,2018-09-10,1,1.0,1.0,0.0,0,9,0,108,0,109.0,0.0,0
2,12 SIZE BOLT,2018-08-28,1,,,0.0,1,8,0,109,1,,1.129538,0
3,12 SIZE BOLT,2018-09-04,1,1.0,1.0,0.0,1,9,0,108,0,109.0,0.0,0
4,3M OIL,2017-05-31,2,,,0.0,2,5,0,108,23,,21.922894,0
5,3M OIL,2017-06-01,5,2.0,2.0,0.0,3,6,0,103,20,108.0,19.063452,0
6,3M OIL,2017-06-02,3,3.5,3.5,2.12132,4,6,0,100,19,103.0,25.001009,0
7,3M OIL,2017-06-03,2,3.333333,3.333333,1.527525,5,6,1,98,19,100.0,21.916252,0
8,3M OIL,2017-06-04,3,3.0,3.0,1.414214,6,6,1,95,18,98.0,16.309892,0
9,3M OIL,2017-06-05,2,3.0,3.0,1.224745,0,6,0,93,21,95.0,23.278752,0


In [100]:
# from sklearn.preprocessing import LabelEncoder

# le = LabelEncoder()

# daily_demand['invoice_line_text'] = le.fit_transform(daily_demand['invoice_line_text'])


In [101]:
# Chronological hold-out: rows dated up to the 80th-percentile invoice date
# train the model, later rows are kept for testing.
split_date = daily_demand["invoice_date"].quantile(q=0.8)

is_train = daily_demand["invoice_date"] <= split_date
is_test = daily_demand["invoice_date"] > split_date

train_df = daily_demand[is_train]
test_df = daily_demand[is_test]

x_train, y_train = train_df[features], train_df["stockout_next_7d"]
x_test, y_test = test_df[features], test_df["stockout_next_7d"]


In [102]:
# Random forest with balanced class weights; the seed is fixed for repeatable fits.
rf_params = {
    "n_estimators": 300,
    "class_weight": "balanced",
    "random_state": 42,
}
model = RandomForestClassifier(**rf_params)

model.fit(x_train, y_train)

In [103]:
# Score the hold-out set: probability of the positive class (column 1) and hard labels.
positive_class_column = 1
y_prob = model.predict_proba(x_test)[:, positive_class_column]
y_pred = model.predict(x_test)

report = classification_report(y_test, y_pred)
auc = roc_auc_score(y_test, y_prob)
print(report)
print("ROC-AUC:", auc)


              precision    recall  f1-score   support

           0       1.00      0.96      0.98      1181
           1       0.97      1.00      0.98      1350

    accuracy                           0.98      2531
   macro avg       0.98      0.98      0.98      2531
weighted avg       0.98      0.98      0.98      2531

ROC-AUC: 0.9898287703452817


In [104]:
# Decision thresholds for the just-in-time reorder rule.
REORDER_RISK_THRESHOLD = 0.4   # predicted stock-out probability must exceed this
REORDER_STOCK_THRESHOLD = 20   # simulated stock must be below this level

# test_df was produced by boolean-filtering daily_demand, so writing columns
# onto it directly is chained assignment (SettingWithCopyWarning, hidden by
# the global warnings filter). Build a new frame with assign instead; this
# also keeps the cell safe to re-run.
test_df = test_df.assign(stockout_risk_prob=y_prob)

# Flag a reorder only when both the risk is high and the stock is low.
test_df = test_df.assign(
    jit_reorder_flag=(
        (test_df["stockout_risk_prob"] > REORDER_RISK_THRESHOLD)
        & (test_df["simulated_stock"] < REORDER_STOCK_THRESHOLD)
    ).astype(int)
)


In [105]:
# Columns reported to the reader: when, which part, stock level, risk, and decision.
output_columns = [
    "invoice_date",
    "invoice_line_text",
    "simulated_stock",
    "stockout_risk_prob",
    "jit_reorder_flag",
]
final_output = test_df.loc[:, output_columns]

final_output


Unnamed: 0,invoice_date,invoice_line_text,simulated_stock,stockout_risk_prob,jit_reorder_flag
437,2018-10-05,2,0,1.0,1
438,2018-10-06,2,0,1.0,1
439,2018-10-08,2,0,1.0,1
440,2018-10-09,2,0,1.0,1
441,2018-10-10,2,0,1.0,1
...,...,...,...,...,...
12561,2019-01-03,491,90,0.0,0
12597,2018-10-09,494,92,0.0,0
12598,2018-10-11,494,91,0.0,0
12599,2018-11-24,494,90,0.0,0
