In [8]:
import pandas as pd
from sklearn.utils import resample

real_data = pd.read_csv(r"C:\Users\Abhilash\Documents\python\motor\Production_data_montari_for_ml_model.csv") 

synthetic_data = resample(
    real_data,
    replace=True,
    n_samples=200,
    random_state=42
)

synthetic_data.to_csv("synthetic_data.csv", index=False)


In [2]:
!pip install sdv

Collecting sdv
  Downloading sdv-1.32.1-py3-none-any.whl.metadata (14 kB)
Collecting boto3<2.0.0,>=1.28 (from sdv)
  Downloading boto3-1.42.28-py3-none-any.whl.metadata (6.8 kB)
Collecting botocore<2.0.0,>=1.31 (from sdv)
  Downloading botocore-1.42.28-py3-none-any.whl.metadata (5.9 kB)
Collecting cloudpickle>=2.1.0 (from sdv)
  Downloading cloudpickle-3.1.2-py3-none-any.whl.metadata (7.1 kB)
Collecting graphviz>=0.13.2 (from sdv)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Collecting copulas>=0.12.1 (from sdv)
  Downloading copulas-0.13.0-py3-none-any.whl.metadata (9.4 kB)
Collecting ctgan>=0.11.1 (from sdv)
  Downloading ctgan-0.11.1-py3-none-any.whl.metadata (10 kB)
Collecting deepecho>=0.7.0 (from sdv)
  Downloading deepecho-0.7.0-py3-none-any.whl.metadata (10 kB)
Collecting rdt>=1.18.2 (from sdv)
  Downloading rdt-1.19.0-py3-none-any.whl.metadata (10 kB)
Collecting sdmetrics>=0.21.0 (from sdv)
  Downloading sdmetrics-0.25.0-py3-none-any.whl.metadata (9.4 kB)
Coll

In [39]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import xgboost as xg
from xgboost import XGBRegressor 
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

df = pd.read_csv(r'C:\Users\Abhilash\Documents\python\motor\synthetic_data.csv')
print(df.head())


   Plan_Capacity  Man_power  SHIFT  Total_Down_time  Material_Loss  \
0            237         14    1.0                0              0   
1             13          5    0.5              225            225   
2             20          0    0.0               20              0   
3             23          2    0.5                0              0   
4            193         14    1.0               30              0   

   Has_downtime  Material_Issue  Prev_Day_Completion  Prev_Day_Downtime  \
0             0               0                100.0                  0   
1             1               1                100.0                  0   
2             1               0                100.0                  0   
3             0               0                 83.8                  0   
4             1               0                100.0                  0   

   Actual_Qty  
0         237  
1          13  
2          19  
3          23  
4         175  


In [40]:

np.random.seed(42)
df['noise'] = np.random.normal(0, 0.05, len(df))  # 5% random variation
df['Actual_Qty'] = df['Actual_Qty'] * (1 + df['noise'])
df = df.drop('noise', axis=1)

In [41]:

X = df[['Plan_Capacity', 'Man_power', 'SHIFT', 'Total_Down_time', 
        'Material_Loss', 'Has_downtime', 'Material_Issue', 
        'Prev_Day_Completion', 'Prev_Day_Downtime']]
y = df['Actual_Qty']

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [43]:

print(f"Training{X_train.shape}, Testing{X_test.shape}")

Training(160, 9), Testing(40, 9)


In [44]:
xgb_model = xg.XGBRegressor(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=1,
    reg_alpha=0.1,     
    reg_lambda=1.0,      
    random_state=42,
    n_jobs=-1
)

xgb_model.fit(X_train, y_train, 
              eval_set=[(X_test, y_test)],
               verbose=False)

y_pred_train = xgb_model.predict(X_train)
y_pred_test = xgb_model.predict(X_test)


train_mae = mean_absolute_error(y_train, y_pred_train)
test_mae = mean_absolute_error(y_test, y_pred_test)
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)

print(f"Train MAE: {train_mae:.1f} units")
print(f"Test MAE:  {test_mae:.1f} units")
print(f"Train R²:  {train_r2:.3f}")
print(f"Test R²:   {test_r2:.3f}")

Train MAE: 2.7 units
Test MAE:  3.9 units
Train R²:  0.997
Test R²:   0.995


In [None]:
import joblib
import pickle

joblib.dump(xgb_model, 'production_xgboost_model.pkl')

joblib.dump(scaler, 'production_scaler.pkl')

feature_names = ['Plan_Capacity', 'Man_power', 'SHIFT', 'Total_Down_time', 
                'Material_Loss', 'Has_downtime', 'Material_Issue', 
                'Prev_Day_Completion', 'Prev_Day_Downtime']
with open('feature_names.pkl', 'wb') as f:
    pickle.dump(feature_names, f)



✅ SAVED PRODUCTION FILES:
- production_xgboost_model.pkl
- production_scaler.pkl
- feature_names.pkl


In [50]:
!pip install streamlit joblib pandas scikit-learn xgboost plotly

Collecting streamlit
  Downloading streamlit-1.53.0-py3-none-any.whl.metadata (10 kB)
Collecting altair!=5.4.0,!=5.4.1,<7,>=4.0 (from streamlit)
  Downloading altair-6.0.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.5.0 (from streamlit)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<7,>=5.5 (from streamlit)
  Downloading cachetools-6.2.4-py3-none-any.whl.metadata (5.6 kB)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-22.0.0-cp310-cp310-win_amd64.whl.metadata (3.3 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Using cached tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-win_amd64.whl.metadata (44 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading gitpython-3.1.46-py3-none-any.whl.metadata (13 kB