In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv("/content/manufacturing_data.csv")
df.head(2)

Unnamed: 0,Record Date,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Work In Progress,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
0,2/24/2015,Q4,Stitching Unit,Tuesday,Line-S1,0.6,22.53,708.0,5040,0,0.0,0,Minor Style Revision,42.0,0.268214
1,1/19/2015,Q3,Stitching Unit,Monday,Line-F3,0.8,11.41,1028.0,4380,50,0.0,0,No Style Change,31.0,0.800359


In [None]:
df['Record Date'] = pd.to_datetime(df['Record Date'], format='%m/%d/%Y')


In [None]:
df.head(3)

Unnamed: 0,Record Date,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Work In Progress,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
0,2015-02-24,Q4,Stitching Unit,Tuesday,Line-S1,0.6,22.53,708.0,5040,0,0.0,0,Minor Style Revision,42.0,0.268214
1,2015-01-19,Q3,Stitching Unit,Monday,Line-F3,0.8,11.41,1028.0,4380,50,0.0,0,No Style Change,31.0,0.800359
2,2015-01-06,Q1,Finishing & Quality,Tuesday,Line-C2,0.8,2.9,,1440,0,0.0,0,No Style Change,8.0,0.681061


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

In [None]:
missing_val = df.isnull().sum()
print(missing_val)

Record Date                0
Fiscal Quarter             0
Production Dept            0
Day Of Week                0
Team                       0
Planned Efficiency         0
Standard Minute Value      0
Work In Progress         506
Over Time Minutes          0
Performance Bonus          0
Idle Minutes               0
Idle Workers               0
Style Change Count         0
Worker Count               0
Efficiency Score           0
dtype: int64


**Feature Engineering**

In [None]:
df['Production Dept'] = df['Production Dept'].str.strip()
count_with_space = df[df['Production Dept'] == ' Finishing & Quality '].shape[0]
df['Production Dept'].value_counts()
print("Rows with leading space:", count_with_space)

Rows with leading space: 0


In [None]:
df_stitch = df.copy()

In [None]:
df_stitch = df_stitch.dropna(subset=['Work In Progress'])
df_stitch.head(2)

Unnamed: 0,Record Date,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Work In Progress,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
0,2015-02-24,Q4,Stitching Unit,Tuesday,Line-S1,0.6,22.53,708.0,5040,0,0.0,0,Minor Style Revision,42.0,0.268214
1,2015-01-19,Q3,Stitching Unit,Monday,Line-F3,0.8,11.41,1028.0,4380,50,0.0,0,No Style Change,31.0,0.800359


In [None]:
cols_encode = ['Fiscal Quarter', 'Production Dept', 'Day Of Week', 'Team', 'Style Change Count']
le = LabelEncoder()

for col in cols_encode:
  df_stitch[col] = le.fit_transform(df_stitch[col])

In [None]:
df_stitch.shape

(691, 15)

In [None]:
from sklearn.preprocessing import StandardScaler, RobustScaler

standard_cols = ['Planned Efficiency', 'Standard Minute Value', 'Over Time Minutes', 'Idle Minutes', 'Worker Count', 'Efficiency Score']

robust_cols = ['Work In Progress','Performance Bonus']

scaler_standard = StandardScaler()
scaler_robust = RobustScaler()

df_stitch[standard_cols] = scaler_standard.fit_transform(df_stitch[standard_cols])
df_stitch[robust_cols] = scaler_robust.fit_transform(df_stitch[robust_cols])


In [None]:
df_stitch.head(2)

Unnamed: 0,Record Date,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Work In Progress,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
0,2015-02-24,3,0,4,9,-1.216301,-0.10262,-0.692469,-0.512922,-1.5,-0.075735,0,1,-1.110004,-2.933937
1,2015-01-19,2,0,0,4,0.745795,-1.697698,-0.023013,-0.743496,0.166667,-0.075735,0,2,-2.278906,0.506527


In [None]:
df_stitch.drop(columns=['Record Date'], inplace = True)

In [None]:
df_stitch.head(2)

Unnamed: 0,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Work In Progress,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
0,3,0,4,9,-1.216301,-0.10262,-0.692469,-0.512922,-1.5,-0.075735,0,1,-1.110004,-2.933937
1,2,0,0,4,0.745795,-1.697698,-0.023013,-0.743496,0.166667,-0.075735,0,2,-2.278906,0.506527


In [None]:
target = 'Efficiency Score'
x = df_stitch.drop(columns=[target])
y = df_stitch[target]

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred_lr = lr.predict(x_test)

mse_lr = mean_squared_error(y_test,y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("Linear Regression Results")
print("Mean Squared Error: ", mse_lr)
print("R2 score: ", r2_lr)

Linear Regression Results
Mean Squared Error:  0.21455494283655227
R2 score:  0.7100987108760788


In [None]:
dt = DecisionTreeRegressor()
dt.fit(x_train, y_train)
y_pred_dt = dt.predict(x_test)

mse_dt = mean_squared_error(y_test, y_pred_dt)
r2_dt = r2_score(y_test, y_pred_dt)

print("Decision Tree Results")
print("Mean Squared Error: ", mse_dt)
print("R2 Score: ", r2_dt)

Decision Tree Results
Mean Squared Error:  0.20561413357921307
R2 Score:  0.7221793094176294


In [None]:
gb = GradientBoostingRegressor()
gb.fit(x_train, y_train)
y_pred_gb = gb.predict(x_test)

mse_gb = mean_squared_error(y_test, y_pred_gb)
r2_gb = r2_score(y_test,y_pred_gb)

print("Gradient Boosting Regressor")
print("Mean Squared Error: ", mse_gb)
print("R2 Score: ", r2_gb)

Gradient Boosting Regressor
Mean Squared Error:  0.19097580705398287
R2 Score:  0.74195825123168


In [None]:
rf = RandomForestRegressor()
rf.fit(x_train, y_train)
y_pred_rf = rf.predict(x_test)

mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print("Random Forest Regressor")
print("Mean Squared Error: ",mse_rf)
print("R2 Score: ", r2_rf)

Random Forest Regressor
Mean Squared Error:  0.1538491265228509
R2 Score:  0.7921228962618644


In [None]:
rf_hyper = RandomForestRegressor(random_state=42)

param_grid = {
    'max_depth': [None, 5,10,15,20],
    'min_samples_split': [2,5,10],
    'min_samples_leaf' : [1,2,4],
    'criterion' : ['squared_error', 'absolute_error']
    }

grid_search = GridSearchCV(estimator = rf_hyper, param_grid = param_grid, cv=5, scoring = 'r2', n_jobs = -1)

grid_search.fit(x_train, y_train)
print("Best Parameters: ", grid_search.best_params_)
print("Best R2 Score on training data: ", grid_search.best_score_)
best_model = grid_search.best_estimator_
y_pred_hyper = best_model.predict(x_test)
print("Random Forest Hypertuning MSE:", mean_squared_error(y_test, y_pred_hyper))
print("Random Forest Hypertuning R²:", r2_score(y_test, y_pred_hyper))



Best Parameters:  {'criterion': 'absolute_error', 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2}
Best R2 Score on training data:  0.8340877919745463
Random Forest Hypertuning MSE: 0.18213675788507783
Random Forest Hypertuning R²: 0.7539013540789883


In [None]:
df_finish = df.copy()

In [None]:

df_finish = df_finish[df_finish['Work In Progress'].isna()]
df_finish.head(2)

Unnamed: 0,Record Date,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Work In Progress,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
2,2015-01-06,Q1,Finishing & Quality,Tuesday,Line-C2,0.8,2.9,,1440,0,0.0,0,No Style Change,8.0,0.681061
3,2015-02-24,Q4,Finishing & Quality,Tuesday,Line-S2,0.7,3.9,,960,0,0.0,0,No Style Change,8.0,0.325


In [None]:
df_finish.shape

(506, 15)

In [None]:
df_finish.head(2)

Unnamed: 0,Record Date,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Work In Progress,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
2,2015-01-06,Q1,Finishing & Quality,Tuesday,Line-C2,0.8,2.9,,1440,0,0.0,0,No Style Change,8.0,0.681061
3,2015-02-24,Q4,Finishing & Quality,Tuesday,Line-S2,0.7,3.9,,960,0,0.0,0,No Style Change,8.0,0.325


In [None]:
cols2_encode = ['Fiscal Quarter', 'Production Dept', 'Day Of Week', 'Team', 'Style Change Count']
le = LabelEncoder()

for col2 in cols2_encode:
  df_finish[col2] = le.fit_transform(df_finish[col2])

In [None]:
df_finish.drop(columns =['Record Date','Work In Progress'], inplace = True)

In [None]:
df_finish.head(2)

Unnamed: 0,Fiscal Quarter,Production Dept,Day Of Week,Team,Planned Efficiency,Standard Minute Value,Over Time Minutes,Performance Bonus,Idle Minutes,Idle Workers,Style Change Count,Worker Count,Efficiency Score
2,0,0,4,1,0.8,2.9,1440,0,0.0,0,0,8.0,0.681061
3,3,0,4,10,0.7,3.9,960,0,0.0,0,0,8.0,0.325


In [None]:
target2 = 'Efficiency Score'
xa = df_finish.drop(columns=[target2])
ya = df_finish[target2]

In [None]:
xa_train, xa_test, ya_train, ya_test = train_test_split(xa,ya, test_size = 0.2, random_state =42)

In [None]:
lr2 = LinearRegression()
lr2.fit(xa_train, ya_train)
ya_pred_lr2 = lr2.predict(xa_test)

mse_lr2 = mean_squared_error(ya_test, ya_pred_lr2)
r2_lr2 = r2_score(ya_test, ya_pred_lr2)

print("Linear Regression Results")
print("Mean Squared Error: ",mse_lr2)
print("R2 Score: ", r2_lr2)

Linear Regression Results
Mean Squared Error:  0.027642072580953626
R2 Score:  0.22755313158226365


In [None]:
dt2 = DecisionTreeRegressor(max_depth=5, min_samples_split=10, random_state=42)
dt2.fit(xa_train, ya_train)

ya_pred_dt2 = dt2.predict(xa_test)

mse_dt2 = mean_squared_error(ya_test, ya_pred_dt2)
r2_dt2 = r2_score(ya_test, ya_pred_dt2)

print("Decision Tree Regressor")
print("Mean_Squared_Error: ", mse_dt2)
print("R2 Score: ", r2_dt2)

Decision Tree Regressor
Mean_Squared_Error:  0.03356102641806042
R2 Score:  0.062150289867253794


In [None]:
gb2 = GradientBoostingRegressor()
gb2.fit(xa_train, ya_train)

ya_pred_gb2 = gb2.predict(xa_test)

mse_gb2 = mean_squared_error(ya_test, ya_pred_gb2)
r2_gb2 = r2_score(ya_test, ya_pred_gb2)

print("Gradient Boosting Regressor")
print("Mean Squared Error: ", mse_gb2)
print("R2 Score: ", r2_gb2)

Gradient Boosting Regressor
Mean Squared Error:  0.025666656784542708
R2 Score:  0.28275535063771695


In [None]:
rf2 = RandomForestRegressor()
rf2.fit(xa_train, ya_train)

ya_pred_rf2 = rf2.predict(xa_test)

mse_rf2 = mean_squared_error(ya_test, ya_pred_rf2)
r2_rf2 = r2_score(ya_test, ya_pred_rf2)

print("Random Forest Regressor")
print("Mean Squared Error: ", mse_rf2)
print("r2 Score: ", r2_rf2)

Random Forest Regressor
Mean Squared Error:  0.023658291333187344
r2 Score:  0.3388783348674438


In [None]:
rf2_hyper = RandomForestRegressor(random_state=42)

param_grid2 = {
    'max_depth': [None, 5,10,15,20],
    'min_samples_split': [2,5,10],
    'min_samples_leaf' : [1,2,4],
    'criterion' : ['squared_error', 'absolute_error']
    }

grid_search2 = GridSearchCV(estimator = rf2_hyper, param_grid = param_grid2, cv=5, scoring = 'r2', n_jobs = -1)

grid_search2.fit(xa_train, ya_train)
print("Best Parameters: ", grid_search2.best_params_)
print("Best R2 Score on training data: ", grid_search2.best_score_)
best_model2 = grid_search2.best_estimator_
ya_pred_rf2_hyper = best_model2.predict(xa_test)
print("Random Forest Hypertuning MSE:", mean_squared_error(ya_test, ya_pred_rf2_hyper))
print("Random Forest Hypertuning R²:", r2_score(ya_test, ya_pred_rf2_hyper))

Best Parameters:  {'criterion': 'squared_error', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 10}
Best R2 Score on training data:  0.18947064604053446
Random Forest Hypertuning MSE: 0.023997510412762137
Random Forest Hypertuning R²: 0.32939899083643065
