In [1]:
import pandas as pd 
import numpy as np 
import logging
from functions.data_prep import DataPreparation
from functions.prediction import ProductivityPredictModel

In [2]:
# Load the garment-worker productivity CSV into a DataFrame.
# The path is relative, so the notebook must be run from the directory that contains `data/`.
df = pd.read_csv('data/garments_worker_productivity.csv')

In [3]:
# Hand the loaded frame to the project's DataPreparation helper; the cleaning
# cells below all go through this `data_prep` object.
data_prep = DataPreparation(df=df)


In [4]:
# Ask the helper which columns contain missing values and keep the result:
# `missing_cols` is reused below to select and to impute those columns.
missing_cols = data_prep.check_missing_data()
# Show the reported column names as a plain array.
missing_cols.values

array(['wip'], dtype=object)

In [5]:
# Is the set of columns with missing values empty?
# check_missing_data() prints as None once nothing is missing (see the post-imputation
# check later in this notebook), so guard against None before reading `.empty`;
# otherwise this cell raises AttributeError when re-run on already-imputed data.
missing_now = data_prep.check_missing_data()
missing_now is None or missing_now.empty

False

In [6]:
# Display only the columns flagged as having missing values, to eyeball where the gaps are.
df[missing_cols]

Unnamed: 0,wip
0,1108.0
1,
2,968.0
3,968.0
4,1170.0
...,...
1192,
1193,
1194,
1195,


In [7]:
# Impute the flagged columns with the 'zero' option; the value returned by the
# helper is left as the last expression so the notebook renders it.
data_prep.impute_missing_data(
    missing_cols=missing_cols,
    impute_option='zero',
)

Unnamed: 0,date,quarter,department,day,team,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity
0,1/1/2015,Quarter1,sweing,Thursday,8,0.80,26.16,1108.0,7080,98,0.0,0,0,59.0,0.940725
1,1/1/2015,Quarter1,finishing,Thursday,1,0.75,3.94,0.0,960,0,0.0,0,0,8.0,0.886500
2,1/1/2015,Quarter1,sweing,Thursday,11,0.80,11.41,968.0,3660,50,0.0,0,0,30.5,0.800570
3,1/1/2015,Quarter1,sweing,Thursday,12,0.80,11.41,968.0,3660,50,0.0,0,0,30.5,0.800570
4,1/1/2015,Quarter1,sweing,Thursday,6,0.80,25.90,1170.0,1920,50,0.0,0,0,56.0,0.800382
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1192,3/11/2015,Quarter2,finishing,Wednesday,10,0.75,2.90,0.0,960,0,0.0,0,0,8.0,0.628333
1193,3/11/2015,Quarter2,finishing,Wednesday,8,0.70,3.90,0.0,960,0,0.0,0,0,8.0,0.625625
1194,3/11/2015,Quarter2,finishing,Wednesday,7,0.65,3.90,0.0,960,0,0.0,0,0,8.0,0.625625
1195,3/11/2015,Quarter2,finishing,Wednesday,9,0.75,2.90,0.0,1800,0,0.0,0,0,15.0,0.505889


In [8]:
# Re-run the missing-data check after imputation. print() is used deliberately:
# a bare None as the last expression would render no output at all.
print(data_prep.check_missing_data())

None


In [9]:
# Apply the helper's column-value corrections and continue with the returned frame.
# NOTE(review): this rebinds `df`, so the raw frame loaded at the top is no longer
# reachable under that name; re-running the DataPreparation cell after this one
# would wrap the corrected frame instead of the raw CSV data.
df = data_prep.correct_column_value()

In [10]:
# Create the project's productivity prediction model with its default constructor arguments.
model = ProductivityPredictModel()

In [11]:
# Preview the frame returned by lag-feature generation for the three listed
# columns with lags=2; .info() prints the column/dtype summary.
(
    model
    .create_lag_features(
        data=df,
        features=['actual_productivity', 'targeted_productivity', 'over_time'],
        lags=2,
    )
    .info()
)

<class 'pandas.core.frame.DataFrame'>
Index: 1195 entries, 2 to 1196
Data columns (total 22 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   date                         1195 non-null   object 
 1   day                          1195 non-null   int64  
 2   team                         1195 non-null   int64  
 3   targeted_productivity        1195 non-null   float64
 4   smv                          1195 non-null   float64
 5   wip                          1195 non-null   float64
 6   over_time                    1195 non-null   int64  
 7   incentive                    1195 non-null   int64  
 8   idle_time                    1195 non-null   float64
 9   idle_men                     1195 non-null   int64  
 10  no_of_style_change           1195 non-null   int64  
 11  no_of_workers                1195 non-null   float64
 12  actual_productivity          1195 non-null   float64
 13  department_sweing      

In [12]:
# Fit on `df`, passing the same feature list and lag count used in the preview cell above.
# NOTE(review): split_date is passed as the string '3/10/2015', yet the X_test shown
# below starts at day=2 / month=3 rows — confirm fit() compares parsed dates, not strings.
model.fit(
    data=df,
    features=['actual_productivity', 'targeted_productivity', 'over_time'],
    lags=2,
    target='actual_productivity',
    split_date='3/10/2015',
    date_column='date',
)

In [13]:
# Inspect the model's test-set feature frame (presumably populated by fit() above — confirm).
model.X_test

Unnamed: 0,day,team,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,department_sweing,month,num_week,actual_productivity_lag_1,actual_productivity_lag_2,targeted_productivity_lag_1,targeted_productivity_lag_2,over_time_lag_1,over_time_lag_2
1005,2,2,0.70,3.90,0.0,1200,0,0.0,0,0,10.0,False,3,10,0.350417,0.447083,0.80,0.75,3360.0,960.0
1006,2,3,0.80,4.60,0.0,10080,0,0.0,0,0,24.0,False,3,10,0.928850,0.350417,0.70,0.80,1200.0,3360.0
1007,2,4,0.65,3.94,0.0,960,0,0.0,0,0,8.0,False,3,10,0.860370,0.928850,0.80,0.70,10080.0,1200.0
1008,2,1,0.65,26.66,1527.0,6840,65,0.0,0,0,57.0,True,3,10,0.806879,0.860370,0.65,0.80,960.0,10080.0
1009,2,12,0.80,15.26,1035.0,4080,63,0.0,0,0,34.0,True,3,10,0.800580,0.806879,0.65,0.65,6840.0,960.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1192,11,10,0.75,2.90,0.0,960,0,0.0,0,0,8.0,False,3,11,0.650596,0.700246,0.65,0.70,6840.0,4560.0
1193,11,8,0.70,3.90,0.0,960,0,0.0,0,0,8.0,False,3,11,0.628333,0.650596,0.75,0.65,960.0,6840.0
1194,11,7,0.65,3.90,0.0,960,0,0.0,0,0,8.0,False,3,11,0.625625,0.628333,0.70,0.75,960.0,960.0
1195,11,9,0.75,2.90,0.0,1800,0,0.0,0,0,15.0,False,3,11,0.625625,0.625625,0.65,0.70,960.0,960.0


In [14]:
# Inspect the model's test-set target values (presumably populated by fit() above — confirm).
model.y_test

1005    0.928850
1006    0.860370
1007    0.806879
1008    0.800580
1009    0.800402
          ...   
1192    0.628333
1193    0.625625
1194    0.625625
1195    0.505889
1196    0.394722
Name: actual_productivity, Length: 192, dtype: float64

In [15]:
# Run prediction with no arguments; the displayed result carries `actual` and
# `predicted` columns next to the test features for a row-by-row comparison.
model.predict()

Unnamed: 0,day,team,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_style_change,...,month,num_week,actual_productivity_lag_1,actual_productivity_lag_2,targeted_productivity_lag_1,targeted_productivity_lag_2,over_time_lag_1,over_time_lag_2,actual,predicted
1005,2,2,0.70,3.90,0.0,1200,0,0.0,0,0,...,3,10,0.350417,0.447083,0.80,0.75,3360.0,960.0,0.928850,0.529250
1006,2,3,0.80,4.60,0.0,10080,0,0.0,0,0,...,3,10,0.928850,0.350417,0.70,0.80,1200.0,3360.0,0.860370,0.879060
1007,2,4,0.65,3.94,0.0,960,0,0.0,0,0,...,3,10,0.860370,0.928850,0.80,0.70,10080.0,1200.0,0.806879,0.821200
1008,2,1,0.65,26.66,1527.0,6840,65,0.0,0,0,...,3,10,0.806879,0.860370,0.65,0.80,960.0,10080.0,0.800580,0.786035
1009,2,12,0.80,15.26,1035.0,4080,63,0.0,0,0,...,3,10,0.800580,0.806879,0.65,0.65,6840.0,960.0,0.800402,0.782502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1192,11,10,0.75,2.90,0.0,960,0,0.0,0,0,...,3,11,0.650596,0.700246,0.65,0.70,6840.0,4560.0,0.628333,0.578820
1193,11,8,0.70,3.90,0.0,960,0,0.0,0,0,...,3,11,0.628333,0.650596,0.75,0.65,960.0,6840.0,0.625625,0.616407
1194,11,7,0.65,3.90,0.0,960,0,0.0,0,0,...,3,11,0.625625,0.628333,0.70,0.75,960.0,960.0,0.625625,0.673535
1195,11,9,0.75,2.90,0.0,1800,0,0.0,0,0,...,3,11,0.625625,0.625625,0.65,0.70,960.0,960.0,0.505889,0.718554
