In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Columns kept from the raw training frame, in selection order.
# 'Human_Hrs' is the regression target used later in the notebook; every other
# column here is carried through as a model input.
Train_col_list = [
    'order quantity',
    # SPEC_1st_* columns
    'SPEC_1st_N', 'SPEC_1st_L1', 'SPEC_1st_L2', 'SPEC_1st_L3',
    # SPEC_2nd_* columns
    'SPEC_2nd_L1', 'SPEC_2nd_L2', 'SPEC_2nd_L3',
    # order-level descriptors
    'Customer', 'By_way', 'Types',
    # Ap_* columns
    'Ap_outer_r', 'Ap_L',
    # cen_* columns
    'cen_outter_r', 'cen_L', 'cen_material',
    # shell columns
    'Shell_outer_r_1', 'Shell_outer_r_2', 'shell_L', 'shell_material',
    # target and cost
    'Human_Hrs', 'Material_Cost',
]

## Train a model to predict human hours (`Human_Hrs`)

In [None]:
## load data ##
# NOTE(review): unpickling can execute arbitrary code — only load
# 'data_train.pkl' if it comes from a trusted source.
df_dir = r'data_train.pkl'

## Pandas settings
pd.set_option('display.max_columns', None)

# Read the frame, give it a fresh 0..n-1 index, and keep only the modelling columns.
df = pd.read_pickle(df_dir).reset_index(drop=True)[Train_col_list]

# Treat the top 1% of Human_Hrs as outliers: keep rows strictly below the
# 99th percentile (rows with a missing Human_Hrs fail the comparison and are
# dropped as well).
q = df['Human_Hrs'].quantile(0.99)
df = df[df['Human_Hrs'] < q]

# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
def feature_col_clean_split(dataframe):
    """Clean a raw frame and split it into numeric and one-hot encoded parts.

    Non-object columns have missing values replaced by 0 and are rounded to
    4 decimals. Object columns have missing values replaced by the string
    'NONE' and are one-hot encoded with ``drop_first=True``. The input frame
    is not modified.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding both numeric and object (categorical) columns.

    Returns
    -------
    data : pandas.DataFrame
        ``numerical`` and ``dummies`` concatenated column-wise.
    numerical : pandas.DataFrame
        Cleaned non-object columns.
    categoric : pandas.DataFrame
        Cleaned object columns (before encoding).
    dummies : pandas.DataFrame
        One-hot encoding of ``categoric``.
    """
    # Chain the non-in-place forms: `round` returns a new frame, so its result
    # has to be kept for the rounding to take effect.
    numerical = dataframe.select_dtypes(exclude=['object']).fillna(0).round(4)
    categoric = dataframe.select_dtypes(include=['object']).fillna('NONE')

    # drop_first removes one level per categorical column to avoid a fully
    # redundant dummy column.
    dummies = pd.get_dummies(categoric, drop_first=True)

    data = pd.concat([numerical, dummies], axis=1)

    return data, numerical, categoric, dummies

In [None]:
# Run the cleaning/encoding helper on the filtered frame. Unpacks, in order:
#   data - numeric columns plus one-hot dummies (the modelling table)
#   num  - the cleaned non-object columns
#   cat  - the cleaned object (categorical) columns
#   dum  - the one-hot encoding of `cat`
data,num,cat,dum=feature_col_clean_split(df)

In [None]:
# Inspect the names of the one-hot columns generated from the categorical features.
dum.columns

In [None]:
# Display the combined modelling table (numeric columns + one-hot dummies).
data

# Define features (X) and target (y)

In [None]:
# Feature matrix: every column except the target, as a NumPy array.
X = data.drop(columns=['Human_Hrs']).values
# Target vector: the Human_Hrs column, as a NumPy array.
y = data['Human_Hrs'].values

### Train / validation / test split

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so that a fresh "Restart & Run All" reproduces the same partitions.
SPLIT_SEED = 42

# First hold out 10% of the rows as the test set, then hold out 10% of the
# remainder as the validation set (roughly 81% / 9% / 10% overall).
x0_train, X_test, y0_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SPLIT_SEED)
X_train, X_val, y_train, y_val = train_test_split(
    x0_train, y0_train, test_size=0.1, random_state=SPLIT_SEED)

print(len(X_train), 'train dataset')
print(len(X_val), 'validation dataset')
print(len(X_test), 'test dataset')

In [None]:
from sklearn.preprocessing import MinMaxScaler
import joblib

# Learn the per-feature min/max from the training split ONLY, then apply that
# same mapping to validation and test data. Using `transform` (not
# `fit_transform`) on the other splits keeps them on the training scale and
# guarantees that the scaler saved below is the one fitted on training data.
# NOTE: this cell overwrites X_train / X_val / X_test in place; re-run the
# split cell before re-running this one.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

## Save Scaler
joblib.dump(scaler, 'x_scaler_huhrs.pkl')

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Shape of the scaled training matrix; its second entry (number of features)
# sizes the first and last hidden layers of the network defined below.
X_train.shape

In [None]:
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping  # used by the training cell below

# Seed TensorFlow so weight initialisation and dropout masks are repeatable on
# a fresh kernel (some GPU kernels may still introduce small nondeterminism).
tf.random.set_seed(42)

n_features = X_train.shape[1]

# Hidden stack described as (units, number of consecutive Dense layers,
# dropout rate applied after the group or None). The network widens from the
# input width up to 1016 units and narrows back down again.
hidden_blocks = [
    (n_features, 1, None),
    (254,        2, None),
    (508,        3, None),
    (1016,       6, 0.5),
    (508,        3, 0.5),
    (254,        2, 0.5),
    (n_features, 1, None),
]

model = Sequential()
for units, repeats, dropout_rate in hidden_blocks:
    for _ in range(repeats):
        model.add(Dense(units, activation='relu'))
    if dropout_rate is not None:
        model.add(Dropout(dropout_rate))

# Single linear output unit for the regression target.
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

In [None]:
# Stop once val_loss has not improved for 100 epochs, and roll the model back
# to the weights of the best validation epoch rather than keeping the weights
# from the end of the patience window.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=100,
    restore_best_weights=True,
)

# Keep the returned History object (also reachable as model.history); assigning
# it also stops the cell from echoing the object's repr.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=512,
    epochs=2000,
    verbose=0,
    callbacks=[early_stop],
)


In [None]:
# Collect the per-epoch metrics recorded during training into a DataFrame
# (one row per epoch, one column per recorded metric).
losses=pd.DataFrame(model.history.history)

In [None]:
import matplotlib.pyplot as plt

# DataFrame.plot() opens its own figure, so the size is passed via `figsize`
# instead of calling plt.figure() first (which would leave an empty figure).
ax = losses.plot(figsize=(12, 6))
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (MSE)')
plt.show()

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

In [None]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,explained_variance_score

In [None]:
# Predict on the held-out (scaled) test set. With a single-unit output layer
# the result is expected to be a column of shape (n_samples, 1) — the error
# cell further down reshapes y_test to match.
predictions=model.predict(X_test)

In [None]:
# Root-mean-squared error on the test set (same units as Human_Hrs).
np.sqrt(mean_squared_error(y_test,predictions))

In [None]:
# Mean absolute error on the test set (same units as Human_Hrs).
mean_absolute_error(y_test,predictions)

In [None]:
# Summary statistics of the target column — presumably shown here to put the
# RMSE / MAE values above into context.
df['Human_Hrs'].describe()

In [None]:
# Explained-variance score on the test set (1.0 would be a perfect fit).
explained_variance_score(y_test,predictions)

In [None]:
# Predicted vs. actual Human_Hrs on the test set. The red line is y = x, so
# points lying on it are perfect predictions.
plt.figure(figsize=(12, 6))

actual = y_test
plt.scatter(actual, predictions)
plt.plot(actual, actual, 'r')

plt.xlabel('Real value')
plt.ylabel('Predictions')
plt.show()

In [None]:
import numpy as np

# Draw one random row position. `i` is a length-1 array, so `.iloc[i]` yields
# a one-row DataFrame of features (target column removed).
# Positions are drawn from len(df); `data` is built from `df` row-for-row.
i = np.random.randint(len(df), size=1)
working_hrs = data.drop(columns=['Human_Hrs']).iloc[i]

In [None]:
# Scale the sampled row with the fitted `scaler`, reshaped to (n_rows, n_features).
# NOTE(review): this overwrites `working_hrs` (DataFrame -> ndarray), so the cell
# cannot be re-run on its own — re-run the sampling cell above first.
working_hrs=scaler.transform(working_hrs.values.reshape(-1,X_train.shape[1]))

In [None]:
# Predict Human_Hrs for the sampled, scaled row.
model.predict(working_hrs)

In [None]:
# Check the shape of the test targets before subtracting the predictions in the
# next cell, which reshapes y_test into a column first.
y_test.shape

In [None]:
# Residuals (actual - predicted). y_test is turned into a column vector so the
# subtraction is element-wise against the (n, 1) predictions rather than
# broadcasting to an (n, n) matrix.
errors = y_test.reshape(-1, 1) - predictions

In [None]:
import seaborn as sns

# Distribution of the residuals: density histogram with a KDE overlay.
# `sns.distplot` is deprecated; `histplot(..., kde=True, stat='density')` is
# its replacement (requires seaborn >= 0.11). The (n, 1) residual array is
# flattened so it is treated as a single variable.
plt.figure(figsize=(12, 6))
sns.histplot(errors.ravel(), kde=True, stat='density')
plt.xlabel('Errors')
plt.show()

In [None]:
# Uncomment to persist the trained model to 'model_noW.h5'.
#model.save('model_noW.h5')