In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [2]:
df = pd.read_csv('Amreli NRM data for model building final.csv')
campaign = np.unique(df['campaign'])

for c in campaign:
    dfc = df[df['campaign']==c]
    i = dfc.index[0]
    df = df.drop(i)
    
df = df.reset_index(drop=True)
df['Number of capacitors removed'].fillna(0, inplace=True)

threshold = 150

for i in range(df.shape[0]):
    if(df.loc[i,'Total Heat Time previous (minutes)']>threshold):
        df.loc[i,'Total Heat Time previous (minutes)']= threshold
    if(df.loc[i,'Total Heat Time (minutes)']>threshold):
        df.loc[i,'Total Heat Time (minutes)']= threshold
        
df['Input energy previous (KWH)'] = df['Power previous']*df['Total Heat Time previous (minutes)']/60
df['Input energy (KWH)'] = df['Power']*df['Total Heat Time (minutes)']/60

In [4]:
df.columns

Index(['campaign', 'Heat number (life number)',
       'Total Heat Time previous (minutes)', 'Total Heat Time (minutes)',
       'Scrap (MT)', 'Sponge (MT)', 'Hot Heel (MT)', 'Alloys (MT)',
       'Input (MT)', 'Final C%', 'Tapping Temperature (C)', 'Output (MT)',
       'Input energy previous (KWH)', 'Input energy (KWH)', 'Previous Voltage',
       'Previous Frequency', 'Previous Rating of the Furnace', 'Voltage',
       'Frequency', 'Rating of the furnace (KW)', 'Melt rate (MT/Hr)',
       'Efficiency (MT/KWH)', 'Number of capacitors removed',
       'Refractory lining before heating', 'Refractory lining after heating',
       'Previous Input', 'Previous Output', 'Power', 'Power previous'],
      dtype='object')

In [5]:
df1 = df[df['Number of capacitors removed']==0].reset_index(drop=True)
df2 = df[df['Number of capacitors removed']!=0].reset_index(drop=True)

# Without Capacitor removal

In [12]:
input_columns = ['Scrap (MT)', 'Sponge (MT)', 'Hot Heel (MT)', 'Alloys (MT)','Previous Voltage','Previous Frequency','Refractory lining before heating','Number of capacitors removed','Tapping Temperature (C)']
output_columns = ['Voltage','Frequency','Refractory lining after heating']

X_s = df1[input_columns]
from sklearn.preprocessing import MinMaxScaler
scaler1 = MinMaxScaler()
scaler2 = MinMaxScaler()
X = scaler1.fit_transform(X_s)

X_train = X[:191,:]
X_test = X[191:,:]
y_s = df1[output_columns]
y_s1 = y_s.iloc[:191,:].values
y_s2 = y_s.iloc[191:,:].values
y = scaler2.fit_transform(y_s)
y_train = y[:191,:]
y_test = y[191:,:]

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_train1 = lr.predict(X_train)
y_pred_test1 = lr.predict(X_test)

y_pred_train = scaler2.inverse_transform(y_pred_train1)
y_pred_test = scaler2.inverse_transform(y_pred_test1)


for i in range(3):
    print(output_columns[i])
    print("Train score is {}".format(mean_squared_error(y_s1[:,i], y_pred_train[:,i])))
    print("Test score is {}".format(mean_squared_error(y_s2[:,i], y_pred_test[:,i])))
    print("R2 score is {}".format(r2_score(y_s2[:,i], y_pred_test[:,i])))
    print("Train NMSE score is {}".format(np.sum(np.square(y_pred_train[:,i]-y_s1[:,i]))/np.sum(np.square(y_s1[:,i]-np.mean(y_s1[:,i])))*100))
    print("Test NMSE score is {}".format(np.sum(np.square(y_pred_test[:,i]-y_s2[:,i]))/np.sum(np.square(y_s2[:,i]-np.mean(y_s2[:,i])))*100))

for i in range(lr.coef_.shape[0]):
    for j in range(lr.coef_.shape[1]):
        print("Weight of {} on {} is {}".format(input_columns[j], output_columns[i],lr.coef_[i][j]))
    print('\n')

Voltage
Train score is 2573.0622978765145
Test score is 1544.498060086697
R2 score is 0.9355223393507034
Train NMSE score is 15.097670244450626
Test NMSE score is 6.447766064929665
Frequency
Train score is 9.199292614815265
Test score is 2.886873758281543
R2 score is 0.935901742398932
Train NMSE score is 10.607350751874012
Test NMSE score is 6.409825760106805
Refractory lining after heating
Train score is 2.7031194211736075e-05
Test score is 7.175876376530534e-05
R2 score is 0.9997666695556623
Train NMSE score is 0.008147182157644605
Test NMSE score is 0.023333044433776
Weight of Scrap (MT) on Voltage is -0.002867865180835507
Weight of Sponge (MT) on Voltage is 0.0017990143996006577
Weight of Hot Heel (MT) on Voltage is 2.220446049250313e-16
Weight of Alloys (MT) on Voltage is -0.048026227907818866
Weight of Previous Voltage on Voltage is 0.8380754202749178
Weight of Previous Frequency on Voltage is -0.19007486341281804
Weight of Refractory lining before heating on Voltage is 0.1072893

In [14]:
import pickle
pickle.dump(lr, open('lin_model_no_capacitors_removed.pkl', 'wb'))
pickle.dump(scaler1, open('scaler_input_no_capacitors_removed.pkl', 'wb'))
pickle.dump(scaler2, open('scaler_output_no_capacitors_removed.pkl', 'wb'))

# Capacitors removed

In [13]:
input_columns = ['Scrap (MT)', 'Sponge (MT)', 'Hot Heel (MT)', 'Alloys (MT)','Previous Voltage','Previous Frequency','Refractory lining before heating','Number of capacitors removed','Tapping Temperature (C)']
output_columns = ['Voltage','Frequency','Refractory lining after heating']

X = df[input_columns]
X_train = X.iloc[:318,:].values
X_test = X.iloc[318:,:].values
y = df[output_columns]
y_train = y.iloc[:318,:].values
y_test = y.iloc[318:,:].values

scaler1 = MinMaxScaler()
normalized_X_train = scaler1.fit_transform(X_train)
normalized_X_test = scaler1.transform(X_test)

scaler2 = MinMaxScaler()
normalized_y_train = scaler2.fit_transform(y_train)
normalized_y_test = scaler2.transform(y_test)

X_train = X[:127,:]
X_test = X[127:,:]
y_s = df2[output_columns]
y_s1 = y_s.iloc[:127,:].values
y_s2 = y_s.iloc[127:,:].values
y = scaler2.fit_transform(y_s)
y_train = y[:127,:]
y_test = y[127:,:]

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_train1 = lr.predict(X_train)
y_pred_test1 = lr.predict(X_test)

y_pred_train = scaler2.inverse_transform(y_pred_train1)
y_pred_test = scaler2.inverse_transform(y_pred_test1)


for i in range(3):
    print(output_columns[i])
    print("Train score is {}".format(mean_squared_error(y_s1[:,i], y_pred_train[:,i])))
    print("Test score is {}".format(mean_squared_error(y_s2[:,i], y_pred_test[:,i])))
    print("R2 score is {}".format(r2_score(y_s2[:,i], y_pred_test[:,i])))
    print("Train NMSE score is {}".format(np.sum(np.square(y_pred_train[:,i]-y_s1[:,i]))/np.sum(np.square(y_s1[:,i]-np.mean(y_s1[:,i])))*100))
    print("Test NMSE score is {}".format(np.sum(np.square(y_pred_test[:,i]-y_s2[:,i]))/np.sum(np.square(y_s2[:,i]-np.mean(y_s2[:,i])))*100))

for i in range(lr.coef_.shape[0]):
    for j in range(lr.coef_.shape[1]):
        print("Weight of {} on {} is {}".format(input_columns[j], output_columns[i],lr.coef_[i][j]))
    print('\n')

Voltage
Train score is 8191.7792137506485
Test score is 10225.704204549207
R2 score is 0.06963319187479755
Train NMSE score is 77.40881709012942
Test NMSE score is 93.03668081252025
Frequency
Train score is 26.406419703051668
Test score is 41.40223847796711
R2 score is 0.9339811887271484
Train NMSE score is 4.1198774780208955
Test NMSE score is 6.60188112728516
Refractory lining after heating
Train score is 5.6560996157146714e-05
Test score is 0.00020050004809758543
R2 score is 0.9980060060909136
Train NMSE score is 0.027896040051354765
Test NMSE score is 0.1993993909086339
Weight of Scrap (MT) on Voltage is -0.3465704330030269
Weight of Sponge (MT) on Voltage is -0.38397666347164017
Weight of Hot Heel (MT) on Voltage is -1.3877787807814457e-16
Weight of Alloys (MT) on Voltage is 0.010877481352767216
Weight of Previous Voltage on Voltage is 0.328442410080269
Weight of Previous Frequency on Voltage is -0.12101555307536246
Weight of Refractory lining before heating on Voltage is 0.301492

In [15]:
import pickle
pickle.dump(lr, open('lin_model_capacitors_removed.pkl', 'wb'))
pickle.dump(scaler1, open('scaler_input_capacitors_removed.pkl', 'wb'))
pickle.dump(scaler2, open('scaler_output_capacitors_removed.pkl', 'wb'))