In [32]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [37]:
data = pd.read_csv('stockpile.csv', header=0)
print(data.shape)
data = data.drop(["Date"], axis = 1)
data

(479, 17)


Unnamed: 0,Time,Temperature,wind_direction,Ambient Temperature (in °C),Delta Temp (Y),Log (Y),Temperature - 1 (in °C),Delta Temp (Y-1),North,East,West,South,Wind Impact (Should be linear variation & wind speed),FACE-A,FACE-B,FACE-C
0,10:30:00,44,165,27,17,2.833213,40,13,0,0,1,0,0.000000,1,0,0
1,10:30:00,51,165,27,24,3.178054,43,16,0,0,1,0,0.000000,0,1,0
2,10:30:00,53,165,27,26,3.258097,49,22,0,0,0,1,0.965926,1,0,0
3,10:30:00,61,165,27,34,3.526361,58,31,0,0,0,1,0.965926,0,1,0
4,10:30:00,65,165,27,38,3.637586,62,35,0,0,0,1,0.965926,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,15:30:00,41,180,27,14,2.639057,37,10,0,0,0,1,1.000000,1,0,0
475,15:30:00,35,180,27,8,2.079442,36,9,0,0,0,1,1.000000,0,1,0
476,15:30:00,32,180,27,5,1.609438,44,17,0,1,0,0,0.000000,1,0,0
477,15:30:00,32,180,27,5,1.609438,38,11,1,0,0,0,0.000000,0,1,0


In [38]:
# label_encoder object knows how to understand word labels. 
label_encoder = LabelEncoder() 
unique_times =data['Time'].unique() 
print("initial:", unique_times)
data['Time']= label_encoder.fit_transform(data['Time']) 
print("transformed:", label_encoder.transform(unique_times))
data['Time'].unique() 

initial: ['10:30:00' '11:00:00' '11:30:00' '12:00:00' '12:30:00' '13:00:00'
 '10:00:00' '14:30:00' '15:00:00' '15:30:00' '13:30:00']
transformed: [ 1  2  3  4  5  6  0  8  9 10  7]


array([ 1,  2,  3,  4,  5,  6,  0,  8,  9, 10,  7])

In [39]:
stdscl = StandardScaler(copy=False, with_mean = False)
stdscl.fit(data)
data = stdscl.transform(data)

data

array([[0.31563061, 3.87360362, 1.96119537, ..., 2.04599992, 0.        ,
        0.        ],
       [0.31563061, 4.48985874, 1.96119537, ..., 0.        , 2.02363859,
        0.        ],
       [0.31563061, 4.66593163, 1.96119537, ..., 2.04599992, 0.        ,
        0.        ],
       ...,
       [3.15630611, 2.81716627, 2.13948586, ..., 2.04599992, 0.        ,
        0.        ],
       [3.15630611, 2.81716627, 2.13948586, ..., 0.        , 2.02363859,
        0.        ],
       [3.15630611, 2.64109338, 2.13948586, ..., 0.        , 0.        ,
        2.59377831]])

Pytorch model

In [44]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

In [56]:
class Feedforward(nn.Module):
        def __init__(self, input_size, hidden_size):
            super().__init__()
            self.input_size = input_size
            self.hidden_size  = hidden_size
            
            self.fc1 = nn.Linear(self.input_size, self.hidden_size)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(self.hidden_size, 1)
            #self.sigmoid = nn.Sigmoid()
        def forward(self, x):
            x = self.relu(self.fc1(x))
            output = self.fc2(x)
            #output = self.sigmoid(op)
            return output

In [57]:
torch.cuda.is_available()

True

In [58]:
#hyperparameters
epochs = 120
batch_sz = 24

pmodel = Feedforward(15, 20)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(pmodel.parameters(), lr = 0.005)

In [49]:
from sklearn.model_selection import train_test_split
#x = data.values[:,0:-1]
y = data[:, 1] # change here to -1
x = np.delete(data, 1, axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=7)

In [52]:
x_train = torch.FloatTensor(x_train)
x_test = torch.FloatTensor(x_test)

y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)

train_data = TensorDataset(x_train,y_train)
train_loader = DataLoader(train_data,batch_size=batch_sz,shuffle=True)         

In [59]:
pmodel.eval()
y_pred = pmodel(x_test)
before_train = criterion(y_pred.squeeze(), y_test)
print('Test loss before training' , before_train.item())

Test loss before training 27.96247673034668


In [60]:
#training 
pmodel.train()
    
for epoch in range(epochs):
    loss = 0
    for x, y in train_loader:
        optimizer.zero_grad()
        # Forward pass
        cls_out= pmodel(x)
        cls_out = cls_out.squeeze()
        # Compute Loss
        c_loss = criterion(cls_out, y)
        c_loss.backward()
        optimizer.step()        
        loss += c_loss.item()    

    #checking model's performance for each epoch
    #with torch.no_grad():            
    #    zs = classifier(dtest)
    #pred = zs.max(1)[1]
    if (epoch % 10 == 0):
        with torch.no_grad():
            y_pred =pmodel(x_train)
            r2acc = r2_score(y_train, y_pred)
            netloss = loss / len(train_loader)
        print('Epoch: {} :-- loss:= {:.3f} :-------: r2_Accuracy:= {:.3f}'.format(epoch, netloss, r2acc))

Epoch: 0 :-- loss:= 2.836 :-------: r2_Accuracy:= 0.701
Epoch: 10 :-- loss:= 0.150 :-------: r2_Accuracy:= 0.880
Epoch: 20 :-- loss:= 0.078 :-------: r2_Accuracy:= 0.925
Epoch: 30 :-- loss:= 0.051 :-------: r2_Accuracy:= 0.945
Epoch: 40 :-- loss:= 0.040 :-------: r2_Accuracy:= 0.964
Epoch: 50 :-- loss:= 0.031 :-------: r2_Accuracy:= 0.973
Epoch: 60 :-- loss:= 0.027 :-------: r2_Accuracy:= 0.982
Epoch: 70 :-- loss:= 0.016 :-------: r2_Accuracy:= 0.986
Epoch: 80 :-- loss:= 0.020 :-------: r2_Accuracy:= 0.986
Epoch: 90 :-- loss:= 0.026 :-------: r2_Accuracy:= 0.987
Epoch: 100 :-- loss:= 0.010 :-------: r2_Accuracy:= 0.977
Epoch: 110 :-- loss:= 0.013 :-------: r2_Accuracy:= 0.992


In [61]:
pmodel.eval()
for it in range(154):
    print('{:.3f} : {:.3f}'.format(pmodel(x_train[it]).squeeze().item(), y_train[it].item()))

3.140 : 3.081
4.661 : 4.754
3.790 : 3.786
5.294 : 5.282
4.219 : 4.138
4.161 : 4.050
5.227 : 5.194
3.229 : 3.169
4.591 : 4.666
4.641 : 4.666
5.782 : 5.810
3.650 : 3.609
3.097 : 3.081
5.127 : 5.106
4.928 : 4.842
5.148 : 5.194
3.459 : 3.433
3.561 : 3.521
5.572 : 5.634
4.743 : 4.754
3.791 : 3.874
3.302 : 3.345
2.885 : 2.905
5.782 : 5.810
5.155 : 5.106
5.703 : 5.722
4.923 : 4.930
5.543 : 5.634
4.173 : 4.138
5.004 : 4.930
3.187 : 3.169
4.829 : 4.754
4.976 : 4.930
2.638 : 2.729
5.767 : 5.810
5.932 : 5.986
5.177 : 5.282
5.237 : 5.194
4.694 : 4.666
3.562 : 3.609
3.577 : 3.521
5.076 : 5.106
5.463 : 5.458
3.006 : 2.905
5.137 : 5.106
4.006 : 3.962
4.785 : 4.754
2.312 : 2.553
3.803 : 3.786
5.792 : 5.810
5.819 : 5.898
2.952 : 2.905
5.548 : 5.546
6.012 : 6.075
4.313 : 4.314
5.334 : 5.282
4.264 : 4.138
5.683 : 5.722
3.822 : 3.786
4.584 : 4.578
4.363 : 4.402
4.429 : 4.402
5.516 : 5.546
4.170 : 4.138
4.924 : 4.842
4.152 : 4.050
6.076 : 6.163
4.076 : 4.050
5.363 : 5.370
4.523 : 4.490
4.746 : 4.754
5.374 

In [63]:
pmodel.eval()
with torch.no_grad():
    y_pred = pmodel(x_test)
    y_pred = y_pred.squeeze().detach().numpy()
    y_predtr = pmodel(x_train)
    y_predtr = y_predtr.squeeze().detach().numpy()

print ('Average R^2 Value on Test: ', r2_score(y_test, y_pred))
print ('Average Mean Absolute Error on Test: ', mean_absolute_error(y_test, y_pred))
print ('Average R^2 Value on Train: ', r2_score(y_train, y_predtr))

Average R^2 Value on Test:  0.9930365517755998
Average Mean Absolute Error on Test:  0.06236893
Average R^2 Value on Train:  0.9944786505151687


Keras Model

In [18]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=7)

In [21]:
# Create the model
model = Sequential()
#First Hidden Layer
model.add(Dense(16, activation='relu', kernel_initializer='random_normal', input_dim=10))
#Output Layer
model.add(Dense(1, activation='linear', kernel_initializer='random_normal'))

opt = Adam(lr=0.005)
model.compile(loss='mean_absolute_error', optimizer=opt, metrics=['mean_squared_error'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 16)                176       
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 17        
Total params: 193
Trainable params: 193
Non-trainable params: 0
_________________________________________________________________


In [22]:
model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=1, validation_split=0.2)



Train on 123 samples, validate on 31 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100


Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x207efcb9438>

In [25]:
test_pred = model.predict(x_test)
test_pred = test_pred.reshape(1, len(test_pred))[0]

train_pred = model.predict(x_train)
train_pred = train_pred.reshape(1, len(train_pred))[0]

print ('Average R^2 Value on Test: ', r2_score(y_test, test_pred))
print ('Average Mean Absolute Error: ', )
print ('Average R^2 Value on Train: ', r2_score(y_train, train_pred))

Average R^2 Value on Test:  0.759547341759286
Average Mean Absolute Error: 
Average R^2 Value on Train:  0.7400547893193266


In [26]:
for it in range(154):
    print(model.predict([x_train[it:it+1]]), y_train[it])

[[64.974304]] 67.67
[[63.102207]] 65.67
[[55.37545]] 61.0
[[61.605816]] 54.0
[[40.068268]] 48.0
[[55.994965]] 51.0
[[40.99834]] 47.5
[[41.572384]] 41.0
[[28.04099]] 34.0
[[33.642048]] 38.0
[[39.752804]] 36.5
[[40.385723]] 39.5
[[46.040916]] 38.0
[[48.302635]] 39.33
[[37.109974]] 42.0
[[46.996178]] 45.0
[[44.091393]] 60.5
[[37.62906]] 36.5
[[42.832985]] 45.0
[[49.059956]] 41.0
[[55.679375]] 60.5
[[40.281372]] 43.33
[[43.338715]] 42.0
[[34.717087]] 34.0
[[55.42656]] 55.0
[[59.968407]] 54.0
[[38.101692]] 32.0
[[62.14511]] 65.67
[[60.661026]] 62.67
[[52.68255]] 45.0
[[53.997406]] 52.0
[[62.08177]] 54.33
[[42.200096]] 40.67
[[56.689583]] 55.0
[[61.573357]] 64.0
[[44.523132]] 47.0
[[51.168007]] 52.0
[[53.58818]] 55.5
[[44.27864]] 52.0
[[29.296204]] 36.33
[[61.38888]] 55.0
[[62.40056]] 53.5
[[54.49649]] 48.0
[[59.362606]] 61.5
[[55.181557]] 59.0
[[52.235023]] 47.5
[[35.67329]] 40.0
[[37.00798]] 30.5
[[37.218445]] 39.67
[[52.315098]] 47.5
[[52.03641]] 46.5
[[66.134125]] 65.67
[[43.12956]] 45.3

# Data Scrubbing

In [8]:
lt1 = [col for col in data.columns if len(data[col].unique()) == 1]
#lt1.append('S.No.') #removing unnecessary columns
print(lt1)
data= data.drop(columns= lt1)

nextdrop = ['Day Part 1\n(<12 pm)', 'Day Part 2\n(12-2 pm)','Day Part 3\n(>2 pm)', 'view', 'S.No.']
data= data.drop(columns= nextdrop)
data

facescat = pd.get_dummies(data['Location'])
data = pd.concat([data,facescat],axis=1).drop(['Location'], axis=1)
data

['Stockpile', 'Humidity', 'Chemical Spray', 'Stock Pile Type']


Unnamed: 0,Date,Time,Temperature,wind_direction,Ambient Temperature (in °C),Delta Temp (Y),Log (Y),Temperature - 1 (in °C),Delta Temp (Y-1),North,East,West,South,Wind Impact (Should be linear variation & wind speed),FACE-A,FACE-B,FACE-C
0,2018-02-01,10:30:00,44,165,27,17,2.833213,40,13,0,0,1,0,0.000000,1,0,0
1,2018-02-01,10:30:00,51,165,27,24,3.178054,43,16,0,0,1,0,0.000000,0,1,0
2,2018-02-01,10:30:00,53,165,27,26,3.258097,49,22,0,0,0,1,0.965926,1,0,0
3,2018-02-01,10:30:00,61,165,27,34,3.526361,58,31,0,0,0,1,0.965926,0,1,0
4,2018-02-01,10:30:00,65,165,27,38,3.637586,62,35,0,0,0,1,0.965926,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,2018-02-10,15:30:00,41,180,27,14,2.639057,37,10,0,0,0,1,1.000000,1,0,0
475,2018-02-10,15:30:00,35,180,27,8,2.079442,36,9,0,0,0,1,1.000000,0,1,0
476,2018-02-10,15:30:00,32,180,27,5,1.609438,44,17,0,1,0,0,0.000000,1,0,0
477,2018-02-10,15:30:00,32,180,27,5,1.609438,38,11,1,0,0,0,0.000000,0,1,0


In [9]:
data.to_csv("stockpile.csv", index=False)

In [10]:
ddf = pd.read_csv('stockpile.csv', header=0)
ddf

Unnamed: 0,Date,Time,Temperature,wind_direction,Ambient Temperature (in °C),Delta Temp (Y),Log (Y),Temperature - 1 (in °C),Delta Temp (Y-1),North,East,West,South,Wind Impact (Should be linear variation & wind speed),FACE-A,FACE-B,FACE-C
0,2018-02-01,10:30:00,44,165,27,17,2.833213,40,13,0,0,1,0,0.000000,1,0,0
1,2018-02-01,10:30:00,51,165,27,24,3.178054,43,16,0,0,1,0,0.000000,0,1,0
2,2018-02-01,10:30:00,53,165,27,26,3.258097,49,22,0,0,0,1,0.965926,1,0,0
3,2018-02-01,10:30:00,61,165,27,34,3.526361,58,31,0,0,0,1,0.965926,0,1,0
4,2018-02-01,10:30:00,65,165,27,38,3.637586,62,35,0,0,0,1,0.965926,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,2018-02-10,15:30:00,41,180,27,14,2.639057,37,10,0,0,0,1,1.000000,1,0,0
475,2018-02-10,15:30:00,35,180,27,8,2.079442,36,9,0,0,0,1,1.000000,0,1,0
476,2018-02-10,15:30:00,32,180,27,5,1.609438,44,17,0,1,0,0,0.000000,1,0,0
477,2018-02-10,15:30:00,32,180,27,5,1.609438,38,11,1,0,0,0,0.000000,0,1,0
