In [1]:
# https://machinelearningmastery.com/pytorch-tutorial-develop-deep-learning-models/
# https://colab.research.google.com/github/rpi-techfundamentals/website_spring_2020/blob/master/content/notebooks/20-deep-learning1/06-regression-bh-pytorch.ipynb#scrollTo=xD9PhAU7hoqT
#!pip install torchvision
import numpy as np
import pandas as pd
import pyreadr
import scipy
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.utils import shuffle
from torch.autograd import Variable


In [2]:
class Net(torch.nn.Module):
    def __init__(self, cols, size_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(cols, size_hidden)   # hidden layer
        self.predict = torch.nn.Linear(size_hidden, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))      # activation function for hidden layer
        x = self.predict(x)             # linear output
        return x

In [3]:
def build_cnn(TrainSet_eonr2_df, policy):
    y_train = TrainSet_eonr2_df['eonr']
    X_train = TrainSet_eonr2_df.drop('eonr', axis=1)
    #Define training hyperprameters.
    batch_size = 50
    num_epochs = 200
    learning_rate = 0.01
    size_hidden= 100
    
    #Calculate some other hyperparameters based on data.  
    batch_no = len(X_train) // batch_size  #batches
    cols=X_train.shape[1] #Number of columns in input matrix
    n_output=1
    #Create the model
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Assume that we are on a CUDA machine, then this should print a CUDA device:
    print("Executing the model on :",device)
    
    net = Net(cols, size_hidden, n_output)
    #Adam is a specific flavor of gradient decent which is typically better
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    #optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
    criterion = torch.nn.MSELoss(size_average=False)  # this is for regression mean squared loss
    X_train=X_train.values
    y_train=y_train.values
    running_loss = 0.0
    for epoch in range(num_epochs):
        #Shuffle just mixes up the dataset between epocs
        X_train, y_train = shuffle(X_train, y_train)
        # Mini batch learning
        for i in range(batch_no):
            start = i * batch_size
            end = start + batch_size
            inputs = Variable(torch.FloatTensor(X_train[start:end]))
            labels = Variable(torch.FloatTensor(y_train[start:end]))
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            #print("outputs",outputs)
            #print("outputs",outputs,outputs.shape,"labels",labels, labels.shape)
            loss = criterion(outputs, torch.unsqueeze(labels,dim=1))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

        #print('Epoch {}'.format(epoch+1), "loss: ",running_loss)
        running_loss = 0.0
        path = '/home/germanm2/n_policy_box/Data/files_rds/cnn_models/'+ policy + '.pth'
        torch.save(net.state_dict(), path)
        return(net)

In [4]:
# Build one CNN
TrainSet_eonr2_df = pyreadr.read_r("/home/germanm2/n_policy_box/Data/files_rds/TrainSet_eonr2.rds")[None] # also works for RData
policy = 'ratio_5'
net_returned = build_cnn(TrainSet_eonr2_df, policy)
net_returned

Executing the model on : cuda:0




Net(
  (hidden): Linear(in_features=21, out_features=100, bias=True)
  (predict): Linear(in_features=100, out_features=1, bias=True)
)

In [6]:
def predict_cnn(prediction_set_aggregated_dt, policy, net):
    #Initialize the eonr model:
    cols=21 #Number of columns in input matrix        
    n_output=1        
    size_hidden= 100  
    
    #Load the cnn model
    #path = '/home/germanm2/n_policy_box/Data/files_rds/cnn_models/'+ policy + '.pth'
    # net = torch.load(path)
    #net.load_state_dict(torch.load(path))
    #net.eval()
    
    #Get X data ready
    X_pred=prediction_set_aggregated_dt.values
    X = Variable(torch.FloatTensor(X_pred)) 
  
    #Make predictions
    y_pred = net(X) #This outputs the value for regression

    y_pred=y_pred.data[:,0].numpy()

    prediction_set_aggregated_dt['eonr_pred'] = y_pred
    return(prediction_set_aggregated_dt)

In [5]:
policy = 'ratio_5'
#Load data
prediction_set_aggregated_dt = pyreadr.read_r("/home/germanm2/n_policy_box/Data/files_rds/prediction_set_aggregated_dt.rds")[None] # also works for RData
prediction_set_aggregated_dt = prediction_set_aggregated_dt[['rain_30', 'rain_60','rain_90', 't_max_30', 't_max_60', 't_max_90', 't_min_30', 't_min_60', 't_min_90', 'Y_prev',
    'Y_corn_lt_avg', 'day_sow', 'day_v5', 'lai_v5', 'whc', 'oc_20cm_v5', 'sw_dep_v5', 'n_0_60cm_v5', 'surfaceom_wt_v5', 'sand_40cm', 'clay_40cm']]

# 

In [8]:
predict_cnn(prediction_set_aggregated_dt, policy, net_returned)

Unnamed: 0,rain_30,rain_60,rain_90,t_max_30,t_max_60,t_max_90,t_min_30,t_min_60,t_min_90,Y_prev,...,day_v5,lai_v5,whc,oc_20cm_v5,sw_dep_v5,n_0_60cm_v5,surfaceom_wt_v5,sand_40cm,clay_40cm,eonr_pred
0,125.0,229.0,17.0,22.050000,15.216667,8.250000,9.900000,3.533333,-4.633333,4573.630436,...,152.0,0.408,294.075688,1.620588,629.483784,35.048204,465.378891,5.071375,21.024234,182.093628
1,140.0,108.0,19.0,22.133333,15.933333,3.383333,10.300000,2.950000,-6.650000,4234.758929,...,147.0,0.402,294.075688,1.625338,666.127141,42.310082,511.010443,5.071375,21.024234,176.533569
2,96.0,101.0,39.0,20.666667,16.783333,9.250000,7.883333,3.533333,-2.416667,4464.527031,...,157.0,0.406,294.075688,1.622531,620.573678,46.126236,497.375201,5.071375,21.024234,178.231461
3,96.0,47.0,33.0,22.016667,17.066667,9.600000,9.333333,3.066667,-2.333333,4179.852874,...,157.0,0.397,294.075688,1.620151,629.544435,70.271481,462.792524,5.071375,21.024234,174.403366
4,43.0,183.0,51.0,23.916667,12.283333,8.650000,9.416667,1.466667,-2.366667,4362.798253,...,147.0,0.398,294.075688,1.623718,623.843297,47.598318,721.119533,5.071375,21.024234,180.963776
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60362,251.0,117.0,53.0,23.083333,15.950000,14.833333,11.316667,5.333333,2.866667,3710.355172,...,123.0,0.332,283.620092,1.141886,709.097091,42.566269,207.000000,20.469739,19.590676,164.336716
60363,138.0,79.0,107.0,22.150000,17.666667,10.900000,11.200000,5.500000,-0.166667,3576.472499,...,125.0,0.314,283.620092,1.140929,654.037549,29.381942,143.842076,20.469739,19.590676,166.382339
60364,141.0,124.0,120.0,21.766667,12.533333,7.133333,10.133333,2.866667,-2.766667,4670.014531,...,129.0,0.289,283.620092,1.141471,664.828798,27.620442,211.432364,20.469739,19.590676,171.695358
60365,190.0,16.0,132.0,21.750000,20.883333,9.366667,9.383333,5.316667,-2.350000,4638.185289,...,128.0,0.320,283.620092,1.137555,670.810914,28.431346,94.554173,20.469739,19.590676,168.055328


In [9]:
policy = 'ratio_5'
path = '/home/germanm2/n_policy_box/Data/files_rds/cnn_models/'+ policy + '.pth'
net2 = Net(21, 100, 1)
net2.state_dict()


OrderedDict([('hidden.weight',
              tensor([[-0.0297, -0.1166,  0.1599,  ...,  0.0611,  0.0683, -0.1019],
                      [ 0.0572,  0.0094, -0.0251,  ...,  0.1406, -0.0022, -0.1195],
                      [-0.1941, -0.0422, -0.1658,  ...,  0.0566, -0.0864,  0.0334],
                      ...,
                      [-0.0111, -0.0164, -0.0520,  ...,  0.1685,  0.2119,  0.0752],
                      [ 0.1124, -0.0167,  0.1739,  ...,  0.2027,  0.0721,  0.0609],
                      [ 0.1063, -0.0288,  0.1322,  ..., -0.1265,  0.0336,  0.1667]])),
             ('hidden.bias',
              tensor([-0.1202, -0.1990,  0.1582,  0.1973, -0.0834, -0.1861, -0.0431, -0.0993,
                       0.0276,  0.1098,  0.0045,  0.1099,  0.1793,  0.2092,  0.2133, -0.0811,
                       0.0279,  0.0737, -0.1619, -0.0896,  0.1100,  0.1916,  0.0935,  0.1362,
                      -0.0465,  0.0536, -0.1766, -0.0758,  0.0827, -0.1536, -0.0397,  0.1075,
                       0.1816,

In [10]:
net2.load_state_dict(torch.load(path))
net2.eval()
net2.state_dict()

OrderedDict([('hidden.weight',
              tensor([[-0.0676, -0.0373, -0.0625,  ..., -0.0039, -0.2168,  0.0727],
                      [-0.1252, -0.0744, -0.0562,  ..., -0.1339, -0.1475,  0.1869],
                      [ 0.1879, -0.0744, -0.0616,  ..., -0.1756,  0.0499, -0.2314],
                      ...,
                      [-0.1955,  0.1512, -0.1365,  ...,  0.0670, -0.1679,  0.0434],
                      [ 0.1207,  0.0385,  0.1566,  ...,  0.0703,  0.0622,  0.0010],
                      [ 0.1169,  0.1795, -0.0860,  ..., -0.1801, -0.1532, -0.1962]])),
             ('hidden.bias',
              tensor([-0.0537,  0.1449, -0.1245,  0.0153,  0.0911, -0.1218, -0.0173,  0.0280,
                       0.0580, -0.3124, -0.1526, -0.1387,  0.1176,  0.0327, -0.1876,  0.1053,
                      -0.1954,  0.1189,  0.2141, -0.1075, -0.2272, -0.0854, -0.2294,  0.0227,
                       0.0784,  0.0967,  0.0996, -0.0641, -0.1181,  0.0893,  0.1770, -0.0827,
                      -0.2538,

In [11]:
net_returned.state_dict()

OrderedDict([('hidden.weight',
              tensor([[-0.0676, -0.0373, -0.0625,  ..., -0.0039, -0.2168,  0.0727],
                      [-0.1252, -0.0744, -0.0562,  ..., -0.1339, -0.1475,  0.1869],
                      [ 0.1879, -0.0744, -0.0616,  ..., -0.1756,  0.0499, -0.2314],
                      ...,
                      [-0.1955,  0.1512, -0.1365,  ...,  0.0670, -0.1679,  0.0434],
                      [ 0.1207,  0.0385,  0.1566,  ...,  0.0703,  0.0622,  0.0010],
                      [ 0.1169,  0.1795, -0.0860,  ..., -0.1801, -0.1532, -0.1962]])),
             ('hidden.bias',
              tensor([-0.0537,  0.1449, -0.1245,  0.0153,  0.0911, -0.1218, -0.0173,  0.0280,
                       0.0580, -0.3124, -0.1526, -0.1387,  0.1176,  0.0327, -0.1876,  0.1053,
                      -0.1954,  0.1189,  0.2141, -0.1075, -0.2272, -0.0854, -0.2294,  0.0227,
                       0.0784,  0.0967,  0.0996, -0.0641, -0.1181,  0.0893,  0.1770, -0.0827,
                      -0.2538,

In [16]:
#Get X data ready
X_pred=prediction_set_aggregated_dt.values
X = Variable(torch.FloatTensor(X_pred)) 
X
#Make predictions
# y_pred = net(X) #This outputs the value for regression
net_returned(X)

RuntimeError: size mismatch, m1: [60367 x 22], m2: [21 x 100] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:136

In [None]:
 Initialize the maturity prediction model:
        model = MaturityPrediction()
        
        for model_code, model_field in field_codes.items():
            # Define the location of the saved weigths:
            model_file_name = f'../data/{model_code}_{fold}_rnd_1_noaug.pth'

            # Load model parameters:
            model.load_state_dict(torch.load(model_file_name))

            # Load the model into the GPU:
            model = model.to(device)
            model.eval()

In [None]:
model = Net()
model.load_state_dict(torch.load(path))

In [None]:
X_pred = prediction_set_aggregated_df[['rain_30', 'rain_60','rain_90', 't_max_30', 't_max_60', 't_max_90', 't_min_30', 't_min_60', 't_min_90', 'Y_prev',
 'Y_corn_lt_avg', 'day_sow', 'day_v5', 'lai_v5', 'whc', 'oc_20cm_v5', 'sw_dep_v5', 'n_0_60cm_v5', 'surfaceom_wt_v5', 'sand_40cm', 'clay_40cm']]
X_pred=X_pred.values

X = Variable(torch.FloatTensor(X_pred)) 
y_pred = net(X) #This outputs the value for regression
y_pred=y_pred.data[:,0].numpy()

prediction_set_aggregated_df['eonr_pred'] = y_pred


In [None]:
#import pandas as pd
X = Variable(torch.FloatTensor(X_train)) 
result = net(X)
# pred=result.data[:,0].numpy()
# print(len(pred),len(y_train))
# r2_score(pred,y_train)

In [None]:
X_pred = prediction_set_aggregated_df[['rain_30', 'rain_60','rain_90', 't_max_30', 't_max_60', 't_max_90', 't_min_30', 't_min_60', 't_min_90', 'Y_prev',
 'Y_corn_lt_avg', 'day_sow', 'day_v5', 'lai_v5', 'whc', 'oc_20cm_v5', 'sw_dep_v5', 'n_0_60cm_v5', 'surfaceom_wt_v5', 'sand_40cm', 'clay_40cm']]
X_pred=X_pred.values
X_pred

In [None]:
# MAKE PREDICTIONS
X = Variable(torch.FloatTensor(X_pred)) 
y_pred = net(X) #This outputs the value for regression
y_pred=y_pred.data[:,0].numpy()
y_pred

In [None]:
prediction_set_aggregated_df['eonr_pred'] = y_pred
prediction_set_aggregated_df.head

In [None]:
# now let's write a Rds
pyreadr.write_rds("/home/germanm2/n_policy_box/Data/files_rds/prediction_set_aggregated_cnn_df.rds", prediction_set_aggregated_df)