In [1]:
# https://machinelearningmastery.com/pytorch-tutorial-develop-deep-learning-models/
# https://colab.research.google.com/github/rpi-techfundamentals/website_spring_2020/blob/master/content/notebooks/20-deep-learning1/06-regression-bh-pytorch.ipynb#scrollTo=xD9PhAU7hoqT
#!pip install torchvision
import numpy as np
import pandas as pd
import pyreadr
import scipy
#Define the model 
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.utils import shuffle
from torch.autograd import Variable

In [16]:
# check pytorch version
import torch
print(torch.__version__)

In [2]:
class Net(torch.nn.Module):
    def __init__(self, cols, size_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(cols, size_hidden)   # hidden layer
        self.predict = torch.nn.Linear(size_hidden, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))      # activation function for hidden layer
        x = self.predict(x)             # linear output
        return x

In [3]:
def build_cnn(TrainSet_eonr2_df, policy):
    #Define training hyperprameters.
    batch_size = 50
    num_epochs = 200
    learning_rate = 0.01
    size_hidden= 100
    
    y_train = TrainSet_eonr2_df['eonr']
    X_train = TrainSet_eonr2_df.drop('eonr', axis=1)

    X_train=X_train.values
    y_train=y_train.values

    #Calculate some other hyperparameters based on data.  
    batch_no = len(X_train) // batch_size  #batches
    cols=X_train.shape[1] #Number of columns in input matrix
    n_output=1

    #Create the model
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Assume that we are on a CUDA machine, then this should print a CUDA device:
    print("Executing the model on :",device)

    net = Net(cols, size_hidden, n_output)

    #Adam is a specific flavor of gradient decent which is typically better
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    #optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
    criterion = torch.nn.MSELoss(size_average=False)  # this is for regression mean squared loss

    running_loss = 0.0
    for epoch in range(num_epochs):
        #Shuffle just mixes up the dataset between epocs
        X_train, y_train = shuffle(X_train, y_train)
        # Mini batch learning
        for i in range(batch_no):
            start = i * batch_size
            end = start + batch_size
            inputs = Variable(torch.FloatTensor(X_train[start:end]))
            labels = Variable(torch.FloatTensor(y_train[start:end]))
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            outputs = net(inputs)
            # calculate loss
            loss = criterion(outputs, torch.unsqueeze(labels,dim=1))
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()

            # print statistics
            running_loss += loss.item()

        #print('Epoch {}'.format(epoch+1), "loss: ",running_loss)
        running_loss = 0.0
    path = '/home/germanm2/n_policy_box/Data/files_rds/cnn_models/'+ policy + '.pth'
    torch.save(net.state_dict(), path)
    return(net)
        

In [4]:
#Build one cnn using the function
TrainSet_eonr2_df = pyreadr.read_r("/home/germanm2/n_policy_box/Data/files_rds/TrainSet_eonr2.rds")[None] # also works for RData
net_return = build_cnn(TrainSet_eonr2_df, 'ratio_5')

Executing the model on : cuda:0




In [5]:
#Make predictions
prediction_set_aggregated_df = pyreadr.read_r("/home/germanm2/n_policy_box/Data/files_rds/prediction_set_aggregated_dt.rds")[None] # also works for RData

X_pred = prediction_set_aggregated_df[['rain_30', 'rain_60','rain_90', 't_max_30', 't_max_60', 't_max_90', 't_min_30', 't_min_60', 't_min_90', 'Y_prev',
 'Y_corn_lt_avg', 'day_sow', 'day_v5', 'lai_v5', 'whc', 'oc_20cm_v5', 'sw_dep_v5', 'n_0_60cm_v5', 'surfaceom_wt_v5', 'sand_40cm', 'clay_40cm']]

X_pred=X_pred.values

X = Variable(torch.FloatTensor(X_pred)) 
y_pred = net_return(X) #This outputs the value for regression
y_pred=y_pred.data[:,0].numpy()
y_pred
prediction_set_aggregated_df['eonr_pred'] = y_pred

# now let's write a Rds
pyreadr.write_rds("/home/germanm2/n_policy_box/Data/files_rds/prediction_set_aggregated_cnn_dt.rds", prediction_set_aggregated_df)
prediction_set_aggregated_df.head()

Unnamed: 0,id_10,id_field,region,z,rain_30,rain_60,rain_90,t_max_30,t_max_60,t_max_90,...,lai_v5,whc,oc_20cm_v5,sw_dep_v5,n_0_60cm_v5,surfaceom_wt_v5,sand_40cm,clay_40cm,area_ha,eonr_pred
0,5,3.0,3.0,11,125.0,229.0,17.0,22.05,15.216667,8.25,...,0.408,294.075688,1.620588,629.483784,35.048204,465.378891,5.071375,21.024234,40.0,154.232986
1,5,3.0,3.0,13,140.0,108.0,19.0,22.133333,15.933333,3.383333,...,0.402,294.075688,1.625338,666.127141,42.310082,511.010443,5.071375,21.024234,40.0,211.74971
2,5,3.0,3.0,15,96.0,101.0,39.0,20.666667,16.783333,9.25,...,0.406,294.075688,1.622531,620.573678,46.126236,497.375201,5.071375,21.024234,40.0,200.675522
3,5,3.0,3.0,17,96.0,47.0,33.0,22.016667,17.066667,9.6,...,0.397,294.075688,1.620151,629.544435,70.271481,462.792524,5.071375,21.024234,40.0,203.07077
4,5,3.0,3.0,19,43.0,183.0,51.0,23.916667,12.283333,8.65,...,0.398,294.075688,1.623718,623.843297,47.598318,721.119533,5.071375,21.024234,40.0,184.873901


In [6]:
#Load the saved model
policy = 'ratio_5'
path = '/home/germanm2/n_policy_box/Data/files_rds/cnn_models/'+ policy + '.pth'
net_load = Net(21, 100, 1)
net_load.load_state_dict(torch.load(path))
net_load.eval()
net_load.state_dict()


OrderedDict([('hidden.weight',
              tensor([[-1.3746, -0.7335, -1.4251,  ...,  0.0869, -0.4680, -0.2601],
                      [ 0.0545, -0.1518, -0.1721,  ...,  0.0531, -0.1213,  0.0527],
                      [ 0.1223,  0.5081, -0.8659,  ..., -0.0530, -0.1435,  0.0357],
                      ...,
                      [-0.4103, -0.4308, -0.0075,  ..., -0.2815, -0.3362, -0.2489],
                      [-0.4272, -0.9284,  0.5645,  ...,  0.3644, -0.1335, -0.2681],
                      [-0.1259,  0.1274, -0.6016,  ...,  0.0117, -0.1735, -0.2937]])),
             ('hidden.bias',
              tensor([ 1.0803e-01, -1.9354e-01, -6.4940e-01, -7.2536e-01, -2.1761e-01,
                       1.7477e-01,  7.3056e-02, -4.1650e-01,  1.2858e-01, -9.4788e-04,
                       6.6519e-02,  1.5888e-01,  1.6248e-02, -1.2759e-01, -4.6698e-01,
                       2.9493e-02, -1.3119e-01,  1.2768e-01, -1.0650e-01,  8.4814e-02,
                       2.0106e-01, -3.1033e-01, -9.5089e-0

In [7]:
#Make predictions
prediction_set_aggregated_df = pyreadr.read_r("/home/germanm2/n_policy_box/Data/files_rds/prediction_set_aggregated_dt.rds")[None] # also works for RData

X_pred = prediction_set_aggregated_df[['rain_30', 'rain_60','rain_90', 't_max_30', 't_max_60', 't_max_90', 't_min_30', 't_min_60', 't_min_90', 'Y_prev',
 'Y_corn_lt_avg', 'day_sow', 'day_v5', 'lai_v5', 'whc', 'oc_20cm_v5', 'sw_dep_v5', 'n_0_60cm_v5', 'surfaceom_wt_v5', 'sand_40cm', 'clay_40cm']]
X_pred=X_pred.values
X_pred
X = Variable(torch.FloatTensor(X_pred)) 
y_pred = net_load(X) #This outputs the value for regression
y_pred=y_pred.data[:,0].numpy()
y_pred
prediction_set_aggregated_df['eonr_pred'] = y_pred

# now let's write a Rds
pyreadr.write_rds("/home/germanm2/n_policy_box/Data/files_rds/prediction_set_aggregated_cnn_dt.rds", prediction_set_aggregated_df)
prediction_set_aggregated_df.head()

Unnamed: 0,id_10,id_field,region,z,rain_30,rain_60,rain_90,t_max_30,t_max_60,t_max_90,...,lai_v5,whc,oc_20cm_v5,sw_dep_v5,n_0_60cm_v5,surfaceom_wt_v5,sand_40cm,clay_40cm,area_ha,eonr_pred
0,5,3.0,3.0,11,125.0,229.0,17.0,22.05,15.216667,8.25,...,0.408,294.075688,1.620588,629.483784,35.048204,465.378891,5.071375,21.024234,40.0,154.232986
1,5,3.0,3.0,13,140.0,108.0,19.0,22.133333,15.933333,3.383333,...,0.402,294.075688,1.625338,666.127141,42.310082,511.010443,5.071375,21.024234,40.0,211.74971
2,5,3.0,3.0,15,96.0,101.0,39.0,20.666667,16.783333,9.25,...,0.406,294.075688,1.622531,620.573678,46.126236,497.375201,5.071375,21.024234,40.0,200.675522
3,5,3.0,3.0,17,96.0,47.0,33.0,22.016667,17.066667,9.6,...,0.397,294.075688,1.620151,629.544435,70.271481,462.792524,5.071375,21.024234,40.0,203.07077
4,5,3.0,3.0,19,43.0,183.0,51.0,23.916667,12.283333,8.65,...,0.398,294.075688,1.623718,623.843297,47.598318,721.119533,5.071375,21.024234,40.0,184.873901


In [8]:
prediction_set_aggregated_df = pyreadr.read_r("/home/germanm2/n_policy_box/Data/files_rds/prediction_set_aggregated_dt.rds")[None]
prediction_set_aggregated_df.head()

Unnamed: 0,id_10,id_field,region,z,rain_30,rain_60,rain_90,t_max_30,t_max_60,t_max_90,...,day_v5,lai_v5,whc,oc_20cm_v5,sw_dep_v5,n_0_60cm_v5,surfaceom_wt_v5,sand_40cm,clay_40cm,area_ha
0,5,3.0,3.0,11,125.0,229.0,17.0,22.05,15.216667,8.25,...,152.0,0.408,294.075688,1.620588,629.483784,35.048204,465.378891,5.071375,21.024234,40.0
1,5,3.0,3.0,13,140.0,108.0,19.0,22.133333,15.933333,3.383333,...,147.0,0.402,294.075688,1.625338,666.127141,42.310082,511.010443,5.071375,21.024234,40.0
2,5,3.0,3.0,15,96.0,101.0,39.0,20.666667,16.783333,9.25,...,157.0,0.406,294.075688,1.622531,620.573678,46.126236,497.375201,5.071375,21.024234,40.0
3,5,3.0,3.0,17,96.0,47.0,33.0,22.016667,17.066667,9.6,...,157.0,0.397,294.075688,1.620151,629.544435,70.271481,462.792524,5.071375,21.024234,40.0
4,5,3.0,3.0,19,43.0,183.0,51.0,23.916667,12.283333,8.65,...,147.0,0.398,294.075688,1.623718,623.843297,47.598318,721.119533,5.071375,21.024234,40.0


In [11]:
def predict_cnn(prediction_set_aggregated_df, policy):
    #Load the saved model
    #policy = 'ratio_5'
    path = '/home/germanm2/n_policy_box/Data/files_rds/cnn_models/'+ policy + '.pth'
    net_load = Net(21, 100, 1)
    net_load.load_state_dict(torch.load(path))
    net_load.eval()
    net_load.state_dict()
    X_pred = prediction_set_aggregated_df[['rain_30', 'rain_60','rain_90', 't_max_30', 't_max_60', 't_max_90', 't_min_30', 't_min_60', 't_min_90', 'Y_prev',
 'Y_corn_lt_avg', 'day_sow', 'day_v5', 'lai_v5', 'whc', 'oc_20cm_v5', 'sw_dep_v5', 'n_0_60cm_v5', 'surfaceom_wt_v5', 'sand_40cm', 'clay_40cm']]
    X_pred=X_pred.values
    X_pred
    X = Variable(torch.FloatTensor(X_pred)) 
    y_pred = net_load(X) #This outputs the value for regression
    y_pred=y_pred.data[:,0].numpy()
    y_pred
    prediction_set_aggregated_df['eonr_pred'] = y_pred
    return(prediction_set_aggregated_df)


In [12]:
prediction_set_aggregated_df2 = predict_cnn(prediction_set_aggregated_df, 'ratio_5')
prediction_set_aggregated_df2

Unnamed: 0,id_10,id_field,region,z,rain_30,rain_60,rain_90,t_max_30,t_max_60,t_max_90,...,lai_v5,whc,oc_20cm_v5,sw_dep_v5,n_0_60cm_v5,surfaceom_wt_v5,sand_40cm,clay_40cm,area_ha,eonr_pred
0,5,3.0,3.0,11,125.0,229.0,17.0,22.050000,15.216667,8.250000,...,0.408,294.075688,1.620588,629.483784,35.048204,465.378891,5.071375,21.024234,40.0,154.232986
1,5,3.0,3.0,13,140.0,108.0,19.0,22.133333,15.933333,3.383333,...,0.402,294.075688,1.625338,666.127141,42.310082,511.010443,5.071375,21.024234,40.0,211.749710
2,5,3.0,3.0,15,96.0,101.0,39.0,20.666667,16.783333,9.250000,...,0.406,294.075688,1.622531,620.573678,46.126236,497.375201,5.071375,21.024234,40.0,200.675522
3,5,3.0,3.0,17,96.0,47.0,33.0,22.016667,17.066667,9.600000,...,0.397,294.075688,1.620151,629.544435,70.271481,462.792524,5.071375,21.024234,40.0,203.070770
4,5,3.0,3.0,19,43.0,183.0,51.0,23.916667,12.283333,8.650000,...,0.398,294.075688,1.623718,623.843297,47.598318,721.119533,5.071375,21.024234,40.0,184.873901
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60362,1526,1.0,1.0,29,251.0,117.0,53.0,23.083333,15.950000,14.833333,...,0.332,283.620092,1.141886,709.097091,42.566269,207.000000,20.469739,19.590676,40.0,211.410553
60363,1526,1.0,1.0,3,138.0,79.0,107.0,22.150000,17.666667,10.900000,...,0.314,283.620092,1.140929,654.037549,29.381942,143.842076,20.469739,19.590676,40.0,276.436218
60364,1526,1.0,1.0,5,141.0,124.0,120.0,21.766667,12.533333,7.133333,...,0.289,283.620092,1.141471,664.828798,27.620442,211.432364,20.469739,19.590676,40.0,222.459717
60365,1526,1.0,1.0,7,190.0,16.0,132.0,21.750000,20.883333,9.366667,...,0.320,283.620092,1.137555,670.810914,28.431346,94.554173,20.469739,19.590676,40.0,287.129211
