# Carbon Emission Prediction Model

In [3]:
from datetime import date
import time
import torch # pip install torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
import os
from ipynb.fs.defs.train_benchmark import process_data; # pip install ipynb
import matplotlib.pyplot as plt
%matplotlib inline

## Dataset load and preprocess

In [5]:
# Set dataset locations
selected_output_variable = "NO2"
data_file = "01_Data/02_Imagery/data_and_imagery_test.pkl"

In [6]:
X, y, res, num_channels, m = process_data(data_file, selected_output_variable) # from benchmark code
# reshape and convert datatype
X = torch.from_numpy(X.T).to(torch.float)
y = torch.from_numpy(y.T).to(torch.float)

# split dataset
percent_train = 0.80
num_train = int(percent_train * m)
num_test = m - num_train
print('num training samples: ', num_train)
print('num testing samples: ', num_test)

X_train, X_test = torch.utils.data.random_split(X, [num_train, num_test])
y_train, y_test = torch.utils.data.random_split(y, [num_train, num_test])

# convert from Subset to Tensor type
X_train, X_test = X_train.dataset[X_train.indices], X_test.dataset[X_test.indices]
y_train, y_test = y_train.dataset[y_train.indices], y_test.dataset[y_test.indices]

ValueError: unsupported pickle protocol: 5

## Define model

In [22]:
class Net(nn.Module):
    """
    Define the neural network: 1 hidden layer with ReLU activation
    """
    def __init__(self, res, num_channels, m):
        """
        Define the network layers
        """
        super(Net,self).__init__()
        self.layer1 = nn.Linear(res*res*num_channels, m) # computes W^T X + b
        
    def forward(self, x):
        """
        Define forward pass
        """
        x = torch.nn.functional.relu(self.layer1(x)) # ReLU activation
        return x

## Train model

In [None]:
def plot_learning(train_loss, test_loss):
    plt.plot(train_loss)
    plt.plot(test_loss)
    plt.legend(['Train loss', 'Test loss'])
    plt.show()
    plt.clf()

In [23]:
def train(x_train, y_train, x_test, y_test, num_epochs, save=False, model_tag=''):
    """
    Train model on training data. If save=True, trained model is saved in ./03_Trained_Models/NN/model_date_<model_tag>.pt
    Input:
        x data and labels
    Output:
        trained model
    """
    print('Training...')
    t0 = time.time()
    train_losses = []
    test_losses = []
    optimizer.zero_grad() # clear gradients
    for epoch in range(num_epochs):
        output = net(x_train)
        loss = criterion(output, y_train)
        if (epoch%10 == 0): # save every 10th loss
            # Train losses
            train_losses.append(loss)
            print('\tloss at epoch %i = %f' %(epoch, loss))
            # Track test losses
            test_output = net(x_test)
            test_loss = criterion(test_output, y_test).item()
            test_losses.append(test_loss)
            print('\ttest loss at epoch %i = %f\n' %(epoch, test_loss))
        loss.backward()
        optimizer.step()

    # Plot training and test loss
    plot_learning(train_losses, test_losses)
    
    # save trained model and training details
    if (save):
        train_date = date.today()
        folder_path = os.path.join('03_Trained_Models', 'NN', 'model_%s_%s' %(train_date, model_tag))
        if (not os.path.exists(folder_path)):
            os.mkdir(folder_path)
        
        # save trained model
        path = os.path.join(folder_path, 'model_%s_%s.pt' %(train_date, model_tag))
        torch.save(net.state_dict(), path)
        
        # save training details
        path = os.path.join(folder_path, 'model_%s_%s_training.txt' %(train_date, model_tag))
        with open(path, 'w') as f:
            f.write("Size of x_train: %s" %(str(x_train.shape)))
            f.write("\nSize of y_train: %s\n" %(str(y_train.shape)))
            f.write("\n Train loss")
            for i, loss in enumerate(train_losses):
                f.write("\nLoss at epoch %i: %f" %(i*10, loss))
            f.write("\n Test loss")
            for i, loss in enumerate(test_losses):
                f.write("\nTest Loss at epoch %i: %f" %(i*10, loss))
            f.write("\n\nTotal train time: %fs" %(time.time() - t0))
        
    return net

## Define hyperparameters

In [24]:
num_epochs = 100 # number iterations for gradient descent

# Varying learning rates
alphas = [0.0001, 0.0005, 0.001, 0.01, 0.05, 0.1]

# Varying number of layers
#Ls = [1, 5, 10, 20, 50, 75]

# Varying optimizers
optimizers = ['SGD', 'Adam']

# Number of iterations 
num_epochs_ = [100, 250, 500, 1000] 

## Main

In [None]:
# Initialize network
net = Net(res, num_channels, m)
L = len(list(net.parameters())) # number of layers
# print(net)

# Initialize loss function and optimizer
if 'AQI' in selected_output_variable:
    criterion = nn.CrossEntropyLoss() # classification
else:
    criterion = nn.MSELoss() # regression

# Test learning rates
for alpha in alphas:
    print("Training with learning rate: {}".format(alpha))
    optimizer = optim.SGD(net.parameters(), lr=alpha)

    # train model and save to ./03_Trained_Models/NN/model_date_<model_tag>.pt
    model_tag = '1_hidden_relu__alpha_{}'.format(alpha)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test optimizers
for optimizer_name in optimizers:
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=0.05)
    else: 
        optimizer = optim.Adam(net.parameters(), lr=0.05)
    
    print("Training optimizer: {}".format(optimizer_name))
    model_tag = '1_hidden_relu__optimizer_{}'.format(optimizer_name)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test iterations
for num_epochs in num_epochs_:
    print("Training number of epochs: {}".format(num_epochs))
    optimizer = optim.SGD(net.parameters(), lr=0.05)

    # train model and save to ./03_Trained_Models/NN/model_date_<model_tag>.pt
    model_tag = '1_hidden_relu__epochs_{}'.format(num_epochs)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test trained model
#print('Testing')
#output = net(X_test)
#loss = criterion(output, y_test).item()
#print('\tTest loss = ', loss)

# Initialize network
net = Net(res, num_channels, m)
L = len(list(net.parameters())) # number of layers
# print(net)

# Initialize loss function and optimizer
if 'AQI' in selected_output_variable:
    criterion = nn.CrossEntropyLoss() # classification
else:
    criterion = nn.MSELoss() # regression

# Test learning rates
for alpha in alphas:
    print("Training with learning rate: {}".format(alpha))
    optimizer = optim.SGD(net.parameters(), lr=alpha)

    # train model and save to ./03_Trained_Models/NN/model_date_<model_tag>.pt
    model_tag = '1_hidden_relu__alpha_{}'.format(alpha)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test optimizers
for optimizer_name in optimizers:
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=0.05)
    else: 
        optimizer = optim.Adam(net.parameters(), lr=0.05)
    
    print("Training optimizer: {}".format(optimizer_name))
    model_tag = '1_hidden_relu__optimizer_{}'.format(optimizer_name)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test iterations
for num_epochs in num_epochs_:
    print("Training number of epochs: {}".format(num_epochs))
    optimizer = optim.SGD(net.parameters(), lr=0.05)

    # train model and save to ./03_Trained_Models/NN/model_date_<model_tag>.pt
    model_tag = '1_hidden_relu__epochs_{}'.format(num_epochs)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test trained model
#print('Testing')
#output = net(X_test)
#loss = criterion(output, y_test).item()
#print('\tTest loss = ', loss)

In [27]:
# Initialize network
net = Net(res, num_channels, m)
L = len(list(net.parameters())) # number of layers
# print(net)

# Initialize loss function and optimizer
if 'AQI' in selected_output_variable:
    criterion = nn.CrossEntropyLoss() # classification
else:
    criterion = nn.MSELoss() # regression

# Test learning rates
for alpha in alphas:
    print("Training with learning rate: {}".format(alpha))
    optimizer = optim.SGD(net.parameters(), lr=alpha)

    # train model and save to ./03_Trained_Models/NN/model_date_<model_tag>.pt
    model_tag = '1_hidden_relu__alpha_{}'.format(alpha)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test optimizers
for optimizer_name in optimizers:
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=0.05)
    else: 
        optimizer = optim.Adam(net.parameters(), lr=0.05)
    
    print("Training optimizer: {}".format(optimizer_name))
    model_tag = '1_hidden_relu__optimizer_{}'.format(optimizer_name)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test iterations
for num_epochs in num_epochs_:
    print("Training number of epochs: {}".format(num_epochs))
    optimizer = optim.SGD(net.parameters(), lr=0.05)

    # train model and save to ./03_Trained_Models/NN/model_date_<model_tag>.pt
    model_tag = '1_hidden_relu__epochs_{}'.format(num_epochs)
    train(X_train, y_train, X_test, y_test, num_epochs, True, model_tag)
    
# Test trained model
#print('Testing')
#output = net(X_test)
#loss = criterion(output, y_test).item()
#print('\tTest loss = ', loss)

Training optimizer: SGD
Training...
	loss at epoch 0 = 268.502502
	test loss at epoch 0 = 105.432564

	loss at epoch 10 = 238.199722
	test loss at epoch 10 = 72.190025

	loss at epoch 20 = 242.243469
	test loss at epoch 20 = 69.423798

	loss at epoch 30 = 264.128998
	test loss at epoch 30 = 87.892616

	loss at epoch 40 = 232.711395
	test loss at epoch 40 = 60.906445

	loss at epoch 50 = 246.592880
	test loss at epoch 50 = 80.887581

	loss at epoch 60 = 261.346130
	test loss at epoch 60 = 97.284584

	loss at epoch 70 = 231.038239
	test loss at epoch 70 = 63.098049

	loss at epoch 80 = 251.603409
	test loss at epoch 80 = 77.279388

	loss at epoch 90 = 257.293823
	test loss at epoch 90 = 81.853081

Training optimizer: Adam
Training...
	loss at epoch 0 = 229.861282
	test loss at epoch 0 = 60.030514

	loss at epoch 10 = 269.459778
	test loss at epoch 10 = 106.450745

	loss at epoch 20 = 269.459778
	test loss at epoch 20 = 106.450745

	loss at epoch 30 = 269.459778
	test loss at epoch 30 = 1

	loss at epoch 110 = 269.459778
	test loss at epoch 110 = 106.450745

	loss at epoch 120 = 269.459778
	test loss at epoch 120 = 106.450745

	loss at epoch 130 = 269.459778
	test loss at epoch 130 = 106.450745

	loss at epoch 140 = 269.459778
	test loss at epoch 140 = 106.450745

	loss at epoch 150 = 269.459778
	test loss at epoch 150 = 106.450745

	loss at epoch 160 = 269.459778
	test loss at epoch 160 = 106.450745

	loss at epoch 170 = 269.459778
	test loss at epoch 170 = 106.450745

	loss at epoch 180 = 269.459778
	test loss at epoch 180 = 106.450745

	loss at epoch 190 = 269.459778
	test loss at epoch 190 = 106.450745

	loss at epoch 200 = 269.459778
	test loss at epoch 200 = 106.450745

	loss at epoch 210 = 269.459778
	test loss at epoch 210 = 106.450745

	loss at epoch 220 = 269.459778
	test loss at epoch 220 = 106.450745

	loss at epoch 230 = 269.459778
	test loss at epoch 230 = 106.450745

	loss at epoch 240 = 269.459778
	test loss at epoch 240 = 106.450745

	loss at epoch 250 =