## Proposal benchmark model

In [27]:
import torch # install using $ pip install torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
import pandas as pd
import pickle
from sklearn.neural_network import MLPRegressor
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import os

## Dataset preprocessing

In [28]:
# Set dataset locations
# Value matched against the 'type' column when the data is loaded; a name
# containing "AQI" switches label extraction and the model to classification.
selected_output_variable = "OZONE"
# Relative path to the pickled dataset of matched imagery and measurements.
data_file = "01_Data/02_Imagery/data_and_imagery_test.pkl"

In [10]:
def process_data(data_file, selected_output_variable):
    """
    Load matched imagery/measurement records and reshape them into numpy arrays.

    Input:
        data_file: filepath to a pickled table (presumably a pandas DataFrame)
            with matched image and emission data. Columns read here are
            'type', 'imagery' and either 'AQI_level' or 'value'.
        selected_output_variable: value of the 'type' column to keep. If the
            name contains "AQI" the labels are taken from 'AQI_level'
            (classification), otherwise from 'value' (regression).
    Output:
        images: numpy array of shape (resolution * resolution * num_channels, num_images)
        labels: numpy array of shape (1, num_images)
    Raises:
        ValueError: if no row matches selected_output_variable.
    """
    # open file (context manager so the handle is always closed)
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(data_file, 'rb') as f:
        data = pickle.load(f)

    # filter for output variable
    data = data[data['type'] == selected_output_variable]

    # get image dims
    m = len(data)
    if m == 0:
        raise ValueError(
            "no rows with type == {!r} in {}".format(selected_output_variable, data_file)
        )
    # Positional access: after filtering, the index keeps its original labels,
    # so a label lookup with [0] fails unless the first kept row is labelled 0.
    first_image = data['imagery'].iloc[0]
    res, num_channels = first_image.shape[0], first_image.shape[2]

    # get X data as np array and check dims (one flattened image per column)
    images = np.array(data['imagery'].to_list())
    images = images.reshape(images.shape[0], -1).T
    assert images.shape == (res*res*num_channels, m)

    # get y data as np array and check dims
    # Distinguish between preprocessing for classification and regression
    label_column = 'AQI_level' if "AQI" in selected_output_variable else 'value'
    labels = data[label_column].to_numpy().reshape(1, m)
    assert labels.shape == (1, m)

    return images, labels

## Train model

In [26]:
# Fetch the flattened images and their targets
X, y = process_data(data_file, selected_output_variable)
# sklearn wants one sample per row and a 1-D (m,) target vector
X = X.T
y = y.T.ravel()
print("size of X: ", X.shape)
print("size of Y: ", y.shape)

# Hold out part of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print('number of training samples: ', X_train.shape[0])
print('number of test samples: ', X_test.shape[0])

# Classifier for AQI targets, regressor for every other output variable
model = (MLPClassifier if "AQI" in selected_output_variable else MLPRegressor)(
    random_state=1, max_iter=500
)
model.fit(X_train, y_train)

# Evaluate performance on the held-out samples
model.score(X_test, y_test)

size of X:  (7, 3468)
size of Y:  (7,)
number of training samples:  5
number of test samples:  2


-10.496945859572934

In [15]:
# old code below

In [None]:
res = 64
num_channels = 3 # will be 7 later
num_images = 10
learning_rate = 0.05
# Category labels, listed from best to worst
classes = ['good', 'moderate', 'unhealthy_sensitive_groups',
           'unhealthy', 'very_unhealthy', 'hazardous']
# Derived from the label list so the two can never disagree
num_categories = len(classes)

In [20]:
class Net(nn.Module):
    """
    Minimal network consisting of a single ReLU activation.

    The module holds no trainable parameters: forward() applies the
    activation element-wise and returns a tensor shaped like its input.
    """
    def __init__(self):
        super().__init__()
        # Element-wise activation only -- there are no weights in this module
        self.layer1 = nn.ReLU()

    def forward(self, x):
        """
        Forward pass: run x through the ReLU activation
        """
        return self.layer1(x)

In [21]:
def train(x_train, y_train):
    """
    Run one optimisation step on the training data.

    Uses the module-level `net`, `optimizer` and `criterion`, which must be
    defined before this function is called.
    Input:
        x_train: inputs fed to net
        y_train: targets compared against the net output by criterion
    Output:
        the module-level net after the update step
    """
    # A single pass over the whole batch
    for _ in range(1):
        optimizer.zero_grad()  # clear gradients left over from earlier steps
        prediction = net(x_train)
        batch_loss = criterion(prediction, y_train)
        batch_loss.backward()
        optimizer.step()

    # The model weights are not written to disk here
    return net

In [24]:
# Main

net = Net()
criterion = nn.MSELoss() # nn.CrossEntropyLoss()
# NOTE(review): optim.SGD raises ValueError when it receives no parameters, and
# Net as defined in this notebook only wraps nn.ReLU -- confirm the intended layer.
optimizer = optim.SGD(net.parameters(), lr=0.05)

# load data (process_data needs the output variable to filter on)
X, y = process_data(data_file, selected_output_variable)
print("size of X: ", X.shape)
print("size of y: ", y.shape)

# torch modules and losses operate on tensors, not numpy arrays
X = torch.as_tensor(X, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.float32)

# Train model
print(net)
train(X, y)

TypeError: 'type' object is not iterable