In [1]:
import copy
import time
import math
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from tqdm import tqdm
from datetime import datetime
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, accuracy_score

In [2]:
device = torch.device('cpu')

appa1 = pd.read_csv("noam_exports/appa1.csv")
appa1 = appa1.drop(columns='Unnamed: 0')

appa2 = pd.read_csv("noam_exports/appa1.csv")
appa2 = appa2.drop(columns='Unnamed: 0')

# CNN-LSTM Neural Network

In [3]:
class CNN_LSTM(nn.Module):
    def __init__(self):
        super(CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=40, kernel_size=(4, 37))
        self.lstm = nn.LSTM(21, 200, batch_first=True)
        self.fc1 = nn.Linear(8000, 150)
        self.fc2 = nn.Linear(150, 50)
        self.fc3 = nn.Linear(50, 3)
        self.dropout1 = nn.Dropout(0.03)
        self.dropout2 = nn.Dropout(0.1)
        self.dropout3 = nn.Dropout(0.1)
    
    def forward(self, x, hidden):
        x = self.conv1(x)
        x = F.relu(x)
        x = torch.squeeze(x)
        x, (h_n, c_n) = self.lstm(x, hidden)
        x = F.relu(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = self.dropout1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = self.dropout2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = self.dropout3(x)
        return x, (h_n, c_n)

CNN_LSTM = CNN_LSTM()
print(CNN_LSTM)

CNN_LSTM(
  (conv1): Conv2d(1, 40, kernel_size=(4, 37), stride=(1, 1))
  (lstm): LSTM(21, 200, batch_first=True)
  (fc1): Linear(in_features=8000, out_features=150, bias=True)
  (fc2): Linear(in_features=150, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=3, bias=True)
  (dropout1): Dropout(p=0.03, inplace=False)
  (dropout2): Dropout(p=0.1, inplace=False)
  (dropout3): Dropout(p=0.1, inplace=False)
)


In [4]:
X = appa1[[
       'ZnOR_1', 'ZnOR_2',
       'LaFeO3_1', 'LaFeO3_2',
       'WO3_1', 'WO3_2',
       'ZnOR_1_heatR', 'ZnOR_2_heatR',
       'LaFeO3_1_heatR', 'LaFeO3_2_heatR',
       'WO3_1_heatR', 'WO3_2_heatR',
       'ZnOR_1_heatV', 'ZnOR_2_heatV',
       'LaFeO3_1_heatV', 'LaFeO3_2_heatV',
       'WO3_1_heatV', 'WO3_2_heatV',
       'Temperature', 'Relative_Humidity', 'Pressure', 'VOC',
       'ZnOR_1_Age', 'ZnOR_2_Age',
       'LaFeO3_1_Age', 'LaFeO3_2_Age',
       'WO3_1_Age', 'WO3_2_Age',
       'sin_hour', 'cos_hour',
       'sin_weekday', 'cos_weekday',
       'sin_month', 'cos_month',
       'sin_ordate', 'cos_ordate',
       'year'
    ]].to_numpy()
Y = appa1[['NO2', 'O3', 'CO']].to_numpy()

In [5]:
from sklearn.preprocessing import StandardScaler


scaleX = StandardScaler()
scaleY = StandardScaler()
scaleX.fit(X)
scaleY.fit(Y)

In [6]:
X = scaleX.transform(X)
Y = scaleY.transform(Y)

In [7]:
X = np.lib.stride_tricks.sliding_window_view(X, (24, 37))
Y = Y[X.shape[2] - 1 :]
(X.shape, Y.shape)

((13678, 1, 24, 37), (13678, 3))

In [8]:
train, test = train_test_split(list(zip(X,Y)), test_size=0.2)

In [9]:
train = train[len(train)%7:]
test = test[len(test)%7:]

In [10]:
train_dl = DataLoader(train, batch_size=7, shuffle=True)
test_dl = DataLoader(test, batch_size=7, shuffle=True)

In [14]:
def fit_model(model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CNN_LSTM
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    hidden = (torch.zeros(1, 7, 200), torch.zeros(1, 7, 200))
    epochs, durations, errors = list(range(10)), [], []

    for epoch in epochs:
        train_loss = 0
        epoch_start = datetime.now()
        for features, tag in tqdm(train_dl):
            tag = tag.float()
            optimizer.zero_grad()
            pred, hidden = model(features.clone().detach().float(), hidden)
            loss = criterion(pred, tag)
            optimizer.step()
            train_loss += loss.item()
        
        epoch_end = datetime.now()
        durations.append((epoch_end - epoch_start).total_seconds())
        test_loss = 0
        hidden = (torch.zeros(1, 7, 200), torch.zeros(1, 7, 200))
        with torch.no_grad():
            for features, tag in tqdm(test_dl):
                pred, hidden = model(features.clone().detach().float(), hidden)
                loss = criterion(pred, tag)
                test_loss += loss.item()
                errors.append(mean_absolute_error(tag, pred))
        print(epochs[epoch] + 1, durations[epoch], errors[epoch])
    
    print(epochs)
    print(durations)
    print(errors)
    results = pd.DataFrame(data={
        'Epoch': epochs,
        'Training Time': durations,
        'Error': errors,
        })

    results.to_csv('Results.csv')
    torch.cuda.empty_cache()

In [15]:
fit_model(CNN_LSTM)

100%|██████████| 1563/1563 [00:50<00:00, 31.11it/s]
100%|██████████| 390/390 [00:19<00:00, 20.44it/s]


1 50.248039 1.0349507945722476


100%|██████████| 1563/1563 [00:41<00:00, 37.87it/s]
100%|██████████| 390/390 [00:09<00:00, 42.06it/s] 


2 41.282725 0.6271188045543894


100%|██████████| 1563/1563 [00:22<00:00, 69.56it/s]
100%|██████████| 390/390 [00:11<00:00, 32.51it/s] 


3 22.473886 0.7014039798112757


100%|██████████| 1563/1563 [00:28<00:00, 55.20it/s]
100%|██████████| 390/390 [00:10<00:00, 37.15it/s] 


4 28.320125 1.0300990354514001


100%|██████████| 1563/1563 [00:24<00:00, 64.86it/s]
100%|██████████| 390/390 [00:09<00:00, 42.52it/s] 


5 24.100733 0.816169126055032


100%|██████████| 1563/1563 [00:24<00:00, 63.23it/s]
100%|██████████| 390/390 [00:09<00:00, 43.18it/s] 


6 24.72445 0.5307350510465368


100%|██████████| 1563/1563 [00:35<00:00, 43.73it/s]
100%|██████████| 390/390 [00:20<00:00, 19.17it/s]


7 35.744296 0.7738796623292822


100%|██████████| 1563/1563 [00:31<00:00, 49.78it/s]
100%|██████████| 390/390 [00:12<00:00, 32.20it/s] 


8 31.402012 0.5357940492537375


100%|██████████| 1563/1563 [00:27<00:00, 56.74it/s]
100%|██████████| 390/390 [00:09<00:00, 41.32it/s] 


9 27.547662 0.5787890726038044


100%|██████████| 1563/1563 [00:24<00:00, 62.77it/s]
100%|██████████| 390/390 [00:09<00:00, 41.33it/s] 


10 24.903138 0.6346664662746823
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[50.248039, 41.282725, 22.473886, 28.320125, 24.100733, 24.72445, 35.744296, 31.402012, 27.547662, 24.903138]
[1.0349507945722476, 0.6271188045543894, 0.7014039798112757, 1.0300990354514001, 0.816169126055032, 0.5307350510465368, 0.7738796623292822, 0.5357940492537375, 0.5787890726038044, 0.6346664662746823, 0.7474058499744877, 1.2217859427498372, 0.7923776875527633, 0.8218418773907522, 0.7396118851388144, 0.9991990711649307, 0.5960160997422308, 0.5066190472678431, 0.866118002757823, 0.8750245230627048, 0.9323709928382158, 0.5416826779705818, 0.9409560755427684, 0.6346127368438094, 0.8275681144396154, 0.7464975146410774, 0.698606224361296, 0.8758575822054944, 0.6179060346462225, 0.6300808947869404, 0.7050327063754134, 0.9167560143957014, 0.8001019468364848, 0.6515555146160804, 0.7321080821571537, 0.7003851554754238, 0.9115656690425183, 0.8360540795924755, 0.7301395402586243, 0.9793897614095407, 0.8347097924781082, 0.4919451

ValueError: All arrays must be of the same length