In [1]:
import torch
import warnings
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from tqdm import tqdm
from datetime import datetime
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import median_absolute_error, mean_absolute_error, r2_score

# Module-level device handle, pinned to the CPU.
device = torch.device('cpu')
# NOTE(review): with no category/module filter this suppresses every Python
# warning for the rest of the session, including deprecation and numerical
# warnings from torch/pandas/sklearn.
warnings.filterwarnings("ignore")

In [2]:
# Read both exported tables and discard the 'Unnamed: 0' column
# (presumably an index written by a previous to_csv call - confirm).
appa1 = pd.read_csv("noam_exports/appa1.csv").drop(columns='Unnamed: 0')
appa2 = pd.read_csv("noam_exports/appa2.csv").drop(columns='Unnamed: 0')

# CNN-LSTM Neural Network

In [3]:
class CNN_LSTM(nn.Module):
    """Convolution -> LSTM -> fully connected regressor with three outputs.

    For the (batch, 1, 24, 37) windows built in this notebook the tensor
    shapes through ``forward`` are:

    * conv     -> (batch, 40, 21, 1)   (kernel (4, 37) spans the full width)
    * squeeze  -> (batch, 40, 21)
    * LSTM     -> (batch, 40, 200)     (the 40 conv channels act as the sequence)
    * flatten  -> (batch, 8000)        (40 * 200, hard-wired into ``fc1``)
    * head     -> (batch, 3)
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(in_channels=1, out_channels=40, kernel_size=(4, 37))
        self.lstm = nn.LSTM(21, 200, batch_first=True)
        # 8000 = 40 conv channels * 200 LSTM hidden units.
        self.fc1 = nn.Linear(8000, 150)
        self.fc2 = nn.Linear(150, 50)
        self.fc3 = nn.Linear(50, 3)
        self.dropout1 = nn.Dropout(0.03)
        self.dropout2 = nn.Dropout(0.1)
        self.dropout3 = nn.Dropout(0.1)

    def forward(self, x, hidden=None):
        """Run one batch through the network.

        Args:
            x: input tensor; the layer sizes above expect (batch, 1, 24, 37).
            hidden: optional ``(h_0, c_0)`` pair for the LSTM, each of shape
                (1, batch, 200). ``None`` lets ``nn.LSTM`` start from zeros.

        Returns:
            ``(output, (h_n, c_n))`` where ``output`` is (batch, 3) and the
            second element is the final LSTM state.
        """
        x = self.conv(x)
        x = F.relu(x)
        # Drop only the trailing width axis. A bare squeeze() would also remove
        # the batch axis whenever batch == 1 and break the LSTM call below.
        x = x.squeeze(-1)
        x, (h_n, c_n) = self.lstm(x, hidden)
        x = F.relu(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = self.dropout1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = self.dropout2(x)
        x = F.relu(x)
        x = self.fc3(x)
        # NOTE(review): dropout on the final regression outputs zeroes/rescales
        # predictions while the module is in training mode; kept as in the
        # original design - confirm it is intended.
        x = self.dropout3(x)
        return x, (h_n, c_n)

# Instantiate the network.
# NOTE(review): this rebinds the name CNN_LSTM from the class to the instance,
# so the class itself is no longer reachable by name after this line; later
# cells refer to the instance under this name.
CNN_LSTM = CNN_LSTM()
# Print the module tree (registered layers and their hyper-parameters).
print(CNN_LSTM)

CNN_LSTM(
  (conv1): Conv2d(1, 40, kernel_size=(4, 37), stride=(1, 1))
  (lstm): LSTM(21, 200, batch_first=True)
  (fc1): Linear(in_features=8000, out_features=150, bias=True)
  (fc2): Linear(in_features=150, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=3, bias=True)
  (dropout1): Dropout(p=0.03, inplace=False)
  (dropout2): Dropout(p=0.1, inplace=False)
  (dropout3): Dropout(p=0.1, inplace=False)
)


In [4]:
# Model inputs: 37 columns (sensor readings, heater resistance/voltage,
# environment, sensor age, cyclic time encodings, year). The count must match
# the width of the convolution kernel and of the sliding window (37).
X = appa1[[
       'ZnOR_1', 'ZnOR_2',
       'LaFeO3_1', 'LaFeO3_2',
       'WO3_1', 'WO3_2',
       'ZnOR_1_heatR', 'ZnOR_2_heatR',
       'LaFeO3_1_heatR', 'LaFeO3_2_heatR',
       'WO3_1_heatR', 'WO3_2_heatR',
       'ZnOR_1_heatV', 'ZnOR_2_heatV',
       'LaFeO3_1_heatV', 'LaFeO3_2_heatV',
       'WO3_1_heatV', 'WO3_2_heatV',
       'Temperature', 'Relative_Humidity', 'Pressure', 'VOC',
       'ZnOR_1_Age', 'ZnOR_2_Age',
       'LaFeO3_1_Age', 'LaFeO3_2_Age',
       'WO3_1_Age', 'WO3_2_Age',
       'sin_hour', 'cos_hour',
       'sin_weekday', 'cos_weekday',
       'sin_month', 'cos_month',
       'sin_ordate', 'cos_ordate',
       'year'
    ]].to_numpy()
# Targets: three columns, in the order fit_model labels them (NO2, CO, O3).
# The network emits three values per sample and the recorded output of the
# windowing cell shows Y with three columns, so a two-column target cannot be
# compared against the predictions.
# NOTE(review): assumes appa1 has an 'O3' column - confirm against the CSV.
Y = appa1[['NO2', 'CO', 'O3']].to_numpy()

In [5]:
# Standardise features and targets to zero mean / unit variance, keeping the
# fitted scalers so values can be mapped back to original units later.
# NOTE(review): both scalers are fitted on the full dataset before the
# train/test split, so statistics of the test rows leak into training.
scaleX, scaleY = StandardScaler(), StandardScaler()
X = scaleX.fit_transform(X)
Y = scaleY.fit_transform(Y)

In [6]:
# Slice the feature matrix into overlapping windows of 24 consecutive rows
# (all 37 columns); the result carries a singleton axis in position 1.
# NOTE: this cell overwrites X and Y, so it must not be re-run on its own.
X = np.lib.stride_tricks.sliding_window_view(X, window_shape=(24, 37))
# Keep one target per window: the row aligned with the window's last time step.
Y = Y[len(Y) - len(X):]
X.shape, Y.shape

((13678, 1, 24, 37), (13678, 3))

In [7]:
# Pair every window with its target and hold out 20% for evaluation.
# A fixed random_state makes the split (and therefore the reported metrics)
# reproducible across kernel restarts.
# NOTE(review): consecutive windows overlap in 23 of 24 rows, so a shuffled
# split places near-duplicate windows in both sets; consider a chronological
# split (shuffle=False) for an honest estimate.
train, test = train_test_split(list(zip(X, Y)), test_size=0.2, random_state=42)

In [8]:
# Drop the leading remainder of each split so its length is an exact multiple
# of 7, the batch size used by the loaders and the LSTM hidden state.
train, test = (split[len(split) % 7:] for split in (train, test))

In [9]:
# Wrap both splits in shuffling loaders that yield batches of 7 samples.
train_dl, test_dl = (
    DataLoader(split, batch_size=7, shuffle=True) for split in (train, test)
)

In [10]:
def _zero_hidden(model, batch_size, device):
    """Return a fresh all-zero ``(h_0, c_0)`` pair sized for ``model.lstm``."""
    directions = 2 if model.lstm.bidirectional else 1
    shape = (model.lstm.num_layers * directions, batch_size, model.lstm.hidden_size)
    return torch.zeros(shape, device=device), torch.zeros(shape, device=device)


def fit_model(model, train_loader=None, test_loader=None, epochs=10,
              lr=0.01, momentum=0.9, results_path='Results.csv'):
    """Train ``model`` with SGD/MSE and evaluate it after every epoch.

    Args:
        model: network whose ``forward(features, hidden)`` returns
            ``(predictions, hidden)`` and which exposes its LSTM as
            ``model.lstm``; predictions must have three columns.
        train_loader: iterable of ``(features, target)`` batches; defaults to
            the notebook-level ``train_dl``.
        test_loader: iterable of ``(features, target)`` batches; defaults to
            the notebook-level ``test_dl``.
        epochs: number of passes over ``train_loader``.
        lr: SGD learning rate.
        momentum: SGD momentum.
        results_path: CSV file the per-epoch metrics are written to.

    Returns:
        DataFrame with one row per epoch: epoch number, training time in
        seconds, median/mean absolute error for NO2, CO and O3 on the test
        loader, and the mean train/test MSE per batch.

    Raises:
        ValueError: if ``test_loader`` yields no batches.

    NOTE(review): if the loaders carry standardised targets (as built in this
    notebook), all errors are in standardised units, not concentrations.
    """
    if train_loader is None:
        train_loader = train_dl
    if test_loader is None:
        test_loader = test_dl

    # Work on whatever device the model already lives on.
    device = next(model.parameters()).device
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # Column order of the target/prediction tensors.
    targets = ('NO2', 'CO', 'O3')
    records = []

    for epoch in range(epochs):
        # ---- training pass -------------------------------------------------
        model.train()  # enable dropout
        train_loss, train_batches = 0.0, 0
        epoch_start = datetime.now()
        for features, tag in tqdm(train_loader):
            features = features.to(device=device, dtype=torch.float32)
            tag = tag.to(device=device, dtype=torch.float32)
            # Batches are shuffled and independent, so every batch starts from
            # a zero state instead of inheriting the previous batch's state.
            hidden = _zero_hidden(model, features.shape[0], device)
            optimizer.zero_grad()
            pred, _ = model(features, hidden)
            loss = criterion(pred, tag)
            loss.backward()  # without this the optimizer has no gradients
            optimizer.step()
            train_loss += loss.item()
            train_batches += 1
        duration = (datetime.now() - epoch_start).total_seconds()

        # ---- evaluation pass -----------------------------------------------
        model.eval()  # disable dropout so predictions are deterministic
        test_loss = 0.0
        tag_batches, pred_batches = [], []
        with torch.no_grad():
            for features, tag in tqdm(test_loader):
                features = features.to(device=device, dtype=torch.float32)
                tag = tag.to(device=device, dtype=torch.float32)
                hidden = _zero_hidden(model, features.shape[0], device)
                pred, _ = model(features, hidden)
                test_loss += criterion(pred, tag).item()
                tag_batches.append(tag.cpu().numpy())
                pred_batches.append(pred.cpu().numpy())

        if not tag_batches:
            raise ValueError("test_loader produced no batches")

        # Shape (n_samples, 3); columns follow `targets`.
        tags = np.concatenate(tag_batches)
        preds = np.concatenate(pred_batches)

        record = {'epoch': epoch + 1, 'training_time': duration}
        for column, name in enumerate(targets):
            # Index by column (pollutant), not by row (sample in the batch).
            record[f'{name}_median'] = median_absolute_error(tags[:, column], preds[:, column])
            record[f'{name}_mean'] = mean_absolute_error(tags[:, column], preds[:, column])
        record['train_loss'] = train_loss / max(train_batches, 1)
        record['test_loss'] = test_loss / len(tag_batches)
        records.append(record)

    # Build the frame once instead of concatenating inside the loop.
    results = pd.DataFrame(records)
    results.to_csv(results_path)
    torch.cuda.empty_cache()
    return results

In [11]:
# Run the training/evaluation loop on the network instance bound to CNN_LSTM;
# per-epoch metrics are written to Results.csv by fit_model.
fit_model(CNN_LSTM)

100%|██████████| 1563/1563 [00:25<00:00, 60.41it/s]
100%|██████████| 390/390 [00:14<00:00, 26.30it/s] 
100%|██████████| 1563/1563 [00:28<00:00, 54.29it/s]
100%|██████████| 390/390 [00:10<00:00, 35.81it/s]
100%|██████████| 1563/1563 [00:28<00:00, 55.29it/s]
100%|██████████| 390/390 [00:09<00:00, 41.24it/s]
100%|██████████| 1563/1563 [00:28<00:00, 54.90it/s]
100%|██████████| 390/390 [00:09<00:00, 41.69it/s]
100%|██████████| 1563/1563 [00:29<00:00, 53.10it/s]
100%|██████████| 390/390 [00:09<00:00, 41.42it/s]
100%|██████████| 1563/1563 [00:42<00:00, 36.76it/s]
100%|██████████| 390/390 [00:08<00:00, 43.78it/s]
100%|██████████| 1563/1563 [00:26<00:00, 58.19it/s]
100%|██████████| 390/390 [00:10<00:00, 36.91it/s]
100%|██████████| 1563/1563 [00:39<00:00, 39.20it/s]
100%|██████████| 390/390 [00:19<00:00, 19.50it/s]
100%|██████████| 1563/1563 [00:32<00:00, 48.67it/s]
100%|██████████| 390/390 [00:11<00:00, 34.66it/s]
100%|██████████| 1563/1563 [00:29<00:00, 52.53it/s]
100%|██████████| 390/390 [00: