In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
#from tqdm.notebook import tqdm, trange
from tqdm import tqdm, trange
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn import preprocessing
from copy import deepcopy
import plotly.express as px
from sklearn.compose import make_column_selector as selector


In [139]:
X = pd.read_csv("data/parallel_x_labels.csv")
X = X.drop(['ts'],axis=1)
X = X.drop(['Unnamed: 0'],axis=1)
#X = X.fillna(0)
X.iat[-2,0] = X.iat[-1,0]
#X = X.drop([0,len(X)-1])
X = X.dropna()

#train_tensor = torch.tensor(train.values)
Y = pd.read_csv('data/check_parallel_right_time.csv')
Y = Y.drop(['Unnamed: 0'],axis=1)
Y = Y.drop(['Unnamed: 0.1'],axis=1)
Y = Y.drop(['PM2.5'],axis=1)
Y = Y.drop(['PM10'],axis=1)


Y = Y.drop(['ts'],axis=1)

In [140]:
ct_x = ColumnTransformer([
    ("norm1", preprocessing.StandardScaler(), selector(dtype_exclude=object)),
    ("norm2", preprocessing.OneHotEncoder(), selector(dtype_include=np.number))
])
labels = X.labels
#X.loc[:,'labels'] = 0
#X = X.drop(['labels'], axis=1)

In [141]:
X.shape
labels = X.labels.unique().tolist()
sentence_idx = np.linspace(0,len(labels), len(labels), False)
num_labels = X.labels.map(lambda x: labels.index(x))
X.labels = 0

In [160]:
materials = list( dict.fromkeys([item for label in labels for item in label.split(" ")]) )

In [142]:

scalerX = preprocessing.StandardScaler().fit(X)
X= scalerX.transform(X)
scalerY = preprocessing.StandardScaler().fit(Y)
Y = scalerY.transform(Y)

X[:,0] = num_labels

In [143]:
X = X.astype(np.single)
Y = Y.astype(np.single)

In [144]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42, test_size=0.15)

#train = pd.DataFrame(columns=['X','Y'])
#test = pd.DataFrame(columns=['X','Y'])
test = []
train = []

for X, Y in tqdm(zip(X_train, y_train), total=X_train.shape[0]):
    train.append((X,Y))
for X, Y in tqdm(zip(X_test, y_test), total=X_test.shape[0]):
    test.append((X,Y))

100%|██████████| 10369/10369 [00:00<00:00, 518401.06it/s]
100%|██████████| 1830/1830 [00:00<00:00, 365747.47it/s]


In [145]:
batch_size = 20

# Create data loaders.
test = DataLoader(test, batch_size=batch_size, shuffle=True)
train = DataLoader(train, batch_size=batch_size, shuffle=True)

for X, y in test:
    print(f"Shape of X : {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X : torch.Size([20, 42])
Shape of y: torch.Size([20, 3]) torch.float32


In [188]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self, materials, n_channels, labels):
        self.n_sensing = n_channels
        self.labels = labels
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.sens_pipe = nn.ModuleDict()
        for mat in materials:
            self.sens_pipe[mat] = nn.Sequential(
                nn.Linear(4, 16),
                nn.LeakyReLU(),
                nn.Linear(16, 32),
                nn.LeakyReLU(),
                nn.Linear(32, 16),
                nn.LeakyReLU(),
                nn.Linear(16, 3)
            )
        self.heads = nn.ModuleDict()
        for label in labels:
            self.heads[label] = nn.Sequential(
                nn.Linear(self.n_sensing * 3 + 9, 128),
                nn.LeakyReLU(),
                nn.Linear(128, 512),
                nn.LeakyReLU(),
                nn.Linear(512, 64),
                nn.LeakyReLU(),
                nn.Linear(64, 3),
                nn.LeakyReLU(),
                nn.Dropout(0.03),
                nn.Dropout(0.1),
                nn.Dropout(0.1),
            )

    def forward(self, x):
        dim = 0 if x.dim()<2 else 1
        materials, x = torch.split(x, [1, x.size()[dim]-1])
        materials = labels[int(materials.squeeze().tolist())].split(" ")

        channels, outside_features = torch.split(x, self.n_sensing * 4, dim=dim)
        channels = torch.split(channels, 4, dim=dim)
        merged = torch.cat(
            [self.sens_pipe[mat](channel) for mat, channel in zip(materials,channels)],
            dim=dim
        )
        merged = torch.cat(
            ( merged,outside_features),
            dim=dim
        )

        out = self.heads[" ".join(materials)](merged)
        return out

model = NeuralNetwork(materials, 8, labels).to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (sens_pipe): ModuleDict(
    (LaFeO3): Sequential(
      (0): Linear(in_features=4, out_features=16, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
      (2): Linear(in_features=16, out_features=32, bias=True)
      (3): LeakyReLU(negative_slope=0.01)
      (4): Linear(in_features=32, out_features=16, bias=True)
      (5): LeakyReLU(negative_slope=0.01)
      (6): Linear(in_features=16, out_features=3, bias=True)
    )
    (WO3): Sequential(
      (0): Linear(in_features=4, out_features=16, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
      (2): Linear(in_features=16, out_features=32, bias=True)
      (3): LeakyReLU(negative_slope=0.01)
      (4): Linear(in_features=32, out_features=16, bias=True)
      (5): LeakyReLU(negative_slope=0.01)
      (6): Linear(in_features=16, out_features=3, bias=True)
    )
    (ZnO): Sequential(
      (0): Linear(in_features=4, out_features=16, bias=True)
      

In [189]:
loss_fn = nn.MSELoss()
#loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-7, momentum=0.9)

In [190]:
def trainf(dataloader, model, loss_fn, optimizer):
    global best_model
    best_loss = float("inf")
    size = len(dataloader.dataset)
    model.float()
    model.train()
    for batch , (X, Y) in enumerate(dataloader):
        X, Y = X.to(device), Y.to(device)
        loss = float("inf")
        optimizer.zero_grad()
        # Compute prediction error
        for x_line, y_line in zip(X, Y):
            pred = model(x_line)
            loss = loss_fn(pred, y_line)
            loss.backward()
        # Backpropagation
        optimizer.step()

        if loss < best_loss:
            best_loss = loss
            best_model = deepcopy(model)

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [191]:
def testf(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, Y in dataloader:
            X, Y = X.to(device), Y.to(device)
            for x_line, y_line in zip(X, Y):
                pred = model(x_line)
                test_loss += loss_fn(pred, y_line).item()
                #correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches * batch_size
    #correct /= size
    #print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    print(f"Test Error: \n Avg loss: {test_loss:>8f} \n")


In [192]:
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    trainf(train, model, loss_fn, optimizer)
    testf(test, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.121345  [    0/10369]
loss: 1.282001  [ 2000/10369]
loss: 0.599174  [ 4000/10369]
loss: 0.616850  [ 6000/10369]
loss: 0.771404  [ 8000/10369]
loss: 0.290343  [10000/10369]
Test Error: 
 Avg loss: 0.956912 

Epoch 2
-------------------------------
loss: 0.969510  [    0/10369]
loss: 0.630723  [ 2000/10369]
loss: 0.325090  [ 4000/10369]
loss: 0.460862  [ 6000/10369]
loss: 0.451486  [ 8000/10369]
loss: 0.794783  [10000/10369]
Test Error: 
 Avg loss: 0.956678 

Epoch 3
-------------------------------
loss: 0.729151  [    0/10369]
loss: 1.009280  [ 2000/10369]
loss: 0.319063  [ 4000/10369]
loss: 0.084851  [ 6000/10369]
loss: 0.260851  [ 8000/10369]
loss: 0.509333  [10000/10369]
Test Error: 
 Avg loss: 0.956441 

Epoch 4
-------------------------------
loss: 0.853690  [    0/10369]
loss: 0.238585  [ 2000/10369]
loss: 0.429143  [ 4000/10369]
loss: 2.157649  [ 6000/10369]
loss: 0.763685  [ 8000/10369]
loss: 0.397187  [10000/10369]
Test Error: 
 A

In [126]:
for i in range(3):
    px.scatter(
        x = np.concatenate((y_train, y_test), axis = 0)[:,i],
        y = model(torch.tensor(np.concatenate((X_train, X_test), axis = 0))).detach().numpy()[:,i]
    ).show()

RuntimeError: split_with_sizes expects split_sizes to sum exactly to 12199 (input tensor's size at dimension 0), but got split_sizes=[1, 41]