In [188]:
import torch
from torch import nn
from torch.utils.data import DataLoader
#from tqdm.notebook import tqdm, trange
from tqdm import tqdm, trange
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from copy import deepcopy
import plotly.express as px

# Number of channels per input row. NeuralNetwork reads this module-level value
# to split the first n_channels * 4 input features into groups of 4 and to
# size the first layer of its merged stack (n_channels * 3 + 9 inputs).
n_channels = 14

In [189]:
# Feature table: drop the timestamp and the two 'Unnamed' columns, then
# replace every missing value with 0.
X = (
    pd.read_csv("data/parallel_right_time.csv")
    .drop(['ts', 'Unnamed: 0', 'Unnamed: 0.1'], axis=1)
    .fillna(0)
)

# Target table: drop the two 'Unnamed' columns, both PM columns and the timestamp.
# NOTE(review): unlike X, missing values in Y are not filled -- confirm Y has none.
# NOTE(review): rows of X and Y are presumably aligned by position -- verify.
Y = pd.read_csv('data/check_parallel_right_time.csv').drop(
    ['Unnamed: 0', 'Unnamed: 0.1', 'PM2.5', 'PM10', 'ts'], axis=1
)

In [120]:
# Standardize features and targets column-wise; the fitted scalers stay
# available as scalerX / scalerY. X and Y are rebound to the scaled arrays.
# NOTE(review): both scalers are fit on the full dataset, before the
# train/test split performed in a later cell, so test-set statistics leak
# into the scaling -- consider fitting on the training rows only.
scalerX = preprocessing.StandardScaler()
X = scalerX.fit_transform(X)
scalerY = preprocessing.StandardScaler()
Y = scalerY.fit_transform(Y)

In [121]:
# Hold out 15% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=42, test_size=0.15)

# Pair each feature row with its target row so DataLoader can batch the samples.
# The pairs are built without loop variables named X / Y: rebinding those names
# would replace the full scaled datasets with their last row and break any
# re-run of the cells that read X and Y.
train = list(tqdm(zip(X_train, y_train), total=X_train.shape[0]))
test = list(tqdm(zip(X_test, y_test), total=X_test.shape[0]))

100%|██████████| 10369/10369 [00:00<00:00, 493792.09it/s]
100%|██████████| 1830/1830 [00:00<00:00, 609753.44it/s]


In [122]:
batch_size = 20

# Create data loaders.
# NOTE(review): `test` and `train` are rebound from sample lists to DataLoaders,
# so re-running this cell would wrap a DataLoader in a DataLoader; rebuild the
# lists first before executing it again.
test = DataLoader(test, batch_size=batch_size, shuffle=True)
train = DataLoader(train, batch_size=batch_size, shuffle=True)

# Peek at a single batch to confirm shapes and dtype. Dedicated names are used
# so the module-level X is not overwritten by a batch tensor.
sample_X, sample_y = next(iter(test))
print(f"Shape of X : {sample_X.shape}")
print(f"Shape of y: {sample_y.shape} {sample_y.dtype}")

Shape of X : torch.Size([20, 65])
Shape of y: torch.Size([20, 3]) torch.float64


In [180]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Two-stage regressor: a shared per-channel MLP followed by a merged MLP.

    The last dimension of the input holds ``n_channels * 4`` channel features
    (4 consecutive values per channel) followed by 9 additional features.
    Every 4-value group is passed through the same small network (weights are
    shared across channels), giving 3 values per channel. Those values are
    concatenated with the 9 additional features and mapped to 3 outputs.

    Args:
        n_sensing: Accepted for backward compatibility; its value is not used.
            It is optional so that ``NeuralNetwork()`` can be called without
            arguments.
    """

    FEATURES_PER_CHANNEL = 4   # inputs consumed by the per-channel stack
    OUTPUTS_PER_CHANNEL = 3    # outputs produced by the per-channel stack
    N_OUTSIDE_FEATURES = 9     # trailing features that bypass the per-channel stack

    def __init__(self, n_sensing=None):
        super().__init__()
        # Snapshot the module-level channel count so forward() always splits the
        # input the same way the layers below were sized, even if the global
        # `n_channels` is changed after construction.
        self.n_channels = n_channels
        # Not used by forward(); kept so the module's attributes and printed
        # structure stay the same.
        self.flatten = nn.Flatten()
        # take in input resistance, age, heater_r and heater_V
        self.parallel_relu_stack_channel = nn.Sequential(
            nn.Linear(self.FEATURES_PER_CHANNEL, 16),
            nn.LeakyReLU(),
            nn.Linear(16, 16),
            nn.LeakyReLU(),
            nn.Linear(16, self.OUTPUTS_PER_CHANNEL),
        )
        # Input: 3 values per channel plus the 9 features that bypassed the
        # per-channel stack.
        self.linear_merged_stack = nn.Sequential(
            nn.Linear(self.n_channels * self.OUTPUTS_PER_CHANNEL + self.N_OUTSIDE_FEATURES, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 64),
            nn.ReLU(),
            nn.Linear(64, 3)
        )

    def forward(self, x):
        """Map ``x`` of shape ``(..., n_channels * 4 + 9)`` to ``(..., 3)``.

        Works for a single sample (1-D) as well as for batches.

        Raises:
            ValueError: if the last dimension of ``x`` has an unexpected size.
        """
        n_channel_features = self.n_channels * self.FEATURES_PER_CHANNEL
        expected = n_channel_features + self.N_OUTSIDE_FEATURES
        if x.shape[-1] != expected:
            raise ValueError(
                f"expected {expected} features in the last dimension, got {x.shape[-1]}"
            )

        # Explicit slicing on the last dimension handles 1-D and batched input
        # alike and does not depend on how many trailing features there are.
        channel_features = x[..., :n_channel_features]
        outside_features = x[..., n_channel_features:]

        # (..., n_channels, 4): nn.Linear acts on the last dimension, so the
        # shared stack processes every channel in one call.
        per_channel = channel_features.reshape(
            *x.shape[:-1], self.n_channels, self.FEATURES_PER_CHANNEL
        )
        channel_out = self.parallel_relu_stack_channel(per_channel)
        # (..., n_channels, 3) -> (..., n_channels * 3), channel-major, i.e. the
        # same order as concatenating the per-channel outputs one after another.
        channel_out = channel_out.flatten(start_dim=-2)

        merged = torch.cat((channel_out, outside_features), dim=-1)
        return self.linear_merged_stack(merged)

# Build the network and move its parameters to the selected device.
# n_sensing is passed explicitly so the call matches the constructor's
# signature; the constructor does not use its value.
model = NeuralNetwork(n_sensing=n_channels).to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (parallel_relu_stack_channel): Sequential(
    (0): Linear(in_features=4, out_features=16, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=16, out_features=16, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=16, out_features=3, bias=True)
  )
  (linear_merged_stack): Sequential(
    (0): Linear(in_features=51, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=3, bias=True)
  )
)


In [181]:
# Mean-squared-error objective (nn.L1Loss() is a drop-in alternative).
loss_fn = nn.MSELoss()

# SGD with momentum over every parameter of the model.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-7,
    momentum=0.9,
)

In [182]:
def trainf(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: yields ``(X, Y)`` batches; ``dataloader.dataset`` must
            support ``len()`` (used only for the progress message).
        model: module that accepts a batch ``X`` and returns one prediction
            row per sample.
        loss_fn: callable ``loss_fn(prediction, target)`` applied per sample.
        optimizer: optimizer bound to ``model``'s parameters.

    Side effects:
        * ``model`` is cast to float64 and put in training mode.
        * Batches are moved to the module-level ``device``.
        * The global ``best_model`` receives a deep copy of the model whenever
          a batch reaches the lowest mean loss seen so far *within this call*
          (the record is not carried over between calls).
        * Every 100th batch a progress line with the batch's mean loss is printed.
    """
    global best_model
    best_loss = float("inf")
    size = len(dataloader.dataset)
    model.double()
    model.train()
    for batch, (X, Y) in enumerate(dataloader):
        X, Y = X.to(device), Y.to(device)
        optimizer.zero_grad()

        # One batched forward pass instead of one pass per sample.
        pred = model(X)
        # Per-sample losses are summed before backprop, which yields the same
        # gradient as accumulating a separate backward() per sample, so the
        # effective step size is unchanged.
        sample_losses = torch.stack([loss_fn(p, y) for p, y in zip(pred, Y)])
        sample_losses.sum().backward()

        # Track the mean over the whole batch (a plain float, so no autograd
        # graph is kept alive) rather than the loss of its last sample only.
        batch_loss = sample_losses.mean().item()

        # Snapshot before the parameter update so the stored weights are the
        # ones that actually produced `batch_loss`.
        if batch_loss < best_loss:
            best_loss = batch_loss
            best_model = deepcopy(model)

        optimizer.step()

        if batch % 100 == 0:
            current = batch * len(X)
            print(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

In [183]:
def testf(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, Y in dataloader:
            X, Y = X.to(device), Y.to(device)
            for x_line, y_line in zip(X, Y):
                pred = model(x_line)
                test_loss += loss_fn(pred, y_line).item()
                #correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches * batch_size
    #correct /= size
    #print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    print(f"Test Error: \n Avg loss: {test_loss:>8f} \n")


In [187]:
# Alternate one training pass and one evaluation pass per epoch.
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    trainf(dataloader=train, model=model, loss_fn=loss_fn, optimizer=optimizer)
    testf(dataloader=test, model=model, loss_fn=loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.892639  [    0/10369]
loss: 0.192467  [ 2000/10369]


KeyboardInterrupt: 

In [185]:
# Predicted vs. actual values over the whole dataset (train + test), one
# scatter plot per target column. Both axes are in standardized units because
# the targets were scaled before the split.
all_targets = np.concatenate((y_train, y_test), axis=0)
all_features = np.concatenate((X_train, X_test), axis=0)

# Build the input with the model's own dtype and device so the forward pass
# works whether or not the model has been cast to float64 or moved to a GPU.
reference_param = next(model.parameters())
inputs = torch.as_tensor(
    all_features, dtype=reference_param.dtype, device=reference_param.device
)

# A single forward pass, without autograd bookkeeping, serves every plot.
model.eval()
with torch.no_grad():
    predictions = model(inputs).cpu().numpy()

for i in range(all_targets.shape[1]):
    px.scatter(
        x=all_targets[:, i],
        y=predictions[:, i],
        labels={"x": "actual (standardized)", "y": "predicted (standardized)"},
        title=f"Target column {i}",
    ).show()