In [159]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torch.nn.functional import pad

import pandas as pd
import numpy as np

from metrics.helper import metrics

In [98]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [198]:
# Hyperparams
input_size = 20
sequence_length = "f"
num_layers = 2
hidden_size = 100
num_epochs = 1
batch_size = 2
learning_rate = 0.001

In [199]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, nonlinearity='tanh', batch_first=True)
        # x -> batch_size, sequence_length, input_size
        self.sig1 = nn.Sigmoid()
        self.fc1 = nn.Linear(hidden_size, 10)

        self.sig2 = nn.Sigmoid()
        self.fc2 = nn.Linear(10, 1)


    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        # h0 = h0.type(torch.float64).to(device)
        out, _ = self.rnn(x, h0)
        # out: batch_size, seq_length, hidden_size
        # out = out[:, -1, :]
        # out = out.reshape((1, 1, 1500)) # this stops warning

        out = self.fc1(out)
        out = self.sig1(out)

        out = self.fc2(out)
        out = self.sig2(out)
        return out
        

In [200]:
model = RNN(input_size, hidden_size, num_layers).to(device)

In [201]:
class SequenceDataset(Dataset):
    def __init__(self, transform):
        self.data = pd.read_pickle('../../data/cnn/one_hot/data.csv')
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # y = output[index].reshape((1, -1)) # make the data match the shape of X after passing through all layers
        x = self.data['in'].iloc[index]
        # x = np.array(x, dtype=np.float64)
        x = x.T
        
        # x = x.T
        y = self.data['out'].iloc[index]
        y = y.reshape((1,-1))
        y = np.array(y, dtype=np.float64)

        if self.transform:
            x = self.transform(x)[0]
            # print(x.dtype)

        return x, y

In [202]:
dataset = SequenceDataset(transforms.ToTensor())
length = len(pd.read_pickle('../../data/cnn/one_hot/data.csv'))
train_len = (length * 1) // 10
test_len = length - train_len
train_set, test_set = torch.utils.data.random_split(dataset, [train_len, test_len])

train_loader = DataLoader(dataset=train_set, batch_size=2, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=2, shuffle=True)

In [204]:
from torch.nn.utils.rnn import pad_sequence

In [205]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
n_total_steps = len(train_loader)

for epoch in range(num_epochs):
    for i, (data, target) in enumerate(train_loader):
        data = data.to(device).float()
        print("this is data", data.shape)
        target = target.to(device).float().reshape(-1)
        outputs = model(data).reshape(-1)
        loss = criterion(outputs, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'epoch: [{epoch+1}/{num_epochs}], step: [{i+1}/{n_total_steps}], loss: {loss.item():.4f}')

RuntimeError: stack expects each tensor to be equal size, but got [557, 20] at entry 0 and [119, 20] at entry 1

: 

In [164]:
def test(model, threshhold=0.5):
    with torch.no_grad():
        n_samples = len(test_loader)
        accuracy = 0
        precision = 0
        recall = 0
        f1 = 0
        for input, target in test_loader:
            input = input.float()
            outputs = model(input)
            outputs = outputs.reshape(-1).to(device)
            # outputs = torch.round(outputs)
            target = target.reshape(-1).to(device).float()
            outputs = (outputs > threshhold).float()
            print(outputs)
            print()
            print(target)
            break
            accuracy_, precision_, recall_, f1_ = metrics(outputs, target)
            accuracy += accuracy_
            precision += precision_
            f1 += f1_
            recall += recall_

        accuracy = accuracy / n_samples
        precision = precision / n_samples
        recall = recall / n_samples
        f1 = f1 / n_samples

        print(f'Accuracy: {accuracy}')
        print(f'Precision: {precision}')
        print(f'Recall: {recall}')
        print(f'F1: {f1}')

    return (accuracy, precision, recall, f1)

In [170]:
test(model, 0.35)

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 

(0.0, 0.0, 0.0, 0.0)