In [1]:
import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [2]:
# print(torch.is_gpu_)

In [3]:

# Read the pickle object
with open("../../dataset/pickles/LOC3.pickle", "rb") as f:
    data = pickle.load(f)

# Convert the data to a pandas dataframe
df = pd.DataFrame(data)

# Encode the class labels
label_encoder = LabelEncoder()
df['class_label'] = label_encoder.fit_transform(df['class_label'])

# Define the maximum sequence length
max_len = 0
for i in df['lengths']:
    max_len=max(max_len,len(i))

# Pad the sequences to the maximum length
sequences = np.array(df['lengths'])
padded_sequences = np.zeros((len(sequences), max_len))
for i, sequence in enumerate(sequences):
    padded_sequences[i, :len(sequence)] = sequence

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(padded_sequences, df['class_label'], test_size=0.2)

# Convert the data to PyTorch tensors
X_train = torch.from_numpy(X_train).to(torch.float32)
y_train = torch.from_numpy(y_train.values).long()
X_val = torch.from_numpy(X_val).to(torch.float32)
y_val = torch.from_numpy(y_val.values).long()
# check if split is stratified
# print("Training set class distribution:")
# print(y_train.unique(return_counts=True)) 
# print("Validation set class distribution:")
# print(y_val.unique(return_counts=True))
print(y_train.unique().shape)
print(y_val.unique().shape)

torch.Size([1500])
torch.Size([1500])


In [4]:
# Define a custom dataset
class SequenceDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        sequence = self.sequences[index]
        label = self.labels[index]
        return sequence, label


In [5]:

# Initialize the model
input_dim=0
for i in X_train:
    input_dim = max(input_dim,len(i))
# input_dim = len(X_train[0])
hidden_dim = 128
output_dim = 1500
learning_rate = 1e-3
print(input_dim)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
# Define the dataloaders
train_dataset = SequenceDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataset = SequenceDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


1213


In [6]:

# Define the model architecture
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, batch_size):
        super(RNN, self).__init__()
        self.batch_size=batch_size
        # simple 1d conv 
        self.conv1 = nn.Conv1d(1, 8, 3, padding=1)
        self.bn1 = nn.BatchNorm1d(8)
        self.relu = nn.ReLU()        
        
        # parameters for the RNN
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.output_dim = output_dim

        # define the RNN
        # self.rnn = nn.LSTM(8*input_dim, hidden_dim, batch_first=True)
        self.rnn = nn.RNN(8*input_dim, hidden_dim, batch_first=True)
        # define the output layer
        self.fc = nn.Linear(hidden_dim, output_dim)    
        self.h = self.init_hidden(self.batch_size)    
        self.flatten = nn.Flatten()
        # self.fc = nn.Linear(8*input_dim, output_dim)
        
    def init_hidden(self, bsz):
        # initialize the hidden state
        self.h=torch.zeros(1, bsz, self.hidden_dim).to(device)
    
    def forward(self, x):
        # reshape the input
        # x = x.view(self.batch_size, self.input_dim, -1)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.flatten(x)
        # initialize the hidden state
        self.init_hidden(x.shape[0])
        # get RNN unit outputs
        out, h = self.rnn(x.unsqueeze(1), self.h)
        # get the output for the last time step
        out = self.fc(out[:, -1, :])
        return out

In [7]:
device = torch.device('cuda:6' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
rnn = RNN(input_dim, hidden_dim, output_dim, batch_size=64)
rnn = rnn.to(device)
criterion = criterion.to(device)

optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)


Using device: cuda:6


In [8]:
from sklearn.metrics import precision_recall_fscore_support
def get_precision_recall_f1(y_true, y_pred):
    """Calculate precision, recall, F1 score for each class.
    Args:
        y_true (list): list of true labels
        y_pred (list): list of predicted labels
    Returns:
        dict: dictionary with precision, recall, F1 score for each class
    """
    # Calculate precision, recall, F1 score for each class
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')
    return {
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

num_epochs = 50
# optimizer = optimizer.to(device)
for epoch in range(num_epochs):
    running_loss = 0.0
    running_corrects = 0
    rnn.train()
    for sequences, labels in train_loader:
        optimizer.zero_grad()
        labels = labels.to(device)
        sequences = sequences.to(device)
        sequences = sequences.unsqueeze(1)
        outputs = rnn(sequences)
        
        # print(sequences.shape)
        # print(outputs.shape)
        # print(labels.shape)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * sequences.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels)
    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() /len(train_dataset)
    print('Train Loss: {:.4f} Train Acc: {:.4f} %'.format(epoch_loss, epoch_acc*100))
    
    # test the model
    rnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        total_preds = []
        total_labels = []
        for sequences, labels in val_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            sequences = sequences.unsqueeze(1)
            outputs = rnn(sequences)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            total_preds.append(predicted)
            total_labels.append(labels)
        
        print('Epoch: {}, Test Accuracy of the model on the test sequences: {} %'.format(epoch, 100 * correct / total))
        total_preds = torch.cat(total_preds, dim=0)
        total_labels = torch.cat(total_labels, dim=0)
        print(get_precision_recall_f1(total_labels.cpu(), total_preds.cpu()))

Train Loss: 4.2927 Train Acc: 18.7041 %
Epoch: 0, Test Accuracy of the model on the test sequences: 36.27004873011263 %
{'precision': 0.3719240144743547, 'recall': 0.36735669044085306, 'f1': 0.32531710261567903}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 2.3597 Train Acc: 47.0761 %
Epoch: 1, Test Accuracy of the model on the test sequences: 53.306225191209215 %
{'precision': 0.5381971276026251, 'recall': 0.5335120526474874, 'f1': 0.5067731308629632}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 1.7119 Train Acc: 60.1520 %
Epoch: 2, Test Accuracy of the model on the test sequences: 61.70329493013199 %
{'precision': 0.6295597335609329, 'recall': 0.6188084273709336, 'f1': 0.6005461698545397}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 1.3641 Train Acc: 67.3527 %
Epoch: 3, Test Accuracy of the model on the test sequences: 66.94420240746119 %
{'precision': 0.6754449022112455, 'recall': 0.6721007604944914, 'f1': 0.6557440002475289}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 1.1300 Train Acc: 72.3751 %
Epoch: 4, Test Accuracy of the model on the test sequences: 71.05237680317552 %
{'precision': 0.717417430207837, 'recall': 0.7116567155806224, 'f1': 0.6978141566804732}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.9685 Train Acc: 75.8645 %
Epoch: 5, Test Accuracy of the model on the test sequences: 72.7659986445929 %
{'precision': 0.7342602026495495, 'recall': 0.7285153135734731, 'f1': 0.7146701308862609}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.8524 Train Acc: 78.2615 %
Epoch: 6, Test Accuracy of the model on the test sequences: 74.60547971730081 %
{'precision': 0.7512842487813347, 'recall': 0.748047165176264, 'f1': 0.7333625019838171}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.7670 Train Acc: 80.0219 %
Epoch: 7, Test Accuracy of the model on the test sequences: 76.09320037435053 %
{'precision': 0.7659801259926378, 'recall': 0.7621838726813206, 'f1': 0.7505179504352734}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.7019 Train Acc: 81.4201 %
Epoch: 8, Test Accuracy of the model on the test sequences: 76.15451641010746 %
{'precision': 0.7712844359288097, 'recall': 0.7631421168528992, 'f1': 0.7517165635385703}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.6543 Train Acc: 82.4674 %
Epoch: 9, Test Accuracy of the model on the test sequences: 77.57123955207022 %
{'precision': 0.7833836915786605, 'recall': 0.7775869132967486, 'f1': 0.7665058265588575}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.6154 Train Acc: 83.1628 %
Epoch: 10, Test Accuracy of the model on the test sequences: 78.03272340013554 %
{'precision': 0.7869343457212884, 'recall': 0.7806578973958582, 'f1': 0.7712282438619144}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.5847 Train Acc: 83.9011 %
Epoch: 11, Test Accuracy of the model on the test sequences: 77.91009132862168 %
{'precision': 0.7887674473332433, 'recall': 0.780940935911174, 'f1': 0.7723034557820195}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.5573 Train Acc: 84.4553 %
Epoch: 12, Test Accuracy of the model on the test sequences: 78.82660470519896 %
{'precision': 0.7976568285032967, 'recall': 0.7896627372657921, 'f1': 0.7807701370674864}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.5346 Train Acc: 85.0064 %
Epoch: 13, Test Accuracy of the model on the test sequences: 78.97828121470295 %
{'precision': 0.797511157965856, 'recall': 0.7912654977411647, 'f1': 0.7814424612077179}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.5160 Train Acc: 85.3323 %
Epoch: 14, Test Accuracy of the model on the test sequences: 79.24290831639074 %
{'precision': 0.8014872161001206, 'recall': 0.7952546163674283, 'f1': 0.7849958881709465}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.5003 Train Acc: 85.6462 %
Epoch: 15, Test Accuracy of the model on the test sequences: 79.14286636331364 %
{'precision': 0.7999819711869013, 'recall': 0.7934976959965054, 'f1': 0.7847467417290303}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4854 Train Acc: 85.9358 %
Epoch: 16, Test Accuracy of the model on the test sequences: 79.58498725271888 %
{'precision': 0.8053206063440814, 'recall': 0.7972919683428087, 'f1': 0.7890215591319081}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4708 Train Acc: 86.3336 %
Epoch: 17, Test Accuracy of the model on the test sequences: 80.11746861587118 %
{'precision': 0.8088886929448248, 'recall': 0.8027809295829106, 'f1': 0.7942274061062188}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4604 Train Acc: 86.5595 %
Epoch: 18, Test Accuracy of the model on the test sequences: 80.04324394100752 %
{'precision': 0.8108523170758138, 'recall': 0.8026692852213757, 'f1': 0.7945439586296097}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4482 Train Acc: 86.8620 %
Epoch: 19, Test Accuracy of the model on the test sequences: 79.8302513957466 %
{'precision': 0.8070049691246501, 'recall': 0.8002140862550229, 'f1': 0.7915410689076755}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4403 Train Acc: 86.9306 %
Epoch: 20, Test Accuracy of the model on the test sequences: 80.03033530190079 %
{'precision': 0.8096205617727115, 'recall': 0.8001452231138372, 'f1': 0.7930641854207956}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4298 Train Acc: 87.2622 %
Epoch: 21, Test Accuracy of the model on the test sequences: 80.53699938683964 %
{'precision': 0.8119198612847557, 'recall': 0.8060136715381147, 'f1': 0.798105893663717}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4192 Train Acc: 87.4841 %
Epoch: 22, Test Accuracy of the model on the test sequences: 80.12069577564785 %
{'precision': 0.8128182033108693, 'recall': 0.8034310809145018, 'f1': 0.7955801508975796}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4135 Train Acc: 87.6188 %
Epoch: 23, Test Accuracy of the model on the test sequences: 80.8435795656243 %
{'precision': 0.8189820067503673, 'recall': 0.8093779434439393, 'f1': 0.8029104686193356}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4067 Train Acc: 87.6737 %
Epoch: 24, Test Accuracy of the model on the test sequences: 80.82744376674088 %
{'precision': 0.8159706049172869, 'recall': 0.8107344797256567, 'f1': 0.8024823972786019}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.4009 Train Acc: 87.8471 %
Epoch: 25, Test Accuracy of the model on the test sequences: 80.74676477232387 %
{'precision': 0.8141915696171582, 'recall': 0.8092859989795655, 'f1': 0.801106877206941}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3925 Train Acc: 88.1142 %
Epoch: 26, Test Accuracy of the model on the test sequences: 80.80485364830413 %
{'precision': 0.8136084030450851, 'recall': 0.8093596512647837, 'f1': 0.8019513329708173}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3854 Train Acc: 88.1795 %
Epoch: 27, Test Accuracy of the model on the test sequences: 80.65317713880015 %
{'precision': 0.8150083467904923, 'recall': 0.8072217898915801, 'f1': 0.8000113895249737}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3806 Train Acc: 88.3449 %
Epoch: 28, Test Accuracy of the model on the test sequences: 81.14693258463227 %
{'precision': 0.8205336665935594, 'recall': 0.813888739595974, 'f1': 0.8059361001865526}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3766 Train Acc: 88.4296 %
Epoch: 29, Test Accuracy of the model on the test sequences: 80.76935489076064 %
{'precision': 0.8183177596808752, 'recall': 0.8089696146843438, 'f1': 0.8025434574255178}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3697 Train Acc: 88.6475 %
Epoch: 30, Test Accuracy of the model on the test sequences: 81.14693258463227 %
{'precision': 0.82060798771612, 'recall': 0.8132676841434926, 'f1': 0.8059152668063948}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3662 Train Acc: 88.6838 %
Epoch: 31, Test Accuracy of the model on the test sequences: 80.40145867621906 %
{'precision': 0.8135793018723707, 'recall': 0.8044748086600864, 'f1': 0.7971674962853424}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3608 Train Acc: 88.7693 %
Epoch: 32, Test Accuracy of the model on the test sequences: 81.16629554329235 %
{'precision': 0.8207044709071316, 'recall': 0.8127094180022268, 'f1': 0.8065362265812807}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3577 Train Acc: 88.8338 %
Epoch: 33, Test Accuracy of the model on the test sequences: 81.1243424661955 %
{'precision': 0.8208205529982461, 'recall': 0.8138246889720837, 'f1': 0.8066956674403027}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3527 Train Acc: 88.9492 %
Epoch: 34, Test Accuracy of the model on the test sequences: 81.10497950753542 %
{'precision': 0.819525776958536, 'recall': 0.8119470846579546, 'f1': 0.8043614239134037}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3483 Train Acc: 89.0646 %
Epoch: 35, Test Accuracy of the model on the test sequences: 80.927485719818 %
{'precision': 0.8194190738143571, 'recall': 0.8119584295159689, 'f1': 0.8040248255849262}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3434 Train Acc: 89.2534 %
Epoch: 36, Test Accuracy of the model on the test sequences: 81.32442637234969 %
{'precision': 0.8255700344436112, 'recall': 0.8150617030091674, 'f1': 0.808910182138932}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3404 Train Acc: 89.2897 %
Epoch: 37, Test Accuracy of the model on the test sequences: 81.21793009971924 %
{'precision': 0.8233394694636563, 'recall': 0.8137790850639053, 'f1': 0.8067947008636547}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3362 Train Acc: 89.4632 %
Epoch: 38, Test Accuracy of the model on the test sequences: 81.45028560364024 %
{'precision': 0.8231198070288938, 'recall': 0.8155216272183378, 'f1': 0.8086089957470234}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3345 Train Acc: 89.4930 %
Epoch: 39, Test Accuracy of the model on the test sequences: 81.63423371091103 %
{'precision': 0.8259504315213747, 'recall': 0.818391077104868, 'f1': 0.8114966029363582}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3307 Train Acc: 89.5261 %
Epoch: 40, Test Accuracy of the model on the test sequences: 81.31151773324298 %
{'precision': 0.8234333163414367, 'recall': 0.8154582676069853, 'f1': 0.8091466366857237}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3255 Train Acc: 89.7092 %
Epoch: 41, Test Accuracy of the model on the test sequences: 81.67618678800787 %
{'precision': 0.8223765563773047, 'recall': 0.8182375589017465, 'f1': 0.8108878673544517}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3232 Train Acc: 89.7843 %
Epoch: 42, Test Accuracy of the model on the test sequences: 81.8375447768419 %
{'precision': 0.8269848404531946, 'recall': 0.82000546695176, 'f1': 0.8128390367787709}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3212 Train Acc: 89.7964 %
Epoch: 43, Test Accuracy of the model on the test sequences: 81.70845838577468 %
{'precision': 0.8251397594020009, 'recall': 0.817481165939303, 'f1': 0.8114118399104576}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3169 Train Acc: 89.8399 %
Epoch: 44, Test Accuracy of the model on the test sequences: 81.80204601929842 %
{'precision': 0.825105328127502, 'recall': 0.8187165451222376, 'f1': 0.8124103370919332}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3148 Train Acc: 89.9359 %
Epoch: 45, Test Accuracy of the model on the test sequences: 81.472875722077 %
{'precision': 0.8234788236667305, 'recall': 0.8154675930660488, 'f1': 0.8093108986768968}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3119 Train Acc: 90.0537 %
Epoch: 46, Test Accuracy of the model on the test sequences: 81.5051473198438 %
{'precision': 0.8214921028354256, 'recall': 0.8171117210925045, 'f1': 0.809571708755337}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3092 Train Acc: 90.1522 %
Epoch: 47, Test Accuracy of the model on the test sequences: 81.88272501371543 %
{'precision': 0.8271668575939382, 'recall': 0.8201213697881872, 'f1': 0.813249118810616}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3084 Train Acc: 90.0045 %
Epoch: 48, Test Accuracy of the model on the test sequences: 81.74718430309484 %
{'precision': 0.8263390971586377, 'recall': 0.8180449800287127, 'f1': 0.8108010770695252}


  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 0.3035 Train Acc: 90.1990 %
Epoch: 49, Test Accuracy of the model on the test sequences: 81.97308548746248 %
{'precision': 0.8275994097483517, 'recall': 0.8210039022559237, 'f1': 0.8149194164143234}


  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# save model
torch.save(rnn.state_dict(), 'rnn_model_.ckpt')


In [None]:
print(total_preds.unique().shape)
print(total_labels.unique().shape)
print("Accuracy:{:.4f}".format(torch.sum(total_preds == total_labels).item()/len(total_labels)))
print(total_preds.shape)
print("Precision:{:.4f} Recall:{:.4f} F1:{:.4f}".format(get_precision_recall_f1(total_labels.cpu(), total_preds.cpu())['precision'], get_precision_recall_f1(total_labels.cpu(), total_preds.cpu())['recall'], get_precision_recall_f1(total_labels.cpu(), total_preds.cpu())['f1']))

In [None]:
# Evaluate the model
running_loss = 0.0
running_corrects = 0
rnn.eval()
with torch.no_grad():
    for sequences, labels in val_loader:
        labels = labels.to(device)
        sequences = sequences.to(device)
        sequences = sequences.unsqueeze(1)
        outputs = rnn(sequences)
        loss = criterion(outputs, labels)
        running_loss += loss.item() * sequences.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels)
epoch_loss = running_loss / len(val_dataset)
epoch_acc = running_corrects.double() / len(val_dataset)
print('Val Loss: {:.4f} Val Acc: {:.4f}'.format(epoch_loss, epoch_acc))


In [None]:
accuracy = model.predict(test_data)
print("Accuracy: ", accuracy,"%")

In [None]:
a = torch.tensor([[1]])