In [45]:
import pandas as pd
import numpy as np
import nltk
from sklearn.metrics import classification_report
from sklearn.linear_model import Perceptron
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer


In [46]:
from gensim.models import word2vec
from gensim.models import KeyedVectors

In [47]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [48]:
df=pd.read_csv(r"C:\Users\Venkatesh Dharmaraj\Downloads\NLP Project\data.csv",na_filter=False)

In [49]:
test_df=pd.read_csv(r"C:\Users\Venkatesh Dharmaraj\Downloads\NLP Project\test_data.csv",na_filter=False)

In [50]:
df.shape

(178264, 6)

In [51]:
def entity_representation1(df):
    for i,row in df.iterrows():
        gs,ge=map(int,row['gene_index'][1:-1].split(','))
        ds,dt=map(int,row['disease_index'][1:-1].split(','))
        row['sentence'] = row['sentence'][:gs]+'@ * gene * '+row['sentence'][gs:gs+ge]+' @'+row['sentence'][gs+ge:]
        row['sentence'] = row['sentence'][:ds]+'# ^ disease ^ '+row['sentence'][ds:ds+dt]+' #'+row['sentence'][ds+dt:]
    return df

In [52]:
df=entity_representation1(df)
test_df=entity_representation1(test_df)

In [53]:
test_df['sentence'][0]

'In addition, the combined # ^ disease ^ cancer # genome expression metaanalysis datasets included @ * gene * PDE11A @ among the top 1% down-regulated genes in PCa.'

In [54]:
# df['Sentence'] = df['sentence']+df['gene']+df['disease']
# df=df[df.columns[[3,4]]]

In [55]:
df=df[df.columns[[0,3]]]
df= df.rename({'relation': 'Relation','sentence':'Sentence'},axis=1)

In [56]:
test_df=test_df[test_df.columns[[0,3]]]
test_df= test_df.rename({'relation': 'Relation','sentence':'Sentence'},axis=1)

In [57]:
df=df.dropna()
test_df = test_df.dropna()

In [58]:
df['Relation'].value_counts()

NA                     122149
genomic_alterations     32831
biomarker               20145
therapeutic              3139
Name: Relation, dtype: int64

In [59]:
test_df['Relation'].value_counts()

NA                     15608
biomarker               2315
genomic_alterations     2209
therapeutic              384
Name: Relation, dtype: int64

In [60]:
df.Relation = df.Relation.replace({'NA':0,
                         'genomic_alterations':1,
                         'biomarker':2,
                         'therapeutic':3,
                         })

In [61]:
test_df.Relation = test_df.Relation.replace({'NA':0,
                         'genomic_alterations':1,
                         'biomarker':2,
                         'therapeutic':3,
                         })

In [62]:
# df=df.groupby('Relation').apply(lambda x: x.sample(40000,replace=True,random_state=1)).reset_index(drop=True)

In [63]:
def tokenize(df):
  data=[]
  for i in range(len(df['Relation'])):
    data.append(nltk.word_tokenize(df['Sentence'][i]))
  return data

In [64]:
data = tokenize(df)
test_data = tokenize(test_df)

In [65]:
data[0]

['A',
 '@',
 '*',
 'gene',
 '*',
 'monocyte',
 'chemoattractant',
 'protein-1',
 '@',
 'gene',
 'polymorphism',
 'is',
 'associated',
 'with',
 'occult',
 'ischemia',
 'in',
 '#',
 '^',
 'disease',
 '^',
 'a',
 'high-risk',
 '#',
 'asymptomatic',
 'population',
 '.']

In [66]:
import gensim.downloader as api
model = api.load("word2vec-google-news-300")

In [67]:
def generate_vectors(data):
    X = []
    for review in data:
        word_vectors = []
        for word in review:
            try:
                word_vectors.append(model[word])
            except KeyError:
                pass
        if not word_vectors:
            X.append(np.zeros(300))
        else:
            X.append(np.mean(word_vectors, axis=0))
    X = np.array(X)
    return X

In [68]:
X_traind, y_train= data,df['Relation']

In [69]:
X_traind[0]

['A',
 '@',
 '*',
 'gene',
 '*',
 'monocyte',
 'chemoattractant',
 'protein-1',
 '@',
 'gene',
 'polymorphism',
 'is',
 'associated',
 'with',
 'occult',
 'ischemia',
 'in',
 '#',
 '^',
 'disease',
 '^',
 'a',
 'high-risk',
 '#',
 'asymptomatic',
 'population',
 '.']

In [70]:
X_testd, y_test = test_data , test_df['Relation']

In [71]:
X_train1,y_train1 = df['Sentence'],df['Relation']
X_test1,y_test1 = test_df['Sentence'], test_df['Relation']

In [72]:
print(len(X_traind),len(X_testd))
print(X_train1.shape,X_test1.shape)

178264 20516
(178264,) (20516,)


In [73]:
vect=TfidfVectorizer(ngram_range=(1,2),
                        use_idf=True).fit(df['Sentence'])

In [74]:
X_train_vectorized=vect.transform(X_train1)
X_test_vectorized = vect.transform(X_test1)

In [75]:
X_train_vectorized.shape

(178264, 1213019)

In [76]:
y_train=y_train.astype('int')
y_test=y_test.astype('int')
y_train1=y_train1.astype('int')
y_test1=y_test1.astype('int')
X_train=generate_vectors(X_traind)
X_test=generate_vectors(X_testd)

Perceptron - With TFIDF

In [77]:
perceptron_tfidf_clf = Perceptron()
perceptron_tfidf_clf.fit(X_train_vectorized, y_train1)

In [78]:
perceptron_tfidf_pred=perceptron_tfidf_clf.predict(vect.transform(X_test1))
print(classification_report(y_test1,perceptron_tfidf_pred))

              precision    recall  f1-score   support

           0       0.87      0.93      0.90     15608
           1       0.58      0.70      0.63      2209
           2       0.45      0.19      0.27      2315
           3       0.43      0.24      0.30       384

    accuracy                           0.81     20516
   macro avg       0.58      0.51      0.53     20516
weighted avg       0.79      0.81      0.79     20516



Perceptron with Word2Vec

In [79]:
perceptron_clf = Perceptron()
perceptron_clf.fit(X_train, y_train)

In [80]:
perceptron_pred=perceptron_clf.predict(X_test)
print(classification_report(y_test, perceptron_pred))

              precision    recall  f1-score   support

           0       0.81      0.94      0.87     15608
           1       0.50      0.44      0.47      2209
           2       0.34      0.09      0.14      2315
           3       0.50      0.03      0.05       384

    accuracy                           0.77     20516
   macro avg       0.54      0.37      0.38     20516
weighted avg       0.72      0.77      0.73     20516



SVM With TF-IDF

In [81]:
svm_tfidf_clf=LinearSVC()
svm_tfidf_clf.fit(X_train_vectorized, y_train1)

In [87]:
svm_tfidf_pred=svm_tfidf_clf.predict(vect.transform(X_test1))
print(classification_report(y_test1,svm_tfidf_pred,digits=4))

              precision    recall  f1-score   support

           0     0.8648    0.9514    0.9060     15608
           1     0.6228    0.6913    0.6552      2209
           2     0.5081    0.1624    0.2462      2315
           3     0.5033    0.2005    0.2868       384

    accuracy                         0.8203     20516
   macro avg     0.6247    0.5014    0.5235     20516
weighted avg     0.7917    0.8203    0.7930     20516



SVM with Word2Vec

In [88]:
svm_clf=LinearSVC()
svm_clf.fit(X_train, y_train)

In [89]:
svm_pred=svm_clf.predict(X_test)
print(classification_report(y_test, svm_pred,digits=4))

              precision    recall  f1-score   support

           0     0.8116    0.9461    0.8737     15608
           1     0.5185    0.4685    0.4923      2209
           2     0.3275    0.0406    0.0723      2315
           3     0.3077    0.0312    0.0567       384

    accuracy                         0.7754     20516
   macro avg     0.4913    0.3716    0.3737     20516
weighted avg     0.7160    0.7754    0.7269     20516



Feedforward Neural Networks

In [90]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, output_dim)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        out = self.softmax(self.fc3(out))
        return out
def call_MLP(inputdim,X_train,X_test,y_train,y_test):
    input_dim = inputdim
    hidden_dim1 = 256
    hidden_dim2 = 64
    output_dim = 4 
    learning_rate = 0.05
    num_epochs = 10
    batch_size = 64

    train_data = X_train
    train_labels = y_train.to_numpy()
    test_data = X_test
    test_labels = y_test.to_numpy()

    train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data).float().to(device), torch.from_numpy(train_labels).long().to(device))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    MLP_model = MLP(input_dim, hidden_dim1, hidden_dim2, output_dim).to(device)
    criterion = nn.CrossEntropyLoss(weight=torch.Tensor([1,2,4,8]).to(device))
    optimizer = optim.Adam(MLP_model.parameters(), lr=learning_rate)

    # Train the model
    for epoch in range(num_epochs):
        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = MLP_model(inputs.to(device))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            if (i+1) % 100 == 0:
                print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))

    # Test the model
    with torch.no_grad():
        MLP_model.eval()
        test_data = torch.from_numpy(test_data).float().to(device)
        test_labels = torch.from_numpy(test_labels).long().to(device)
        outputs = MLP_model(test_data).to(device)
        outputs_cpu = outputs.cpu()
        _, predicted = torch.max(outputs_cpu.data, 1)
        predicted = predicted.cpu().numpy()

        total = test_labels.size(0)
        correct = (predicted == test_labels.cpu().numpy()).sum().item()
        print(classification_report(predicted,test_labels.cpu().numpy(),digits=4))
        print('Test Accuracy: {:.2f}%'.format(100 * correct / total))
        return outputs


In [91]:
outputs=call_MLP(300,X_train,X_test,y_train,y_test)
# print(classification_report(predicted,test_labels.cpu().numpy()))
# print('Test Accuracy: {:.2f}%'.format(100 * correct / total))

Epoch [1/10], Batch [100/2786], Loss: 1.3872
Epoch [1/10], Batch [200/2786], Loss: 1.4279
Epoch [1/10], Batch [300/2786], Loss: 1.3958
Epoch [1/10], Batch [400/2786], Loss: 1.5716
Epoch [1/10], Batch [500/2786], Loss: 1.4134
Epoch [1/10], Batch [600/2786], Loss: 1.4510
Epoch [1/10], Batch [700/2786], Loss: 1.3676
Epoch [1/10], Batch [800/2786], Loss: 1.4654
Epoch [1/10], Batch [900/2786], Loss: 1.3676
Epoch [1/10], Batch [1000/2786], Loss: 1.4103
Epoch [1/10], Batch [1100/2786], Loss: 1.5060
Epoch [1/10], Batch [1200/2786], Loss: 1.4474
Epoch [1/10], Batch [1300/2786], Loss: 1.4520
Epoch [1/10], Batch [1400/2786], Loss: 1.3577
Epoch [1/10], Batch [1500/2786], Loss: 1.4605
Epoch [1/10], Batch [1600/2786], Loss: 1.3363
Epoch [1/10], Batch [1700/2786], Loss: 1.5275
Epoch [1/10], Batch [1800/2786], Loss: 1.3865
Epoch [1/10], Batch [1900/2786], Loss: 1.4835
Epoch [1/10], Batch [2000/2786], Loss: 1.3591
Epoch [1/10], Batch [2100/2786], Loss: 1.4103
Epoch [1/10], Batch [2200/2786], Loss: 1.38

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [92]:
def generate_vectors_3(data):
    inputs = []
    for review in data:
        review_vecs = []
        for i, word in enumerate(review):
            # print(word in model)
            if i < 20:
                if word in model:
                    review_vecs.append(model[word])
        if len(review_vecs) < 20:
            review_vecs += [np.zeros(300)] * (20 - len(review_vecs))
        inputs.append(review_vecs)
    return np.array(inputs)

In [93]:
X_train3=generate_vectors_3(X_traind)
X_test3=generate_vectors_3(X_testd)

KeyboardInterrupt: 

In [None]:
import torch

num_samples = [122149, 32831, 20145, 3139] # number of samples in each class
total_samples = sum(num_samples)
class_weights = torch.Tensor([total_samples / num_samples[i] for i in range(len(num_samples))])

In [None]:
class_weights

tensor([ 1.4594,  5.4297,  8.8490, 56.7901])

Simple RNN

In [None]:
class RNNModel(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)
        out, hidden = self.rnn(x, hidden)
        out=self.fc(out[:,-1,:])
        return out
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden

train_data = X_train3
train_labels = y_train.to_numpy()
test_data = X_test3
test_labels = y_test.to_numpy()
num_epochs = 40
batch_size = 128

train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data).float(), torch.from_numpy(train_labels).long())
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)   

RNN_model = RNNModel(300,4,20,1)
optimizer = torch.optim.Adam(RNN_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train model
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = RNN_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
            
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))
 
 # Test the model
with torch.no_grad():
    RNN_model.eval()
    test_data = torch.from_numpy(test_data).float()
    test_labels = torch.from_numpy(test_labels).long()
    outputs = RNN_model(test_data)
    _, predicted = torch.max(outputs.data, 1)
    total = test_labels.size(0)
    correct = (predicted == test_labels).sum().item()
    print(classification_report(predicted,test_labels))
    print('Test Accuracy: {:.2f}%'.format(100 * correct / total))


Epoch [1/15], Batch [100/2786], Loss: 0.8649
Epoch [1/15], Batch [200/2786], Loss: 1.0431
Epoch [1/15], Batch [300/2786], Loss: 1.0390
Epoch [1/15], Batch [400/2786], Loss: 0.7495
Epoch [1/15], Batch [500/2786], Loss: 0.8866
Epoch [1/15], Batch [600/2786], Loss: 0.8658
Epoch [1/15], Batch [700/2786], Loss: 0.8176
Epoch [1/15], Batch [800/2786], Loss: 0.8524
Epoch [1/15], Batch [900/2786], Loss: 0.7771
Epoch [1/15], Batch [1000/2786], Loss: 0.7158
Epoch [1/15], Batch [1100/2786], Loss: 0.7328
Epoch [1/15], Batch [1200/2786], Loss: 0.9519
Epoch [1/15], Batch [1300/2786], Loss: 0.6836
Epoch [1/15], Batch [1400/2786], Loss: 0.6499
Epoch [1/15], Batch [1500/2786], Loss: 0.6568
Epoch [1/15], Batch [1600/2786], Loss: 0.8016
Epoch [1/15], Batch [1700/2786], Loss: 0.6694
Epoch [1/15], Batch [1800/2786], Loss: 0.9220
Epoch [1/15], Batch [1900/2786], Loss: 0.8149
Epoch [1/15], Batch [2000/2786], Loss: 0.6134
Epoch [1/15], Batch [2100/2786], Loss: 0.8591
Epoch [1/15], Batch [2200/2786], Loss: 0.84

KeyboardInterrupt: 

In [None]:
print(classification_report(predicted,test_labels))
print('Test Accuracy: {:.2f}%'.format(100 * correct / total))


              precision    recall  f1-score   support

           0       0.49      0.88      0.63      8762
           1       0.75      0.29      0.41      5765
           2       0.34      0.17      0.23      4568
           3       0.38      0.10      0.16      1421

    accuracy                           0.50     20516
   macro avg       0.49      0.36      0.36     20516
weighted avg       0.52      0.50      0.45     20516

Test Accuracy: 50.22%


In [None]:
# from sklearn.decomposition import TruncatedSVD
# svd = TruncatedSVD(n_components=1000)
# X_train_svd = svd.fit_transform(X_train_vectorized)
# X_test_svd = svd.transform(X_test_vectorized)

GRU

In [None]:
class GRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GRUModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.gru = nn.GRU(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        out, h = self.gru(x, h)
        out = self.fc(self.relu(out[:, -1]))
        return out, h

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_dim)


train_data = X_train3
train_labels = y_train.to_numpy()
test_data = X_test3
test_labels = y_test.to_numpy()
num_epochs = 40
batch_size = 128

train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data).float(), torch.from_numpy(train_labels).long())
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)   

GRU_model = GRUModel(300,20,4)
optimizer = torch.optim.Adam(GRU_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train model
for epoch in range(num_epochs):
    h = GRU_model.init_hidden(batch_size)
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        batch_size = inputs.size(0)
        hidden = GRU_model.init_hidden(batch_size) # fixed the model -> GRU_model
        outputs, hidden = GRU_model(inputs, hidden) # fixed the model -> GRU_model
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
                
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))
 
 # Test the model
with torch.no_grad():
    GRU_model.eval()
    test_data = torch.from_numpy(test_data).float()
    test_labels = torch.from_numpy(test_labels).long()
    h = GRU_model.init_hidden(test_data.shape[0])
    outputs,h = GRU_model(test_data,h)
    _, predicted = torch.max(outputs.data, 1)
    total = test_labels.size(0)
    correct = (predicted == test_labels).sum().item()
    print('Test Accuracy: {:.2f}%'.format(100 * correct / total))


Epoch [1/15], Batch [100/2500], Loss: 1.3744
Epoch [1/15], Batch [200/2500], Loss: 1.1485
Epoch [1/15], Batch [300/2500], Loss: 1.0646
Epoch [1/15], Batch [400/2500], Loss: 1.0217
Epoch [1/15], Batch [500/2500], Loss: 1.1138
Epoch [1/15], Batch [600/2500], Loss: 1.0710
Epoch [1/15], Batch [700/2500], Loss: 0.8620
Epoch [1/15], Batch [800/2500], Loss: 0.8716
Epoch [1/15], Batch [900/2500], Loss: 0.9431
Epoch [1/15], Batch [1000/2500], Loss: 0.8734
Epoch [1/15], Batch [1100/2500], Loss: 0.8179
Epoch [1/15], Batch [1200/2500], Loss: 0.9804
Epoch [1/15], Batch [1300/2500], Loss: 0.9815
Epoch [1/15], Batch [1400/2500], Loss: 0.8246
Epoch [1/15], Batch [1500/2500], Loss: 0.8464
Epoch [1/15], Batch [1600/2500], Loss: 0.7522
Epoch [1/15], Batch [1700/2500], Loss: 0.8195
Epoch [1/15], Batch [1800/2500], Loss: 0.8562
Epoch [1/15], Batch [1900/2500], Loss: 0.8578
Epoch [1/15], Batch [2000/2500], Loss: 0.9529
Epoch [1/15], Batch [2100/2500], Loss: 0.9138
Epoch [1/15], Batch [2200/2500], Loss: 0.81

KeyboardInterrupt: 

In [None]:
print(classification_report(predicted,test_labels))
print('Test Accuracy: {:.2f}%'.format(100 * correct / total))

              precision    recall  f1-score   support

           0       0.54      0.90      0.68      9430
           1       0.80      0.32      0.46      5508
           2       0.41      0.18      0.25      5175
           3       0.23      0.22      0.23       403

    accuracy                           0.55     20516
   macro avg       0.49      0.41      0.40     20516
weighted avg       0.57      0.55      0.50     20516

Test Accuracy: 54.94%


LSTM


In [None]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2):
        super(LSTMModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
        
    def forward(self, x, h):
        out, h = self.lstm(x, h)
        out = self.fc(self.relu(out[:,-1]))
        return out, h
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
              weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
              )
        return hidden

train_data = X_train3
train_labels = y_train.to_numpy()
test_data = X_test3
test_labels = y_test.to_numpy()
num_epochs = 40
batch_size = 128

train_dataset = torch.utils.data.TensorDataset(torch.from_numpy(train_data).float(), torch.from_numpy(train_labels).long())
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)   

LSTM_model = LSTMModel(300,40,4,1)
optimizer = torch.optim.Adam(LSTM_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train model
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        batch_size = inputs.shape[0]
        h = LSTM_model.init_hidden(batch_size)
        h = tuple([e.data for e in h])
        optimizer.zero_grad()
        outputs,h = LSTM_model(inputs,h)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
            
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Batch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))

# with torch.no_grad():
#     LSTM_model.eval()
#     test_data = torch.from_numpy(test_data).float()
#     test_labels = torch.from_numpy(test_labels).long()
#     batch_size = test_data.shape[0]
#     seq_length = 1  # we are processing each sample as a single sequence
#     input_dim = test_data.shape[1]
#     test_data = test_data.view(batch_size, seq_length, input_dim)  # reshape the input tensor
#     h = LSTM_model.init_hidden(batch_size)
#     outputs, h = LSTM_model(test_data, h)
#     _, predicted = torch.max(outputs.data, 1)
#     total = test_labels.size(0)
#     correct = (predicted == test_labels).sum().item()
#     print(classification_report(predicted, test_labels))
#     print('Test Accuracy: {:.2f}%'.format(100 * correct / total))
#  Test the model
with torch.no_grad():
    LSTM_model.eval()
    test_data = torch.from_numpy(test_data).float()
    test_labels = torch.from_numpy(test_labels).long()
    h = LSTM_model.init_hidden(test_data.shape[0])
    outputs,h = LSTM_model(test_data,h)
    _, predicted = torch.max(outputs.data, 1)
    total = test_labels.size(0)
    correct = (predicted == test_labels).sum().item()
    print(classification_report(predicted,test_labels))

    print('Test Accuracy: {:.2f}%'.format(100 * correct / total))



Epoch [1/15], Batch [100/1250], Loss: 1.2216
Epoch [1/15], Batch [200/1250], Loss: 1.1098
Epoch [1/15], Batch [300/1250], Loss: 1.1045
Epoch [1/15], Batch [400/1250], Loss: 1.0352
Epoch [1/15], Batch [500/1250], Loss: 1.1957
Epoch [1/15], Batch [600/1250], Loss: 0.8608
Epoch [1/15], Batch [700/1250], Loss: 0.7943
Epoch [1/15], Batch [800/1250], Loss: 0.9694
Epoch [1/15], Batch [900/1250], Loss: 0.8232
Epoch [1/15], Batch [1000/1250], Loss: 0.8560
Epoch [1/15], Batch [1100/1250], Loss: 0.9756
Epoch [1/15], Batch [1200/1250], Loss: 0.8900
Epoch [2/15], Batch [100/1250], Loss: 1.0175
Epoch [2/15], Batch [200/1250], Loss: 0.8401
Epoch [2/15], Batch [300/1250], Loss: 0.9116
Epoch [2/15], Batch [400/1250], Loss: 0.8208
Epoch [2/15], Batch [500/1250], Loss: 0.8517
Epoch [2/15], Batch [600/1250], Loss: 0.7067
Epoch [2/15], Batch [700/1250], Loss: 0.7755
Epoch [2/15], Batch [800/1250], Loss: 0.8036
Epoch [2/15], Batch [900/1250], Loss: 0.7509
Epoch [2/15], Batch [1000/1250], Loss: 0.7364
Epoch 

In [None]:
print(classification_report(predicted,test_labels))

print('Test Accuracy: {:.2f}%'.format(100 * correct / total))

              precision    recall  f1-score   support

           0       0.51      0.91      0.66      8871
           1       0.82      0.30      0.44      6024
           2       0.38      0.18      0.24      5067
           3       0.25      0.17      0.20       554

    accuracy                           0.53     20516
   macro avg       0.49      0.39      0.38     20516
weighted avg       0.56      0.53      0.48     20516

Test Accuracy: 52.74%
