<a href="https://colab.research.google.com/github/elsaimo/4106_Final_project/blob/main/SelenaNahraFinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Loading Dataset & Preprocessing**

In [None]:
import torch
import pandas as pd
import category_encoders as ce
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("heart_disease_uci.csv")
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 920 entries, 0 to 919
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   id        920 non-null    int64  
 1   age       920 non-null    int64  
 2   sex       920 non-null    object 
 3   dataset   920 non-null    object 
 4   cp        920 non-null    object 
 5   trestbps  861 non-null    float64
 6   chol      890 non-null    float64
 7   fbs       830 non-null    object 
 8   restecg   918 non-null    object 
 9   thalch    865 non-null    float64
 10  exang     865 non-null    object 
 11  oldpeak   858 non-null    float64
 12  slope     611 non-null    object 
 13  ca        309 non-null    float64
 14  thal      434 non-null    object 
 15  num       920 non-null    int64  
dtypes: float64(5), int64(3), object(8)
memory usage: 115.1+ KB


Unnamed: 0,id,age,sex,dataset,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,num
0,1,63,Male,Cleveland,typical angina,145.0,233.0,True,lv hypertrophy,150.0,False,2.3,downsloping,0.0,fixed defect,0
1,2,67,Male,Cleveland,asymptomatic,160.0,286.0,False,lv hypertrophy,108.0,True,1.5,flat,3.0,normal,2
2,3,67,Male,Cleveland,asymptomatic,120.0,229.0,False,lv hypertrophy,129.0,True,2.6,flat,2.0,reversable defect,1
3,4,37,Male,Cleveland,non-anginal,130.0,250.0,False,normal,187.0,False,3.5,downsloping,0.0,normal,0
4,5,41,Female,Cleveland,atypical angina,130.0,204.0,False,lv hypertrophy,172.0,False,1.4,upsloping,0.0,normal,0


In [None]:
#remove irrelevant columns
df = df.drop(columns=['id','dataset'])

#remove columns with null values
df.isnull().values.any()
df = df.dropna()

df.info()
print(df.columns)

<class 'pandas.core.frame.DataFrame'>
Index: 299 entries, 0 to 748
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       299 non-null    int64  
 1   sex       299 non-null    object 
 2   cp        299 non-null    object 
 3   trestbps  299 non-null    float64
 4   chol      299 non-null    float64
 5   fbs       299 non-null    object 
 6   restecg   299 non-null    object 
 7   thalch    299 non-null    float64
 8   exang     299 non-null    object 
 9   oldpeak   299 non-null    float64
 10  slope     299 non-null    object 
 11  ca        299 non-null    float64
 12  thal      299 non-null    object 
 13  num       299 non-null    int64  
dtypes: float64(5), int64(2), object(7)
memory usage: 35.0+ KB
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalch',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num'],
      dtype='object')


In [None]:
#encode Male = 0 & Female = 1
df['sex'] = df['sex'].map({'Female': 1, 'Male': 0})

#encode cp, restecg, slope, thal
onehot_encoder = ce.OneHotEncoder(cols=['cp', 'restecg', 'slope', 'thal'])
df = onehot_encoder.fit_transform(df)

#True = 1, False = 0
df[['fbs', 'exang']] = df[['fbs', 'exang']].astype(int)

#scale age, trestbps, chol, thalach, oldpeak, ca
scaler = StandardScaler()
columns_to_scale = ['age', 'trestbps', 'chol', 'thalch', 'oldpeak','ca']
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

df.head()

Unnamed: 0,age,sex,cp_1,cp_2,cp_3,cp_4,trestbps,chol,fbs,restecg_1,...,exang,oldpeak,slope_1,slope_2,slope_3,ca,thal_1,thal_2,thal_3,num
0,0.940446,0,1,0,0,0,0.74976,-0.262867,1,1,...,0,1.069475,1,0,0,-0.718306,1,0,0,0
1,1.384143,0,0,1,0,0,1.596354,0.747722,0,1,...,1,0.380309,0,1,0,2.487269,0,1,0,2
2,1.384143,0,0,1,0,0,-0.661231,-0.339138,0,1,...,1,1.327912,0,1,0,1.418744,0,0,1,1
3,-1.943588,0,0,0,1,0,-0.096835,0.061285,0,0,...,0,2.103224,1,0,0,-0.718306,0,1,0,0
4,-1.499891,1,0,0,0,1,-0.096835,-0.81583,0,1,...,0,0.294163,0,0,1,-0.718306,0,1,0,0


In [None]:
class_distribution = df['num'].value_counts()
sorted_distribution = class_distribution.sort_index()
print(sorted_distribution)

num
0    160
1     56
2     35
3     35
4     13
Name: count, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split
import time
from imblearn.over_sampling import SMOTE
import torch.nn as nn
import torch.optim as optim
import numpy as np

X = df.drop(columns=["num"])
y = df["num"]

print("Shape of X", X.shape)
print("Shape of y", y.shape)

smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

print(y_resampled.value_counts())

X = X_resampled.values
y = y_resampled.values.reshape(-1, 1)

print("Shape of X", X.shape)
print("Shape of y", y.shape)


# 80-20 Split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print('Shape of X_train:', X_train.shape)
print('Shape of y_train:', y_train.shape)
print('Shape of X_val:', X_val.shape)
print('Shape of y_val:', y_val.shape)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long).squeeze()
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long).squeeze()

# Reshape X_val to include a batch dimension
X_val = X_val.unsqueeze(1)
X_train = X_train.unsqueeze(1)

print('Shape of X_train:', X_train.shape)
print('Shape of y_train:', y_train.shape)
print('Shape of X_val:', X_val.shape)
print('Shape of y_val:', y_val.shape)

Shape of X (299, 22)
Shape of y (299,)
num
0    160
2    160
1    160
3    160
4    160
Name: count, dtype: int64
Shape of X (800, 22)
Shape of y (800, 1)
Shape of X_train: (640, 22)
Shape of y_train: (640, 1)
Shape of X_val: (160, 22)
Shape of y_val: (160, 1)
Shape of X_train: torch.Size([640, 1, 22])
Shape of y_train: torch.Size([640])
Shape of X_val: torch.Size([160, 1, 22])
Shape of y_val: torch.Size([160])


**Hidden Size 128**

In [None]:
# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 128
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 64

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 1.0591682195663452, Validation Loss: 1.1160188913345337, Validation Accuracy: 0.574999988079071
Epoch 20, Loss: 0.856102466583252, Validation Loss: 1.026399850845337, Validation Accuracy: 0.606249988079071
Epoch 30, Loss: 0.6951613426208496, Validation Loss: 0.912550151348114, Validation Accuracy: 0.637499988079071
Epoch 40, Loss: 0.5420763492584229, Validation Loss: 0.8023080825805664, Validation Accuracy: 0.7250000238418579
Epoch 50, Loss: 0.40821585059165955, Validation Loss: 0.7227813005447388, Validation Accuracy: 0.78125
Epoch 60, Loss: 0.30136436223983765, Validation Loss: 0.668775200843811, Validation Accuracy: 0.793749988079071
Epoch 70, Loss: 0.21454282104969025, Validation Loss: 0.6419416069984436, Validation Accuracy: 0.8062499761581421
Epoch 80, Loss: 0.1435394585132599, Validation Loss: 0.6399978399276733, Validation Accuracy: 0.824999988079071
Epoch 90, Loss: 0.08995769917964935, Validation Loss: 0.6548817753791809, Validation Accuracy: 0.8374999761581421

**Hidden Size 512**

In [None]:
# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 64

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 0.8795229196548462, Validation Loss: 1.044177770614624, Validation Accuracy: 0.606249988079071
Epoch 20, Loss: 0.6609216928482056, Validation Loss: 0.8775450587272644, Validation Accuracy: 0.6875
Epoch 30, Loss: 0.43196922540664673, Validation Loss: 0.7662487030029297, Validation Accuracy: 0.7562500238418579
Epoch 40, Loss: 0.25723493099212646, Validation Loss: 0.665778398513794, Validation Accuracy: 0.7875000238418579
Epoch 50, Loss: 0.13114507496356964, Validation Loss: 0.646744430065155, Validation Accuracy: 0.8187500238418579
Epoch 60, Loss: 0.05285505950450897, Validation Loss: 0.6945980787277222, Validation Accuracy: 0.8374999761581421
Epoch 70, Loss: 0.024163929745554924, Validation Loss: 0.7549999356269836, Validation Accuracy: 0.8374999761581421
Epoch 80, Loss: 0.013895063661038876, Validation Loss: 0.8065217733383179, Validation Accuracy: 0.84375
Epoch 90, Loss: 0.009071729145944118, Validation Loss: 0.8483728170394897, Validation Accuracy: 0.84375
Epoch 100, 

**Hidden Size 1024**

In [None]:
# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 1024
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 64

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 0.828528106212616, Validation Loss: 1.0116965770721436, Validation Accuracy: 0.606249988079071
Epoch 20, Loss: 0.5239249467849731, Validation Loss: 0.8203579187393188, Validation Accuracy: 0.75
Epoch 30, Loss: 0.28379368782043457, Validation Loss: 0.6947687864303589, Validation Accuracy: 0.78125
Epoch 40, Loss: 0.11510872095823288, Validation Loss: 0.7089089155197144, Validation Accuracy: 0.831250011920929
Epoch 50, Loss: 0.028799468651413918, Validation Loss: 0.8193429708480835, Validation Accuracy: 0.84375
Epoch 60, Loss: 0.012128218077123165, Validation Loss: 0.906078040599823, Validation Accuracy: 0.8500000238418579
Epoch 70, Loss: 0.006811951752752066, Validation Loss: 0.966629683971405, Validation Accuracy: 0.84375
Epoch 80, Loss: 0.0043959286995232105, Validation Loss: 1.0121972560882568, Validation Accuracy: 0.8500000238418579
Epoch 90, Loss: 0.003079099114984274, Validation Loss: 1.0485589504241943, Validation Accuracy: 0.8500000238418579
Epoch 100, Loss: 0.002

**Smaller Learning Rate**

In [None]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 50
batch_size = 64

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 1.4665449857711792, Validation Loss: 1.4838459491729736, Validation Accuracy: 0.46875
Epoch 20, Loss: 1.1961530447006226, Validation Loss: 1.277030110359192, Validation Accuracy: 0.5
Epoch 30, Loss: 1.0946826934814453, Validation Loss: 1.1799120903015137, Validation Accuracy: 0.5562499761581421
Epoch 40, Loss: 1.0320645570755005, Validation Loss: 1.1148192882537842, Validation Accuracy: 0.5874999761581421
Epoch 50, Loss: 0.9898903965950012, Validation Loss: 1.0875699520111084, Validation Accuracy: 0.612500011920929
Total training time: 45.72 seconds
Accuracy on test set: 0.61


**50 Epochs**

In [None]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
num_epochs = 50
batch_size = 64

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 0.196546271443367, Validation Loss: 0.6916520595550537, Validation Accuracy: 0.7749999761581421
Epoch 20, Loss: 0.03152681142091751, Validation Loss: 0.9633967280387878, Validation Accuracy: 0.831250011920929
Epoch 30, Loss: 0.0008992324583232403, Validation Loss: 0.8444622755050659, Validation Accuracy: 0.831250011920929
Epoch 40, Loss: 0.0003772751078940928, Validation Loss: 0.9074570536613464, Validation Accuracy: 0.831250011920929
Epoch 50, Loss: 0.0002465778961777687, Validation Loss: 0.9424358606338501, Validation Accuracy: 0.8374999761581421
Total training time: 37.57 seconds
Accuracy on test set: 0.84


**Increased Batch Size**

In [None]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 50
batch_size = 128

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 1.5757489204406738, Validation Loss: 1.5814327001571655, Validation Accuracy: 0.35624998807907104
Epoch 20, Loss: 1.4450792074203491, Validation Loss: 1.4711582660675049, Validation Accuracy: 0.518750011920929
Epoch 30, Loss: 1.2595099210739136, Validation Loss: 1.330127477645874, Validation Accuracy: 0.5249999761581421
Epoch 40, Loss: 1.178718090057373, Validation Loss: 1.2532353401184082, Validation Accuracy: 0.53125
Epoch 50, Loss: 1.1224050521850586, Validation Loss: 1.1895838975906372, Validation Accuracy: 0.543749988079071
Total training time: 25.86 seconds
Accuracy on test set: 0.54


**Decreased Batch Size**

In [18]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 2
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 50
batch_size = 32

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 1.1537374258041382, Validation Loss: 1.2943111658096313, Validation Accuracy: 0.5062500238418579
Epoch 20, Loss: 1.012276530265808, Validation Loss: 1.1573225259780884, Validation Accuracy: 0.5625
Epoch 30, Loss: 0.9602954983711243, Validation Loss: 1.095198392868042, Validation Accuracy: 0.625
Epoch 40, Loss: 0.8949155211448669, Validation Loss: 1.0709595680236816, Validation Accuracy: 0.612500011920929
Epoch 50, Loss: 0.8278847336769104, Validation Loss: 1.0547534227371216, Validation Accuracy: 0.6000000238418579
Total training time: 76.96 seconds
Accuracy on test set: 0.60


**Increased Layers**

In [None]:
import torch
import torch.nn as nn

# Define LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Hyperparameters
input_size = X_train.shape[2]
hidden_size = 512
num_layers = 3
num_classes = len(np.unique(y_train))

# Initialize the model
model = LSTM(input_size, hidden_size, num_layers, num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 100
batch_size = 64

starttime = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        inputs = X_train[i:i + batch_size]
        targets = y_train[i:i + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:  # Validation every 10 epochs
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)  # Assuming X_val is also properly reshaped
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            print(f'Epoch {epoch + 1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
total_time = end_time - starttime
print(f'Total training time: {total_time:.2f} seconds')

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_val).sum().item() / y_val.size(0)
    print(f'Accuracy on test set: {accuracy:.2f}')

Epoch 10, Loss: 1.355981469154358, Validation Loss: 1.3912880420684814, Validation Accuracy: 0.40625
Epoch 20, Loss: 1.136789083480835, Validation Loss: 1.2384700775146484, Validation Accuracy: 0.48750001192092896
Epoch 30, Loss: 1.0425759553909302, Validation Loss: 1.1667726039886475, Validation Accuracy: 0.5249999761581421
Epoch 40, Loss: 1.0128295421600342, Validation Loss: 1.1214616298675537, Validation Accuracy: 0.550000011920929
Epoch 50, Loss: 1.0030380487442017, Validation Loss: 1.100312352180481, Validation Accuracy: 0.6000000238418579
Epoch 60, Loss: 0.9815994501113892, Validation Loss: 1.0892752408981323, Validation Accuracy: 0.625
Epoch 70, Loss: 0.9480371475219727, Validation Loss: 1.0790468454360962, Validation Accuracy: 0.612500011920929
Epoch 80, Loss: 0.9028149247169495, Validation Loss: 1.0678064823150635, Validation Accuracy: 0.6312500238418579
Epoch 90, Loss: 0.8540654182434082, Validation Loss: 1.0581471920013428, Validation Accuracy: 0.6187499761581421
Epoch 100, 