In [1]:
import pandas as pd
import torch
# Seed PyTorch's global RNG. torch.manual_seed seeds the CPU generator as well
# as the CUDA generators; torch.cuda.manual_seed alone leaves the CPU generator
# unseeded, and that is the generator used to initialise layer weights when a
# module is constructed on the CPU and only afterwards moved to the device.
torch.manual_seed(42)

# Reading the data file and selecting columns


In [2]:
from sklearn.model_selection import train_test_split

# Load the modelling table from disk.
dataset = pd.read_csv('df_for_modeling.csv')

# 'file_name' and 'selected' are pulled out on their own; every remaining
# column is treated as a feature.
files = dataset['file_name']
y_df = dataset['selected']
X_df = dataset.drop(columns=['file_name', 'selected'])

# Hold out 30% of the rows for testing. The fixed random_state keeps the
# split identical from run to run.
X_train_df, X_test_df, y_train_df, y_test_df = train_test_split(
    X_df, y_df, test_size=0.3, random_state=42
)

# Converting to torch tensors and moving them to the selected device (GPU if available)

In [3]:
# Device-agnostic setup: use CUDA when it is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

# Apply the same conversion to every split:
# DataFrame/Series -> NumPy array -> tensor on `device` -> float32.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(frame.to_numpy()).to(device).type(torch.float)
    for frame in (X_train_df, X_test_df, y_train_df, y_test_df)
)


# Defining the model

In [26]:
from torch import nn

class model(nn.Module):
    """Fully connected binary classifier with two hidden layers.

    Architecture: Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid.
    Because the sigmoid is applied inside ``forward``, the outputs are
    probabilities in the range [0, 1] rather than raw logits.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the first hidden layer.
        output_dim: Number of output units.
        hidden_dim2: Width of the second hidden layer. Defaults to 100, the
            value that used to be hard-coded, so existing three-argument
            calls build exactly the same network.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, hidden_dim2=100) -> None:
        super().__init__()

        # Layers. Attribute names are kept stable so state_dict keys do not change.
        self.layer1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.activation = nn.ReLU()
        self.layer2 = nn.Linear(in_features=hidden_dim, out_features=hidden_dim2)
        self.layer3 = nn.Linear(in_features=hidden_dim2, out_features=output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Float tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with last dimension ``output_dim`` holding sigmoid
            activations.
        """
        out = self.layer1(x)
        out = self.activation(out)
        out = self.layer2(out)
        out = self.activation(out)
        out = self.layer3(out)
        out = self.sigmoid(out)

        return out

## Training the model

In [28]:
from torch import optim

# Hyperparameters
learning_rate = 0.01
# Take the input width from the data instead of hard-coding it, so the model
# keeps matching the feature matrix if the columns in the CSV change.
input_dim = X_train.shape[1]
output_dim = 1
hidden_dim = 200
epochs = 10000

model0 = model(input_dim, hidden_dim, output_dim).to(device)

# BCELoss expects probabilities; the model applies a sigmoid in forward().
criterion = nn.BCELoss()
optimizer = optim.SGD(model0.parameters(), lr=learning_rate)

def accuracy_fn(y_true, y_pred):
    """Return the percentage of predictions that equal their labels.

    Both tensors are flattened before the comparison. Without that,
    ``torch.eq`` would broadcast a shape ``(N,)`` tensor against a shape
    ``(N, 1)`` tensor into an ``(N, N)`` matrix and report a meaningless
    (possibly > 100%) accuracy.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels with the same number of elements.

    Returns:
        Accuracy as a float percentage in [0, 100].

    Raises:
        ValueError: If the tensors hold different numbers of elements, or
            are empty (accuracy is undefined for zero samples).
    """
    total = y_pred.numel()
    if y_true.numel() != total:
        raise ValueError(
            "y_true and y_pred must have the same number of elements, "
            "got {} and {}".format(y_true.numel(), total)
        )
    if total == 0:
        raise ValueError("cannot compute accuracy on empty tensors")

    # torch.eq marks the positions where the two tensors agree.
    correct = torch.eq(y_true.reshape(-1), y_pred.reshape(-1)).sum().item()
    return (correct / total) * 100

# Full-batch training: every epoch runs one optimisation step on all of X_train.
for epoch in range(epochs):
    # Make sure the model is in training mode (evaluation below switches it off).
    model0.train()

    # Forward pass
    outputs = model0(X_train)

    # squeeze(-1) removes only the trailing size-1 output dimension; a bare
    # squeeze() would also collapse the batch dimension for a single-row batch.
    loss = criterion(outputs.squeeze(-1), y_train)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))

        # Evaluate the model on the held-out split without tracking gradients.
        model0.eval()
        with torch.no_grad():
            y_pred = model0(X_test)
            # Threshold the sigmoid outputs at 0.5 to obtain class labels.
            y_pred_cls = (y_pred > 0.5).float().squeeze(-1)
            # Arguments follow accuracy_fn's (y_true, y_pred) signature.
            accuracy = accuracy_fn(y_test, y_pred_cls)
            print('Accuracy: {:.2f}%'.format(accuracy))


Epoch [100/10000], Loss: 0.6079
Accuracy: 71.84%
Epoch [200/10000], Loss: 0.5984
Accuracy: 71.84%
Epoch [300/10000], Loss: 0.5951
Accuracy: 71.84%
Epoch [400/10000], Loss: 0.5920
Accuracy: 71.84%
Epoch [500/10000], Loss: 0.5885
Accuracy: 71.84%
Epoch [600/10000], Loss: 0.5847
Accuracy: 71.84%
Epoch [700/10000], Loss: 0.5803
Accuracy: 71.84%
Epoch [800/10000], Loss: 0.5754
Accuracy: 71.84%
Epoch [900/10000], Loss: 0.5697
Accuracy: 71.87%
Epoch [1000/10000], Loss: 0.5633
Accuracy: 71.94%
Epoch [1100/10000], Loss: 0.5562
Accuracy: 72.08%
Epoch [1200/10000], Loss: 0.5485
Accuracy: 72.53%
Epoch [1300/10000], Loss: 0.5405
Accuracy: 72.88%
Epoch [1400/10000], Loss: 0.5327
Accuracy: 73.68%
Epoch [1500/10000], Loss: 0.5252
Accuracy: 74.14%
Epoch [1600/10000], Loss: 0.5184
Accuracy: 74.76%
Epoch [1700/10000], Loss: 0.5124
Accuracy: 75.11%
Epoch [1800/10000], Loss: 0.5072
Accuracy: 75.95%
Epoch [1900/10000], Loss: 0.5027
Accuracy: 76.37%
Epoch [2000/10000], Loss: 0.4990
Accuracy: 76.58%
Epoch [21