In [1]:
from datasetSplitter import datasetSplitterStratKFold, datasetFoldMaker
import pandas as pd

datasett = pd.read_csv("data\processed_data.csv")

train_x, test_x, train_y, test_y = datasetSplitterStratKFold(dataFrame=datasett)

kFolds = datasetFoldMaker(train_x, train_y)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
# import torch.nn.init as init
from torch.utils.data import TensorDataset, DataLoader

class BasicModel(nn.Module):
    def __init__(self,inFeatures,hiddens,classes,activations:list):
        super().__init__()
        if isinstance(activations,list):
            assert len(activations) <= len(hiddens)
        linearList = []
        start = inFeatures
        for size in hiddens:
            linearList.append(nn.Linear(start,size))
            start = size

        layerList = []
        for idx in range(len(linearList)):
            layerList.append(linearList[idx])
            if isinstance(activations,list):
                if idx < len(activations):
                    layerList.append(activations[idx])
                else:
                    layerList.append(activations[-1])
            else:
                layerList.append(activations)
        layerList.append(nn.Linear(size,classes))
        # layerList.append(nn.Sigmoid())
        self.block = nn.Sequential(
            *layerList,
        )
    
    def forward(self, x):
        return self.block(x)
        
model = BasicModel(28,[512,256,32],1,
                   [nn.Tanh(),nn.LeakyReLU()])
print(model)

for layer in model.modules():
    if isinstance(layer, (nn.Linear, nn.Conv2d)):  # Check if the layer has weights
#         init.xavier_normal_(layer.weight,gain=0.1)  # Initialize weights using Xavier Normal
#         if layer.bias is not None:
#             init.normal_(layer.bias, mean=0, std=0.05)  
        print(layer.weight,layer.bias)

device = "cpu" #torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

model = model.to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([2.0]))
optimiser = optim.Adagrad(model.parameters(),lr=0.005)

BasicModel(
  (block): Sequential(
    (0): Linear(in_features=28, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=256, out_features=32, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=32, out_features=1, bias=True)
  )
)
Parameter containing:
tensor([[ 0.0462,  0.0186,  0.0191,  ...,  0.0204, -0.1267,  0.0306],
        [-0.0874, -0.0839,  0.0043,  ...,  0.0715, -0.1137, -0.0141],
        [-0.0037,  0.0596, -0.1428,  ...,  0.0464, -0.0860,  0.1586],
        ...,
        [-0.1269,  0.0466, -0.0769,  ..., -0.0946,  0.1886,  0.1820],
        [-0.0163,  0.0278, -0.1288,  ..., -0.0549, -0.1546, -0.1839],
        [ 0.1009, -0.0454, -0.0144,  ...,  0.1664, -0.0035,  0.0378]],
       requires_grad=True) Parameter containing:
tensor([-0.0666,  0.0077,  0.1085, -0.0874, -0.0532,  0.0539,  0.0925, -0.0980,
         0.

In [None]:

from sklearn.metrics import recall_score, accuracy_score,f1_score

epochs = 100
batchSize = 64

# firstBatch = True

for fold, (train_x, test_x, train_y, test_y) in enumerate(kFolds,start=1):

    #? Need to convert the pd.DataFrames to torch.tensors
    # train_x = torch.tensor(train_x.values,dtype=torch.float32)
    # train_x_normalized = (train_x - train_x.min()) / (train_x.max() - train_x.min())
    # train_y = torch.tensor(train_y.values,dtype=torch.float32)
    # test_x = torch.tensor(test_x.values,dtype=torch.float32)
    # test_x_normalized = (test_x - test_x.min()) / (test_x.max() - test_x.min())
    # test_y = torch.tensor(test_y.values,dtype=torch.float32)

    train_dataset = TensorDataset(torch.tensor(train_x.values,dtype=torch.float32), torch.tensor(train_y.values,dtype=torch.float32))
    val_dataset = TensorDataset(torch.tensor(test_x.values,dtype=torch.float32), torch.tensor(test_y.values,dtype=torch.float32))

    train_loader = DataLoader(train_dataset, batch_size=batchSize, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batchSize, shuffle=False)

    model.train()
    print(f"Fold: {fold}")
    for epoch in range(1,epochs+1):
        print(f" Epoch: {epoch}".ljust(12), end="\r")
        running_loss = 0.
        correct_train = 0
        total_train = 0

        
        for batch, (inputs, labels) in enumerate(train_loader,start=1):

            inputs, labels = inputs.to(device), labels.float().to(device)
            optimiser.zero_grad()
            outputs = model(inputs)
           
            loss = criterion(outputs, labels)
            loss.backward()
            optimiser.step()

            running_loss += loss.item()
            predictions = (torch.sigmoid(outputs) > 0.5).float().cpu()
            labels = labels.cpu()
            total_train += labels.size(0)
            correct_train += (predictions == labels).sum().item()
            accuracyScore = accuracy_score(labels,predictions)
            recallScore = recall_score(labels,predictions,zero_division=1)
            f1Score = f1_score(labels,predictions)

            if epoch == 1 and batch == 1:
                # print("Inputs:",inputs)
                print("Labels",labels)
                print("Outputs",outputs)
                print("Preds", predictions)

            print(f" Epoch: {epoch}".ljust(12,"_"),
                  f"Batch: {batch}".ljust(12),
                  f"Loss: {(running_loss/total_train):.4f}".ljust(16),
                  f"AccuracyScore: {(accuracyScore):.2f}".ljust(20),
                  f"RecallScore: {(recallScore):.2f}".ljust(20),
                  f"f1Score: {(f1Score):.2f}".ljust(20),  
                  end="\r")
            # break
        print()

        train_accuracy = (correct_train / total_train)*100
        avg_loss = running_loss / (len(train_loader))

        print(f" Epoch: {epoch}".ljust(12),
              f"Loss: {avg_loss:.4f}".ljust(16),
              f"Train Accuracy: {train_accuracy:.2f}%".ljust(80),
            #   end="\r",
              )
        # if epoch > 10:
        #     break
    # break
    print()

    model.eval()
    print(f"Validation".center(20,"-"))
    
    running_loss = 0.
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for batch, (inputs, labels) in enumerate(val_loader,start=1):

            inputs, labels = inputs.to(device), labels.squeeze().float().to(device)
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)


            running_loss += loss.item()
            predictions = (outputs > 0.5).float().cpu()
            labels2 = labels.cpu()
            total_val += labels2.size(0)
            correct_val += (predictions == labels2).sum().item()
            accuracy = (predictions == labels2).float().mean()
            accuracyScore = accuracy_score(labels2,predictions)
            recallScore = recall_score(labels2,predictions,zero_division=0.0)

            # print(f"  Batch: {batch+1}",
            #         f"Loss: {(running_loss/total_train):.4f}",
            #         f"AccuracyScore: {(accuracy):.2f}",
            #         f"RecallScore: {(recallScore):.2f}", 
            #         end="\r")
            # break
        # print()
        print(correct_val,total_val)

        val_accuracy = (correct_val / total_val)*100
        avg_loss = running_loss / (len(train_loader))

        print(f"  Loss: {avg_loss:.4f}".ljust(12),
                f"Val Accuracy: {val_accuracy:.2f}%".ljust(50),
                )
    


Fold: 1
Labels tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.]])
Outputs tensor([[-0.1264],
        [-0.1141],
        [-0.1453],
        [-0.1242],
    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


 Epoch: 2___ Batch: 72    Loss: 0.0073     AccuracyScore: 0.96  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 2    Loss: 0.4668     Train Accuracy: 89.77%                                                          
 Epoch: 3___ Batch: 72    Loss: 0.0071     AccuracyScore: 0.82  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 3    Loss: 0.4545     Train Accuracy: 88.74%                                                          
 Epoch: 4___ Batch: 72    Loss: 0.0070     AccuracyScore: 0.90  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 4    Loss: 0.4481     Train Accuracy: 89.20%                                                          
 Epoch: 5___ Batch: 72    Loss: 0.0070     AccuracyScore: 1.00  RecallScore: 1.00    f1Score: 0.00       
 Epoch: 5    Loss: 0.4466     Train Accuracy: 88.77%                                                          
 Epoch: 6___ Batch: 33    Loss: 0.0069     AccuracyScore: 0.92  RecallScore: 0.17    f1Score: 0.29       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


 Epoch: 6___ Batch: 72    Loss: 0.0070     AccuracyScore: 0.90  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 6    Loss: 0.4468     Train Accuracy: 89.33%                                                          
 Epoch: 7___ Batch: 72    Loss: 0.0070     AccuracyScore: 0.92  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 7    Loss: 0.4434     Train Accuracy: 89.20%                                                          
 Epoch: 8___ Batch: 72    Loss: 0.0070     AccuracyScore: 0.90  RecallScore: 0.50    f1Score: 0.55       
 Epoch: 8    Loss: 0.4436     Train Accuracy: 89.16%                                                          
 Epoch: 9___ Batch: 72    Loss: 0.0069     AccuracyScore: 0.90  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 9    Loss: 0.4419     Train Accuracy: 89.22%                                                          
 Epoch: 10__ Batch: 72    Loss: 0.0069     AccuracyScore: 0.88  RecallScore: 0.29    f1Score: 0.40       
 Epoch: 10   Loss: 0.4414 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


 Epoch: 42__ Batch: 72    Loss: 0.0064     AccuracyScore: 0.84  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 42   Loss: 0.4072     Train Accuracy: 89.81%                                                          
 Epoch: 43__ Batch: 72    Loss: 0.0064     AccuracyScore: 0.92  RecallScore: 0.40    f1Score: 0.50       
 Epoch: 43   Loss: 0.4074     Train Accuracy: 89.64%                                                          
 Epoch: 44__ Batch: 72    Loss: 0.0064     AccuracyScore: 0.84  RecallScore: 0.00    f1Score: 0.00       
 Epoch: 44   Loss: 0.4060     Train Accuracy: 89.88%                                                          
 Epoch: 45__ Batch: 72    Loss: 0.0063     AccuracyScore: 0.90  RecallScore: 0.33    f1Score: 0.44       
 Epoch: 45   Loss: 0.4050     Train Accuracy: 90.03%                                                          
 Epoch: 46__ Batch: 72    Loss: 0.0063     AccuracyScore: 0.84  RecallScore: 0.45    f1Score: 0.56       
 Epoch: 46   Loss: 0.4035 

In [83]:

print("Outputs",outputs)
print("Preds", predicted)
print("Labels",labels)

Outputs tensor([[-1.0172],
        [ 0.4554],
        [ 1.1342],
        [-0.0122],
        [ 1.1775],
        [-2.6526],
        [ 1.4175],
        [ 0.1274],
        [-0.0921],
        [-0.9861],
        [ 0.4210],
        [ 0.9787],
        [-0.4351],
        [ 0.9333],
        [-2.1001],
        [-2.6380],
        [-2.0313],
        [-0.9810],
        [-2.0490],
        [ 0.4956],
        [-0.6565],
        [-0.5936],
        [ 0.6807],
        [-0.5846],
        [-0.7008],
        [-0.6991],
        [-1.6942],
        [-2.2613],
        [-0.8856],
        [ 0.1671],
        [-1.4126],
        [-1.1002],
        [ 0.0649],
        [ 0.1838],
        [-0.2642],
        [-0.6062],
        [ 0.8984],
        [-0.8644],
        [ 1.8708],
        [-1.0555],
        [ 0.3642],
        [-1.0209],
        [-1.0453],
        [-0.7794],
        [-1.7509],
        [-0.1454],
        [ 1.9349],
        [-0.0929],
        [ 0.9393],
        [ 1.8216],
        [-0.0266],
        [ 0.7406],
    

In [None]:
train_x_normalized = (train_x - train_x.min()) / (train_x.max() - train_x.min()) 
test_x_normalized = (test_x - test_x.min()) / (test_x.max() - test_x.min())    

train_dataset = TensorDataset(torch.tensor(train_x_normalized.values,dtype=torch.float32),
                              torch.tensor(train_y.values,dtype=torch.float32))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)


test_dataset = TensorDataset(torch.tensor(test_x_normalized.values,dtype=torch.float32),
                             torch.tensor(test_y.values,dtype=torch.float32))
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("Train:", len(train_dataset))
print("Test:", len(test_dataset))

epochs = 50

model.train()
for epoch in range(1,epochs+1):
    print(f" Epoch: [{epoch}/{epochs}]".ljust(16),
        #   end="\r",
          )
    running_loss = 0.
    correct_train = 0
    total_train = 0

    for batch, (inputs, labels) in enumerate(train_loader,start=1):

        inputs, labels = inputs.to(device), labels.float().to(device)
        optimiser.zero_grad()
        outputs = model(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()

        running_loss += loss.item()
        predictions = (torch.sigmoid(outputs) > 0.5).float().cpu()
        labels = labels.cpu()
        total_train += labels.size(0)
        correct_train += (predictions == labels).sum().item()
        accuracy = (predictions == labels).float().mean()
        accuracyScore = accuracy_score(labels,predictions)
        recallScore = recall_score(labels,predictions,zero_division=0.0)
        f1Score = f1_score(labels,predictions)

        if epoch == 1 and batch == 1:
            print("Inputs:",inputs)
            print("Labels",labels)
            print("outputs", outputs)
            print("Preds", predictions)
            print(f"AccuracyScore: {(accuracy):.2f}")


    #     print(predictions.cpu(), labels2)

        print(f"  Batch: {batch}".ljust(12),
              f"Loss: {(running_loss/total_train):.4f}".ljust(16),
              f"AccuracyScore: {(accuracy):.2f}".ljust(20),
              f"RecallScore: {(recallScore):.2f}".ljust(20), 
              f"f1Score: {(f1Score):.2f}".ljust(20),  
                end="\r")
        # break
    print()

    train_accuracy = (correct_train / total_train)*100
    avg_loss = running_loss / (len(train_loader))

    print(f"  Loss: {avg_loss:.4f}".ljust(16),
          f"Train Accuracy: {train_accuracy:.2f}%".ljust(80),
          # end="\r",
          )
    # break

model.eval()
print("Testing".center(30,"-"))

t_loss = 0.
correct = 0
total = 0

with torch.no_grad():
    for tBatch, (inputs, labels) in enumerate(test_loader,start=1):

        inputs, labels = inputs.to(device), labels.float().to(device)
        outputs = model(inputs)

        loss = criterion(outputs, labels)

        t_loss += loss.item()
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        accuracy = accuracy_score(labels,predicted)
    


        print(f" Batch: {tBatch}".ljust(12), f"Loss: {loss.item():.4f}".ljust(12), f"Accuracy: {(accuracy):.2f}%",end="\r")
    print()

test_accuracy = (correct / total)*100
print(f" Test Accuracy: {test_accuracy:.2f}%".ljust(24), f"Test Loss: {t_loss/tBatch:.4f}".ljust(16))
print()

Train: 4593
Test: 1149
 Epoch: [1/50]  
Inputs: tensor([[0.4828, 1.0000, 0.0044,  ..., 0.0000, 1.0000, 0.0000],
        [0.6897, 0.0000, 0.0204,  ..., 0.0000, 0.0000, 0.0000],
        [0.6379, 1.0000, 0.0068,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.5345, 1.0000, 0.0068,  ..., 0.0000, 0.0000, 0.0000],
        [0.5000, 1.0000, 0.0023,  ..., 0.0000, 0.0000, 0.0000],
        [0.5345, 1.0000, 0.0364,  ..., 0.0000, 0.0000, 0.0000]])
Labels tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
 