<a href="https://colab.research.google.com/github/fjtm/deep-fake-voice-recognition/blob/feature%2Fstart/04_testing_DNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simple Exploration of Dense Networks with PyTorch

Introduction:

In this notebook, we will practically delve into the implementation and evaluation of dense networks using PyTorch. Dense networks, also known as fully connected networks, are essential in deep learning. This exercise will allow us to delve into building dense layers, designing network architectures, and understanding the training process in PyTorch.

In [None]:
!pip install torchmetrics

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading and balancing data

In [5]:
import torch
import pandas as pd
import numpy as np

In [147]:
csv_files_path = f'/content/drive/My Drive/deep-fake-voice-recognition/data/'
train = pd.read_csv(csv_files_path+"train.csv")
test = pd.read_csv(csv_files_path+"test.csv")
train = train[train.ind_num <=7].copy() #Balanced dataset

In [148]:
target_column = "target"
index_columns = "ind_num"
not_data_columns = ["label", "ind", "target","ind_num"]

# Preparing tensors with train/test data

In [189]:
def MaxMinScalerTorch(data, dim = 0):
    data_min = data.min(dim=dim, keepdim=True).values
    data_max = data.max(dim=dim, keepdim=True).values
    scaled_data = (data - data_min) / (data_max - data_min)
    return scaled_data

In [227]:
X_train = torch.tensor(train.drop(not_data_columns, axis = 1).values).type(torch.float)
y_train = torch.tensor(train[target_column].values).type(torch.float).view(-1, 1)
X_test = torch.tensor(test.drop(not_data_columns, axis = 1).values).type(torch.float)
y_test = torch.tensor(test[target_column].values).type(torch.float).view(-1, 1)

# Scale
X_train = MaxMinScalerTorch(X_train)
X_test = MaxMinScalerTorch(X_test)

X_train.size(), y_train.size(), X_test.size(), y_test.size()

(torch.Size([620474, 58]),
 torch.Size([620474, 1]),
 torch.Size([716918, 58]),
 torch.Size([716918, 1]))

In [191]:
# Make device agnostic code to use GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [228]:
import torch
import torch.nn as nn

class DNNWithDropout(nn.Module):
    """
    A simple Dense Neural Network with Dropout implemented using PyTorch.

    Parameters:
        - dropout_rate (float): The dropout rate to be applied to the network layers.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()

        # First dense layer with input size 58 and output size 112
        self.layer_1 = nn.Linear(58, 112)

        # ReLU activation function to introduce non-linearity
        self.relu_1 = nn.ReLU()

        # Dropout layer to prevent overfitting, with specified dropout rate
        self.dropout_1 = nn.Dropout(p=dropout_rate)

        # Second dense layer with input size 112 and output size 1
        self.layer_2 = nn.Linear(112, 1)

        # Dropout layer after the second dense layer
        self.dropout_2 = nn.Dropout(p=dropout_rate)

        # Sigmoid activation function to squash the output between 0 and 1
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Forward pass through the first dense layer
        x = self.layer_1(x)

        # Applying ReLU activation
        x = self.relu_1(x)

        # Applying dropout to the output of the first dense layer
        x = self.dropout_1(x)

        # Forward pass through the second dense layer
        x = self.layer_2(x)

        # Applying dropout to the output of the second dense layer
        x = self.dropout_2(x)

        # Applying sigmoid activation to squash the output
        x = self.sigmoid(x)

        return x

In [217]:
# Define model
model_0 = DNNWithDropout().to(device)

# Create a loss function
loss_fn = nn.BCELoss() # BCEWithLogitsLoss = sigmoid built-in

# Create an optimizer
lr = 0.5
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=lr)

# Accuracy metric
from torchmetrics.classification import BinaryAccuracy
acc_metric = BinaryAccuracy().to(device)

In [212]:
from torch.utils.data import TensorDataset, DataLoader

# Create a DataLoader for test and train
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=10000, shuffle=True, num_workers=0)
test_dataset = TensorDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=10000, shuffle=True, num_workers=0)

In [213]:
from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
    """Prints difference between start and end time.

    Args:
        start (float): Start time of computation (preferred in timeit format).
        end (float): End time of computation.
        device ([type], optional): Device that compute is running on. Defaults to None.

    Returns:
        float: time between start and end in seconds (higher is longer).
    """
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

In [218]:
# Import tqdm for progress bar
from tqdm.auto import tqdm

# Set the seed and start the timer
torch.manual_seed(42)
train_time_start_on_cpu = timer()

# Set the number of epochs (we'll keep this small for faster training times)
epochs = 10

# Create training and testing loop
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n-------")
    ### Training
    train_loss = 0
    # Add a loop to loop through training batches
    for batch, (X, y) in enumerate(train_dataloader):
       # Send data to GPU
        X, y = X.to(device), y.to(device)
        model_0.train()
        # 1. Forward pass
        y_pred = model_0(X)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)
        train_loss += loss # accumulatively add up the loss per epoch

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Print out how many samples have been seen
        # if batch % 10 == 0:
        #     print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")

    # Divide total train loss by length of train dataloader (average loss per batch per epoch)
    train_loss /= len(train_dataloader)

    ### Testing
    # Setup variables for accumulatively adding up loss and accuracy
    test_loss, test_acc = 0, 0
    model_0.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            # Send data to GPU
            X, y = X.to(device), y.to(device)
            # 1. Forward pass
            test_pred = model_0(X)

            # 2. Calculate loss (accumatively)
            test_loss += loss_fn(test_pred, y) # accumulatively add up the loss per epoch

            # 3. Calculate accuracy (preds need to be same as y_true)
            test_acc += acc_metric(target=y, preds=(test_pred >= 0.5).float().view(-1, 1))

        # Calculations on test metrics need to happen inside torch.inference_mode()
        # Divide total test loss by length of test dataloader (per batch)
        test_loss /= len(test_dataloader)

        # Divide total accuracy by length of test dataloader (per batch)
        test_acc /= len(test_dataloader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f}, Test acc: {test_acc*100:.3f}%\n")

# Calculate training time
train_time_end_on_cpu = timer()
total_train_time_model_0 = print_train_time(start=train_time_start_on_cpu,
                                           end=train_time_end_on_cpu,
                                           device=str(next(model_0.parameters()).device))

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 0
-------

Train loss: 0.68530 | Test loss: 0.73498, Test acc: 20.182%

Epoch: 1
-------

Train loss: 0.67404 | Test loss: 0.71231, Test acc: 39.186%

Epoch: 2
-------

Train loss: 0.66128 | Test loss: 0.55457, Test acc: 89.075%

Epoch: 3
-------

Train loss: 0.65150 | Test loss: 0.88782, Test acc: 14.635%

Epoch: 4
-------

Train loss: 0.64464 | Test loss: 0.68993, Test acc: 52.951%

Epoch: 5
-------

Train loss: 0.63721 | Test loss: 0.95096, Test acc: 9.989%

Epoch: 6
-------

Train loss: 0.63344 | Test loss: 0.48589, Test acc: 89.650%

Epoch: 7
-------

Train loss: 0.62984 | Test loss: 0.63426, Test acc: 80.224%

Epoch: 8
-------

Train loss: 0.62578 | Test loss: 0.59727, Test acc: 84.821%

Epoch: 9
-------

Train loss: 0.61772 | Test loss: 0.48662, Test acc: 89.797%

Train time on cuda:0: 193.921 seconds
