In [None]:
import os
import torch
from torchsummary import summary
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import librosa.display
import matplotlib.pyplot as plt
import torch.nn.functional as F
import numpy as np
import random
import librosa
from torch.nn import HuberLoss
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score
import torch.nn.init as init

In [None]:
current_dir = os.getcwd()

# Teacher Architecture

In [None]:
#Teacher Architecture: Code from Cretois et al. (2022)
class VGG11(nn.Module):
    """VGG-11-style CNN with batch normalisation, used as the distillation teacher.

    The network takes a single-channel image, runs it through five
    convolutional blocks (each ending in a 2x2 max-pool) and three fully
    connected layers. The flatten step hard-codes a 4x4x512 feature map, which
    corresponds to a 128x128 input.

    Attribute names are part of the checkpoint format (they become the
    state_dict keys), so they must not be renamed.

    Args:
        T: temperature; forward() divides the final logit by this value.
    """

    def __init__(self, T=5.0):
        super().__init__()
        self.T = T

        # Block 1: 1 -> 64 channels
        self.conv11 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.bn11 = nn.BatchNorm2d(64)

        # Block 2: 64 -> 128 channels
        self.conv21 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn21 = nn.BatchNorm2d(128)

        # Block 3: 128 -> 256 channels (two convolutions)
        self.conv31 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn31 = nn.BatchNorm2d(256)
        self.conv32 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.bn32 = nn.BatchNorm2d(256)

        # Block 4: 256 -> 512 channels (two convolutions)
        self.conv41 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn41 = nn.BatchNorm2d(512)
        self.conv42 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.bn42 = nn.BatchNorm2d(512)

        # Block 5: 512 -> 512 channels (two convolutions)
        self.conv51 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.bn51 = nn.BatchNorm2d(512)
        self.conv52 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.bn52 = nn.BatchNorm2d(512)

        # Classifier head: two hidden FC layers, shared dropout, one output logit
        self.fc1 = nn.Linear(4 * 4 * 512, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.dropout = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(4096, 1)

    def forward(self, x):
        """Return the temperature-scaled logit (no sigmoid) for each input image."""
        relu = torch.relu
        pool = F.max_pool2d

        # Block 1 + pool: 128x128 -> 64x64
        h = pool(relu(self.bn11(self.conv11(x))), 2)

        # Block 2 + pool: 64x64 -> 32x32
        h = pool(relu(self.bn21(self.conv21(h))), 2)

        # Block 3 + pool: 32x32 -> 16x16
        h = relu(self.bn31(self.conv31(h)))
        h = pool(relu(self.bn32(self.conv32(h))), 2)

        # Block 4 + pool: 16x16 -> 8x8
        h = relu(self.bn41(self.conv41(h)))
        h = pool(relu(self.bn42(self.conv42(h))), 2)

        # Block 5 + pool: 8x8 -> 4x4
        h = relu(self.bn51(self.conv51(h)))
        h = pool(relu(self.bn52(self.conv52(h))), 2)

        # Flatten to rows of 4 * 4 * 512 features. The batch dimension is
        # inferred, so an input that is not 128x128 is silently re-batched
        # rather than rejected.
        h = h.view(-1, 4 * 4 * 512)
        h = self.dropout(relu(self.fc1(h)))
        h = self.dropout(relu(self.fc2(h)))

        # Raw logit divided by the temperature; the caller applies the sigmoid.
        return self.fc3(h) / self.T

# Student Architectures

In [None]:
#Efficient Student 1

# SE Layer: Code adapted from:
#d-li14. (2021). Mobilenetv3.pytorch [Source code]. GitHub. https://github.com/d-li14/mobilenetv3.pytorch/tree/master
class SELayer(nn.Module):
    """Squeeze-and-excitation gate.

    Each channel of the input is multiplied by a factor in (0, 1) that is
    computed from the channel's spatial mean through a two-layer bottleneck
    MLP followed by a sigmoid.

    Args:
        channel: number of input (and output) channels.
        reduction: divisor applied to `channel` to get the hidden width.
    """

    def __init__(self, channel, reduction=4):
        super().__init__()
        hidden = channel // reduction
        # Squeeze: collapse every feature map to a single value.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: reduce -> ReLU -> restore -> sigmoid gate per channel.
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return `x` with every channel rescaled by its learned gate."""
        batch, channels, _, _ = x.shape
        squeezed = self.avg_pool(x).view(batch, channels)
        gate = self.fc(squeezed).view(batch, channels, 1, 1)
        # Broadcasting the (B, C, 1, 1) gate over H and W recalibrates each channel.
        return x * gate
   

# Bottleneck layer: code partially adapted from:
# d-li14. (2021). Mobilenetv3.pytorch [Source code]. GitHub. https://github.com/d-li14/mobilenetv3.pytorch
# and from a generative language model (GPT-3.5, OpenAI, 2023).
class Bottleneck(nn.Module):
    """Inverted-residual bottleneck: expand (1x1) -> depthwise -> project (1x1).

    An optional squeeze-and-excitation gate is applied to the projected
    output. When `stride == 1` a residual connection is added: the identity if
    `in_channels == out_channels`, otherwise a 1x1 conv + BatchNorm projection.
    Strided blocks have no residual because the spatial sizes differ.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels of the output tensor.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution.
        expansion: multiplier giving the width of the expanded representation.
        use_se: whether to apply an SELayer to the projected output.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, expansion, use_se):
        super(Bottleneck, self).__init__()
        mid_channels = in_channels * expansion
        self.use_se = use_se
        # A skip connection is only shape-compatible when resolution is kept.
        self.use_residual = stride == 1
        self.layers = nn.Sequential(
            # Pointwise expansion
            nn.Conv2d(in_channels, mid_channels, 1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            # Depthwise convolution (one filter per channel)
            nn.Conv2d(mid_channels, mid_channels, kernel_size, stride, padding=kernel_size // 2, groups=mid_channels, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            # Pointwise projection to the requested output width
            nn.Conv2d(mid_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
        )
        if self.use_se:
            self.se = SELayer(out_channels)
        # An empty Sequential acts as the identity mapping.
        self.shortcut = nn.Sequential()
        if stride == 1 and in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Apply the block and return the ReLU-activated result."""
        out = self.layers(x)
        if self.use_se:
            out = self.se(out)
        # Do not test `self.shortcut` for truthiness: an empty nn.Sequential
        # is falsy, which previously turned this into `out += out` (doubling
        # the activations and never adding the identity skip).
        if self.use_residual:
            out = out + self.shortcut(x)
        return torch.relu(out)

#STUDENT
class EfficientStudent1(nn.Module):
    """Largest student: 16-channel stem, twelve bottlenecks up to 512 channels.

    Args:
        num_classes: number of output logits per sample.
        T: temperature, stored on the instance; forward() does not use it.
    """

    def __init__(self, num_classes=1, T=5.0):
        super().__init__()
        self.T = T

        stem = [
            nn.Conv2d(1, 16, 3, 1, 1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        ]

        # (in_channels, out_channels, stride, expansion, use_se); kernel is always 3
        bottleneck_cfg = [
            (16, 16, 1, 1, True),
            (16, 32, 2, 2, False),
            (32, 32, 1, 4, False),
            (32, 64, 2, 6, True),
            (64, 64, 1, 2, True),
            (64, 64, 1, 2, True),
            (64, 128, 2, 4, True),
            (128, 128, 1, 6, True),
            (128, 128, 1, 2, True),
            (128, 256, 2, 4, True),
            (256, 256, 1, 6, True),
            (256, 512, 2, 2, True),
        ]
        body = [
            Bottleneck(c_in, c_out, 3, stride, expansion, use_se=se)
            for c_in, c_out, stride, expansion, se in bottleneck_cfg
        ]

        head = [
            nn.Conv2d(512, 512, 3, 1, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            # Global pooling, then 1x1 convolutions acting as the classifier
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(512, 512, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, num_classes, 1),
            nn.Flatten(start_dim=1),
        ]

        # Single flat Sequential so module indices (state_dict keys) stay put.
        self.layers = nn.Sequential(*stem, *body, *head)

    def forward(self, x,get_features=False):
        """Return raw logits of shape (batch, num_classes); `get_features` is ignored."""
        return self.layers(x)

In [None]:
#EfficientStudent2
class EfficientStudent2(nn.Module):
    """Second student: 16-channel stem, ten bottlenecks up to 256 channels,
    then a strided 3x3 convolution to 512 channels before the pooled head.

    Args:
        num_classes: number of output logits per sample.
        T: temperature, stored on the instance; forward() does not use it.
    """

    def __init__(self, num_classes=1, T=5.0):
        super().__init__()
        self.T = T

        stem = [
            nn.Conv2d(1, 16, 3, 1, 1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
        ]

        # (in_channels, out_channels, stride, expansion, use_se); kernel is always 3
        bottleneck_cfg = [
            (16, 16, 1, 1, True),
            (16, 32, 2, 2, False),
            (32, 32, 1, 4, False),
            (32, 64, 2, 6, True),
            (64, 64, 1, 2, True),
            (64, 128, 2, 4, True),
            (128, 128, 1, 6, True),
            (128, 128, 1, 2, True),
            (128, 256, 2, 4, True),
            (256, 256, 1, 6, True),
        ]
        body = [
            Bottleneck(c_in, c_out, 3, stride, expansion, use_se=se)
            for c_in, c_out, stride, expansion, se in bottleneck_cfg
        ]

        head = [
            # Plain strided convolution widens 256 -> 512 and halves resolution
            nn.Conv2d(256, 512, 3, 2, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(512, 512, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, num_classes, 1),
            nn.Flatten(start_dim=1),
        ]

        # Single flat Sequential so module indices (state_dict keys) stay put.
        self.layers = nn.Sequential(*stem, *body, *head)

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes)."""
        return self.layers(x)

In [None]:
#EfficientStudent3


class EfficientStudent3(nn.Module):
    """Third student: 4-channel stem, ten bottlenecks up to 128 channels.

    Args:
        num_classes: number of output logits per sample.
        T: temperature, stored on the instance; forward() does not use it.
    """

    def __init__(self, num_classes=1, T=5.0):
        super().__init__()
        self.T = T

        stem = [
            nn.Conv2d(1, 4, 3, 1, 1, bias=False),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
        ]

        # (in_channels, out_channels, stride, expansion, use_se); kernel is always 3
        bottleneck_cfg = [
            (4, 4, 1, 1, True),
            (4, 8, 2, 2, False),
            (8, 16, 2, 4, False),
            (16, 16, 1, 6, True),
            (16, 32, 2, 2, True),
            (32, 32, 1, 4, True),
            (32, 64, 2, 6, True),
            (64, 64, 1, 2, True),
            (64, 128, 2, 4, True),
            (128, 128, 1, 6, True),
        ]
        body = [
            Bottleneck(c_in, c_out, 3, stride, expansion, use_se=se)
            for c_in, c_out, stride, expansion, se in bottleneck_cfg
        ]

        head = [
            nn.Conv2d(128, 128, 3, 1, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            # Global pooling, then 1x1 convolutions acting as the classifier
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(128, 128, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, num_classes, 1),
            nn.Flatten(start_dim=1),
        ]

        # Single flat Sequential so module indices (state_dict keys) stay put.
        self.layers = nn.Sequential(*stem, *body, *head)

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes)."""
        return self.layers(x)

In [None]:
#EfficientStudent4
class EfficientStudent4(nn.Module):
    """Smallest student: 4-channel stem, six non-expanding bottlenecks up to 64 channels.

    Args:
        num_classes: number of output logits per sample.
        T: temperature, stored on the instance; forward() does not use it.
    """

    def __init__(self, num_classes=1, T=5.0):
        super().__init__()
        self.T = T

        stem = [
            nn.Conv2d(1, 4, 3, 1, 1, bias=False),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
        ]

        # (in_channels, out_channels, stride, use_se); kernel 3 and expansion 1 throughout
        bottleneck_cfg = [
            (4, 4, 1, True),
            (4, 8, 2, False),
            (8, 16, 2, True),
            (16, 32, 2, True),
            (32, 32, 1, True),
            (32, 64, 2, True),
        ]
        body = [
            Bottleneck(c_in, c_out, 3, stride, 1, use_se=se)
            for c_in, c_out, stride, se in bottleneck_cfg
        ]

        head = [
            nn.Conv2d(64, 64, 3, 1, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # Global pooling, then 1x1 convolutions acting as the classifier
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(64, 64, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, num_classes, 1),
            nn.Flatten(start_dim=1),
        ]

        # Single flat Sequential so module indices (state_dict keys) stay put.
        self.layers = nn.Sequential(*stem, *body, *head)

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes)."""
        return self.layers(x)

# Feature Extraction, Dataloader, & DataSet functions

In [None]:
#Mel-Spec Generation
def generate_mel_spectrogram(x, sr, show=False, resize=True):
    """Turn a waveform into a dB-scaled mel spectrogram with 128 mel bands.

    Args:
        x: audio samples, as accepted by `librosa.stft`.
        sr: sample rate of `x`.
        show: if True, draw the spectrogram with a dB colour bar.
        resize: if True, keep only the first 128 time frames. Nothing is
            padded, so a clip with fewer frames yields a narrower array.

    Returns:
        The mel spectrogram in dB.
    """
    # STFT magnitude (phase is discarded)
    magnitude, _ = librosa.magphase(librosa.stft(x, n_fft=1024, hop_length=376))
    # Project onto 128 mel bands, then convert amplitude to decibels
    mel_db = librosa.amplitude_to_db(
        librosa.feature.melspectrogram(S=magnitude, sr=sr, n_mels=128)
    )
    if resize:
        mel_db = mel_db[:, :128]
    if show:
        librosa.display.specshow(mel_db, sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
    return mel_db

# Dataset
class SpeechDataset(Dataset):
    """Map-style dataset pairing pre-computed features with integer labels.

    Args:
        data: indexable collection of feature arrays.
        labels: indexable collection of labels, aligned with `data`.
        transform: optional callable applied to each feature array on access.
    """

    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return `(features, label)` where `label` is a tensor of shape (1,)."""
        x = self.data[idx]
        y = self.labels[idx]

        if self.transform is not None:
            x = self.transform(x)

        # The transform may already return a tensor. Calling torch.tensor() on
        # a tensor emits a copy-construct UserWarning for every sample;
        # clone().detach() is the documented equivalent and is silent.
        if isinstance(x, torch.Tensor):
            x = x.clone().detach()
        else:
            x = torch.tensor(x)
        # Trailing dimension so labels line up with the (batch, 1) logits.
        return x, torch.tensor(y).unsqueeze(-1)
        
    
  
 # Load data
def load_data(data_path):
    """Load every `.wav` under `<data_path>/speech` and `<data_path>/no_speech`.

    Each file is read with `librosa.load` and converted with
    `generate_mel_spectrogram`.

    Args:
        data_path: directory containing the `speech` and `no_speech` folders.

    Returns:
        `(data, labels)`: the spectrograms of all speech files followed by all
        no-speech files, and a parallel list of labels (1 = speech, 0 = no speech).
    """
    def _list_wavs(directory):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order, and therefore any seeded split of it, reproducible.
        return sorted(
            os.path.join(directory, name)
            for name in os.listdir(directory)
            if name.endswith('.wav')
        )

    speech_files = _list_wavs(os.path.join(data_path, 'speech'))
    no_speech_files = _list_wavs(os.path.join(data_path, 'no_speech'))

    data = []
    for file in speech_files + no_speech_files:
        x, sr = librosa.load(file)
        data.append(generate_mel_spectrogram(x, sr))

    labels = [1] * len(speech_files) + [0] * len(no_speech_files)

    return data, labels

# Relational distillation loss

In [None]:
#calculates Angle loss between teacher-student logits
def rkd_angle_loss(student_logits, teacher_logits):
    """Huber loss between arccos-transformed Gram matrices of the two logit sets.

    For each model the pairwise dot products of the per-sample logit vectors
    are computed (the logits are not normalised), clamped to [-1, 1] so that
    arccos is defined, and passed through arccos. The two resulting matrices
    are compared with a Huber loss (delta = 1.0).

    Args:
        student_logits: 2-D tensor of student outputs, one row per sample.
        teacher_logits: 2-D tensor of teacher outputs, one row per sample.

    Returns:
        Scalar tensor with the mean Huber loss.
    """
    def _angle_matrix(logits):
        gram = torch.matmul(logits, logits.t())
        return torch.acos(torch.clamp(gram, min=-1.0, max=1.0))

    huber = HuberLoss(delta=1.0)
    return huber(_angle_matrix(student_logits), _angle_matrix(teacher_logits))

#Calculates total distillation loss between teacher-student logits
def rkd_distillation_loss(student_logits, teacher_logits, alpha, beta):
    """Weighted sum of the pairwise-distance loss and the angle loss.

    The distance term is a Huber loss (delta = 1.0) between the pairwise
    distances (`torch.pdist`) of the student logits and those of the detached
    teacher logits. The angle term comes from `rkd_angle_loss`.

    Args:
        student_logits: 2-D tensor of student outputs, one row per sample.
        teacher_logits: 2-D tensor of teacher outputs, one row per sample.
        alpha: weight of the distance term.
        beta: weight of the angle term.

    Returns:
        Scalar tensor `alpha * distance_loss + beta * angle_loss`.
    """
    student_distances = torch.pdist(student_logits)
    teacher_distances = torch.pdist(teacher_logits.detach())
    distance_term = HuberLoss(delta=1.0)(student_distances, teacher_distances)

    angle_term = rkd_angle_loss(student_logits, teacher_logits)

    return alpha * distance_term + beta * angle_term

# Training Block

In [None]:
#Train Block

def train_student(teacher, student, train_loader, val_loader, num_epochs, T, alpha, beta,device, patience=3):
    """Train `student` with BCE on the labels plus a relational distillation term.

    The teacher is frozen (its parameters get `requires_grad = False`) and put
    in eval mode; both changes persist on the passed-in module. After every
    epoch the mean training loss and the validation loss, ROC-AUC and F1 are
    printed. Training stops early once the validation loss has failed to
    improve for `patience` consecutive epochs. The student is left with the
    weights of the last epoch that ran.

    Args:
        teacher: pretrained model producing one logit per sample.
        student: model to train; must produce logits shaped like the labels.
        train_loader: iterable of `(inputs, labels)` training batches.
        val_loader: iterable of `(inputs, labels)` validation batches.
        num_epochs: maximum number of epochs.
        T: temperature applied to the student logits before the sigmoid used
            for the validation metrics.
        alpha: weight of the distance term; also scales the whole distillation loss.
        beta: weight of the angle term inside the distillation loss.
        device: torch device to run on.
        patience: epochs without validation improvement before stopping.
    """
    teacher.to(device)
    student.to(device)

    # Freeze the teacher. eval() is required as well: without it Dropout stays
    # active and BatchNorm keeps updating its running statistics, so the
    # distillation targets are noisy and the pretrained teacher is altered.
    for param in teacher.parameters():
        param.requires_grad = False
    teacher.eval()

    # Student loss and optimizer
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(student.parameters(), lr=.001)

    # Early stopping setup
    best_val_loss = np.inf
    no_improvement_count = 0

    for epoch in range(num_epochs):
        student.train()
        running_loss = 0.0
        num_train_batches = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).long()

            optimizer.zero_grad()

            # Supervised loss on the raw student logits
            student_logits = student(inputs)
            loss_student = criterion(student_logits, labels.float())

            # Teacher targets; no graph is needed for a frozen model
            with torch.no_grad():
                teacher_logits = teacher(inputs)

            loss_distill = rkd_distillation_loss(student_logits, teacher_logits, alpha, beta)

            # NOTE(review): alpha is applied here and again inside
            # rkd_distillation_loss - confirm the double weighting is intended.
            loss = loss_student + alpha * loss_distill
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_train_batches += 1

        # Counting batches (instead of reusing the loop index) keeps this
        # well-defined when the loader yields nothing.
        mean_train_loss = running_loss / num_train_batches if num_train_batches else float('nan')
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {mean_train_loss}')

        # Evaluate on the validation set
        student.eval()
        val_loss = 0.0
        num_val_batches = 0
        label_chunks = []
        prob_chunks = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device).long()

                student_logits = student(inputs)
                student_prob = torch.sigmoid(student_logits / T)

                label_chunks.append(labels.reshape(-1).cpu().numpy())
                prob_chunks.append(student_prob.reshape(-1).cpu().numpy())
                val_loss += criterion(student_logits, labels.float()).item()
                num_val_batches += 1

        if num_val_batches == 0:
            print('Validation loader produced no batches; skipping validation for this epoch')
            continue

        val_loss /= num_val_batches

        # Metrics are computed once over the whole validation set: per-batch
        # roc_auc_score raises when a batch holds a single class, and the mean
        # of per-batch scores is not the dataset-level score.
        y_true = np.concatenate(label_chunks)
        y_prob = np.concatenate(prob_chunks)
        if np.unique(y_true).size > 1:
            mean_auc = roc_auc_score(y_true, y_prob)
        else:
            mean_auc = float('nan')  # AUC is undefined with a single class
        mean_f1 = f1_score(y_true, (y_prob > 0.5).astype(int))
        print(f'Validation Loss: {val_loss}, AUC: {mean_auc}, F1 Score: {mean_f1}')

        # Check for early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            no_improvement_count = 0
        else:
            no_improvement_count += 1
            if no_improvement_count >= patience:
                print(f'Early stopping after {epoch + 1} epochs')
                break

# Test Block

In [None]:
def test_student(student, test_loader, device):
    student.to(device)
    student.eval()
    
    test_loss = 0
    auc_scores = []
    f1_scores = []
    criterion = nn.BCEWithLogitsLoss()

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device).long()

            student_logits = student(inputs)
            student_prob = torch.sigmoid(student_logits)

            # Calculate AUC
            auc = roc_auc_score(labels.cpu().numpy(), student_prob.cpu().numpy())
            auc_scores.append(auc)

            # Calculate F1
            predicted = (student_prob > 0.5).squeeze().long().cpu().numpy()
            f1 = f1_score(labels.cpu().numpy(), predicted)
            f1_scores.append(f1)

            test_loss += criterion(student_logits, labels.float()).item()

    test_loss /= len(test_loader)
    mean_auc = np.mean(auc_scores)
    mean_f1 = np.mean(f1_scores)
    print(f'Test Loss: {test_loss}, AUC: {mean_auc}, F1 Score: {mean_f1}')

# Training Instantiations - Models, Datasets, Dataloaders

In [None]:
#Training Instantiation:
# NOTE(review): these are absolute Google Drive paths from a Colab session;
# adjust them when running elsewhere.
Teacher_Weights = '/content/drive/MyDrive/Thesis_Material/ecoVAD_model_weight.pt'
Train_Data ='/content/drive/MyDrive/Thesis_Material/Synthetic_Dataset'

#Train_Data ='/content/drive/MyDrive/Thesis_Material/Validation_Dataset'
Evaluation_Data = '/content/drive/MyDrive/Thesis_Material/playback_data'

# Seed every RNG in use so student initialisation and batch shuffling are
# repeatable; the same value is used for the data splits below.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load teacher model and weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine; train_student moves the model to the target device.
teacher = VGG11()
teacher.load_state_dict(torch.load(Teacher_Weights, map_location='cpu'))
# Inference mode: disables Dropout and uses the stored BatchNorm statistics.
teacher.eval()

# student model(s)
student1 = EfficientStudent1()
student2 = EfficientStudent2()
student3 = EfficientStudent3()
student4 = EfficientStudent4()

# Set hyperparameters
num_epochs = 50
T = 5.0
alpha = 0.2
beta = .3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Load data
data, labels = load_data(Train_Data)

# Split data into train, val, test sets (60 % / 20 % / 20 % of the full set)
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=SEED)
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size=0.25, random_state=SEED)

# SpeechDataset instances for training, validation, testing
train_dataset = SpeechDataset(train_data, train_labels, transform=ToTensor())
val_dataset = SpeechDataset(val_data, val_labels, transform=ToTensor())
test_dataset = SpeechDataset(test_data, test_labels, transform=ToTensor())

# DataLoaders for training, validation, and testing
# NOTE(review): drop_last=True on the validation and test loaders discards the
# final partial batch, so up to 31 samples of each are never evaluated.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, drop_last=True)

# Train & Test Student Models

In [None]:

#Train Model:
# Distil the shared teacher into student1. train_student prints the mean
# training loss and the validation loss/AUC/F1 after every epoch and may stop
# early; it also freezes the teacher's parameters in place.

train_student(teacher, student1, train_loader, val_loader, num_epochs, T, alpha,beta, device)

In [None]:
# Print loss, AUC and F1 of student1 on the held-out split of the training data.
test_student(student1, test_loader, device)

In [None]:

#Train Model:
# Distil the same teacher into student2, with the same loaders and
# hyperparameters as for student1.

train_student(teacher, student2, train_loader, val_loader, num_epochs, T, alpha,beta, device)

In [None]:
# Print loss, AUC and F1 of student2 on the held-out split of the training data.
test_student(student2, test_loader, device)

In [None]:
#Train Model:
# Distil the same teacher into student3, with the same loaders and
# hyperparameters as for student1.

train_student(teacher, student3, train_loader, val_loader, num_epochs, T, alpha,beta, device)

In [None]:
# Print loss, AUC and F1 of student3 on the held-out split of the training data.
test_student(student3, test_loader, device)

In [None]:
#Train Model:
# Distil the same teacher into student4, with the same loaders and
# hyperparameters as for student1.

train_student(teacher, student4, train_loader, val_loader, num_epochs, T, alpha,beta, device)

In [None]:
# Print loss, AUC and F1 of student4 on the held-out split of the training data.
test_student(student4, test_loader, device)

# Final Evaluation: Playback Dataset

In [None]:


def load_data(data_path, filter_name=None):
    """Load `.wav` files under `<data_path>/speech` and `<data_path>/no_speech`.

    This cell redefines the one-argument `load_data` from the
    feature-extraction section. `filter_name` is therefore optional, so the
    earlier `load_data(path)` call keeps working after this cell has run.

    Args:
        data_path: directory containing the `speech` and `no_speech` folders.
        filter_name: if given, only files whose name contains this substring
            are loaded; `None` loads every `.wav` file.

    Returns:
        `(data, labels)`: the spectrograms of all selected speech files
        followed by all selected no-speech files, and a parallel list of
        labels (1 = speech, 0 = no speech).
    """
    def _list_wavs(directory):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order, and therefore any seeded split of it, reproducible.
        return sorted(
            os.path.join(directory, name)
            for name in os.listdir(directory)
            if name.endswith('.wav') and (filter_name is None or filter_name in name)
        )

    speech_files = _list_wavs(os.path.join(data_path, 'speech'))
    no_speech_files = _list_wavs(os.path.join(data_path, 'no_speech'))

    data = []
    for file in speech_files + no_speech_files:
        x, sr = librosa.load(file)
        data.append(generate_mel_spectrogram(x, sr))

    labels = [1] * len(speech_files) + [0] * len(no_speech_files)

    return data, labels


# Define file name filters (one per recording condition of the playback data)
file_name_filters = [
    "Forest_1m",
    "Forest_5m",
    "Forest_10m",
    "Forest_20m",
    "OL_1m",
    "OL_5m",
    "OL_10m",
    "OL_20m",
]

# Create datasets and data loaders for each filter.
# The "train" key is kept because the evaluation cells below look it up.
datasets = {}
data_loaders = {}

for filter_name in file_name_filters:
    # Names are prefixed with `playback_` so this loop does not overwrite the
    # training-time globals (data, labels, train_loader, ...); otherwise
    # re-running a training cell afterwards would silently use playback data.
    playback_data, playback_labels = load_data(Evaluation_Data, filter_name)

    # Split data: 99 % is kept for evaluation; the split also shuffles the
    # samples, so speech and no-speech files are mixed within each batch.
    playback_eval_data, _, playback_eval_labels, _ = train_test_split(
        playback_data, playback_labels, test_size=0.01, random_state=42
    )

    # SpeechDataset instance
    playback_dataset = SpeechDataset(playback_eval_data, playback_eval_labels, transform=ToTensor())

    # DataLoader: no shuffling, so every evaluation pass sees the same
    # batches (with drop_last=True a shuffled loader drops a different
    # remainder on each pass).
    playback_loader = DataLoader(playback_dataset, batch_size=32, shuffle=False, drop_last=True)

    datasets[filter_name] = {
        "train": playback_dataset}

    data_loaders[filter_name] = {
        "train": playback_loader}


In [None]:
def test_student(student, test_loader, device):
    student.to(device)
    student.eval()
    
    test_loss = 0
    auc_scores = []
    f1_scores = []
    criterion = nn.BCEWithLogitsLoss()

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device).long()

            student_logits = student(inputs)
            student_prob = torch.sigmoid(student_logits)

            # Calculate AUC
            auc = roc_auc_score(labels.cpu().numpy(), student_prob.cpu().numpy())
            auc_scores.append(auc)

            # Calculate F1
            predicted = (student_prob > 0.5).squeeze().long().cpu().numpy()
            f1 = f1_score(labels.cpu().numpy(), predicted)
            f1_scores.append(f1)

            test_loss += criterion(student_logits, labels.float()).item()

    test_loss /= len(test_loader)
    mean_auc = np.mean(auc_scores)
    mean_f1 = np.mean(f1_scores)
    print(f'Test Loss: {test_loss}, AUC: {mean_auc}, F1 Score: {mean_f1}')

In [None]:
#Final-Eval data- Student 1
# The loader is passed directly instead of being assigned to the global
# `test_loader`, so the held-out test loader built in the training section is
# not replaced by playback data.
for filter_name in file_name_filters:
    print(f"Evaluating {filter_name} test set")
    test_student(student1, data_loaders[filter_name]["train"], device)

In [None]:
#Final-Eval data- Student 2
# The loader is passed directly instead of being assigned to the global
# `test_loader`, so the held-out test loader built in the training section is
# not replaced by playback data.
for filter_name in file_name_filters:
    print(f"Evaluating {filter_name} test set")
    test_student(student2, data_loaders[filter_name]["train"], device)

In [None]:
#Final-Eval data- Student 3
# The loader is passed directly instead of being assigned to the global
# `test_loader`, so the held-out test loader built in the training section is
# not replaced by playback data.
for filter_name in file_name_filters:
    print(f"Evaluating {filter_name} test set")
    test_student(student3, data_loaders[filter_name]["train"], device)

In [None]:
#Final-Eval data- Student 4
# The loader is passed directly instead of being assigned to the global
# `test_loader`, so the held-out test loader built in the training section is
# not replaced by playback data.
for filter_name in file_name_filters:
    print(f"Evaluating {filter_name} test set")
    test_student(student4, data_loaders[filter_name]["train"], device)