This notebook evaluates a trained model on a test set that is not part of the training set.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
#import seaborn as sns
import os
import random
import cv2
import json
from sklearn.model_selection import train_test_split
from torch import nn
import constants as c
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [None]:
# The model

# Number of output classes: the output size of the final linear layer (dense_2).
NUM_OF_SPEAKERS = 2
# Dropout probability passed to both nn.Dropout layers of the network.
DROP_OUT = 0.5

class Convolutional_Speaker_Identification(nn.Module):
    """Convolutional classifier producing log-probabilities over NUM_OF_SPEAKERS classes.

    Layout: five convolution + ReLU + batch-norm stages (max pooling after
    stages 1, 2 and 5), a 4096-channel convolution with a (9, 1) kernel,
    dropout, global average pooling and two fully connected layers.
    The first convolution expects a single input channel.
    """

    def cal_paddind_shape(self, new_shape, old_shape, kernel_size, stride_size):
        """Return the per-side padding that maps ``old_shape`` to ``new_shape``.

        Solves ``new = (old + 2 * pad - kernel) / stride + 1`` for ``pad``.
        True division is used, so the result may be fractional.
        """
        total_padding = stride_size * (new_shape - 1) + kernel_size - old_shape
        return total_padding / 2

    def __init__(self):
        """Create all layers; sizes depend on NUM_OF_SPEAKERS and DROP_OUT."""
        super().__init__()

        # Stage 1
        self.conv_2d_1 = nn.Conv2d(
            in_channels=1, out_channels=96, kernel_size=(7, 7), stride=(2, 2), padding=2
        )
        self.bn_1 = nn.BatchNorm2d(num_features=96)
        self.max_pool_2d_1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

        # Stage 2
        self.conv_2d_2 = nn.Conv2d(
            in_channels=96, out_channels=256, kernel_size=(5, 5), stride=(2, 2), padding=2
        )
        self.bn_2 = nn.BatchNorm2d(num_features=256)
        self.max_pool_2d_2 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

        # Stages 3-5: 3x3 convolutions with the default stride
        self.conv_2d_3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=(3, 3), padding=2)
        self.bn_3 = nn.BatchNorm2d(num_features=384)

        self.conv_2d_4 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=(3, 3), padding=2)
        self.bn_4 = nn.BatchNorm2d(num_features=256)

        self.conv_2d_5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), padding=2)
        self.bn_5 = nn.BatchNorm2d(num_features=256)
        self.max_pool_2d_3 = nn.MaxPool2d(kernel_size=(5, 3), stride=(3, 2))

        # Wide convolution acting along the first spatial axis only
        self.conv_2d_6 = nn.Conv2d(in_channels=256, out_channels=4096, kernel_size=(9, 1), padding=0)
        self.drop_1 = nn.Dropout(p=DROP_OUT)

        # Classifier head
        self.global_avg_pooling_2d = nn.AdaptiveAvgPool2d((1, 1))
        self.dense_1 = nn.Linear(in_features=4096, out_features=1024)
        self.drop_2 = nn.Dropout(p=DROP_OUT)

        self.dense_2 = nn.Linear(in_features=1024, out_features=NUM_OF_SPEAKERS)

    def forward(self, X):
        """Run the network on ``X`` and return per-class log-probabilities.

        ``X`` must have one channel (see ``conv_2d_1``); it is presumably a
        batch shaped (batch, 1, height, width) - confirm against the caller.
        The result has one row per sample and NUM_OF_SPEAKERS columns.
        """
        relu = nn.functional.relu

        # Note the order inside each stage: convolution -> ReLU -> batch norm.
        out = self.max_pool_2d_1(self.bn_1(relu(self.conv_2d_1(X))))
        out = self.max_pool_2d_2(self.bn_2(relu(self.conv_2d_2(out))))
        out = self.bn_3(relu(self.conv_2d_3(out)))
        out = self.bn_4(relu(self.conv_2d_4(out)))
        out = self.max_pool_2d_3(self.bn_5(relu(self.conv_2d_5(out))))

        out = self.drop_1(relu(self.conv_2d_6(out)))
        out = self.global_avg_pooling_2d(out)

        # Collapse the pooled 1x1 spatial dims so the linear layers see
        # one feature vector (one entry per channel) per sample.
        out = out.view(-1, out.shape[1])
        out = self.drop_2(relu(self.dense_1(out)))

        logits = self.dense_2(out)
        return nn.functional.log_softmax(logits, dim=1)

    # def get_epochs(self):
    #     return epochs
    # def get_learning_rate(self):
    #     return lr
    # def get_batch_size(self):
    #     return batch_size
    # def to_string(self):
    #     return "Convolutional_Speaker_Identification_Log_Softmax_Model-epoch_"

In [None]:
# Load the test data into NumPy arrays

# Root directories scanned by load_data(); each one is walked sub-folder by
# sub-folder, and every sub-folder is treated as one class.
train_dir_array =[ r'C:\Users\User\Gender_classification_from_voice\data\json_just_music_data']


def load_data():
    """
    Load the sampled JSON images from ``train_dir_array`` as a test set.

    Every directory in the module-level ``train_dir_array`` is walked. Each
    visible sub-folder is one class; its label is its position in the
    ``os.listdir`` order of the parent directory (hidden entries and plain
    files are skipped and do not consume a label). Inside a class folder
    every 20th file is parsed as JSON, its first element is resized with
    ``cv2.resize`` and wrapped in a leading axis of length 1.

    All loaded samples are returned: the data is used for testing only, so
    nothing is split off or thrown away.

    Returns:
        tuple: ``(X_test, Y_test)`` where ``X_test`` is a NumPy array
        stacking all loaded images and ``Y_test`` is the list of their
        integer labels, in loading order.

    Raises:
        ValueError: if no image could be loaded.
    """
    images = []
    labels = []
    size = (29, 449)  # dsize for cv2.resize, i.e. (width, height)
    count = 0

    print("LOADING DATA FROM : ",end = "")

    for train_dir in train_dir_array:
        # Labels restart at 0 for every root directory.
        # NOTE(review): label 0 is meant to be the "woman" class; this relies
        # on the os.listdir order matching the one used at training time.
        num = 0
        print("load from: ",train_dir)
        for folder in os.listdir(train_dir):
            if folder.startswith('.'):
                continue
            folder_path = os.path.join(train_dir, folder)
            if not os.path.isdir(folder_path):
                # A stray file next to the class folders is not a class.
                continue
            print(folder, end = ' | ')
            # Only every 20th file is used to keep the test set small.
            for json_file in os.listdir(folder_path)[::20]:
                # Context manager so the file handle is closed right away.
                with open(os.path.join(folder_path, json_file)) as json_handle:
                    temp_json_file = json.load(json_handle)
                temp_np_array = np.array(temp_json_file)
                # Wrap the resized image in a list to add a leading axis
                # (presumably the single input channel of the model).
                temp_img = np.array([cv2.resize(temp_np_array[0], size)])
                images.append(temp_img)
                labels.append(num)
                count += 1  # number of images loaded so far
                if count % 5000 == 0:
                    print("data number: ",count ,"was load")
            num += 1

    if not images:
        raise ValueError("load_data found no images in train_dir_array")

    # Use every loaded sample for testing. The previous
    # train_test_split(..., test_size=1) kept exactly ONE sample, because an
    # integer test_size is an absolute sample count, not a fraction.
    X_test = np.array(images)
    Y_test = labels

    print()
    print('Loaded', len(X_test),'images for testing','Test data shape =',X_test.shape)
    print('\n')

    return X_test, Y_test

# Run the loader once; X_test and Y_test are turned into tensors when the
# model is loaded further down.
X_test, Y_test,= load_data()

In [None]:
# Load the model

# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("using cuda")
else:
    print("not using cuda")
    device = torch.device("cpu")

model = Convolutional_Speaker_Identification()
model_path = "model.all.pt"  # checkpoint holding the state_dict to evaluate
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
batch_size = 32

# The whole test set is moved to the device up front.
test_x = torch.tensor(X_test).to(device)
test_y = torch.tensor(Y_test).to(device)

# Define the test data loader. Shuffling is pointless for evaluation (the
# metrics do not depend on sample order), so it is turned off.
test_dataset = torch.utils.data.TensorDataset(test_x, test_y)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [None]:

# Evaluate the loaded model on the test set

# Collect the ground truth and the predictions batch by batch.
true_labels = []
predicted_labels = []

model.eval()  # evaluation mode (affects the dropout and batch-norm layers)
with torch.no_grad():  # no gradients are needed for inference
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(torch.float)
        batch_labels = batch_labels.to(device)

        outputs = model(batch_images)
        # The predicted class is the index of the largest log-probability.
        predicted = outputs.max(dim=1).indices

        true_labels.extend(batch_labels.cpu().tolist())
        predicted_labels.extend(predicted.cpu().tolist())

# Accuracy plus macro-averaged precision / recall / F1.
accuracy = accuracy_score(true_labels, predicted_labels)
precision, recall, f1 = (
    metric(true_labels, predicted_labels, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# Gather the results, formatted to four decimals, keyed for the log line.
results = {
    "model name": model_path,
    "accuracy": f"{accuracy:.4f}",
    "precision": f"{precision:.4f}",
    "recall": f"{recall:.4f}",
    "f1": f"{f1:.4f}",
}

# Append one line per evaluated model to the running results file.
with open("save.txt", "a") as results_file:
    print(results, file=results_file)
