In [None]:
# Author: Emma Ding (2024)
# Model Testing File
# Kaggle Environment
# GPU T4 x2 Accelerator

import os
from PIL import Image
from IPython.display import FileLink

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

!pip install ultralytics
from ultralytics import YOLO

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [None]:
# Import training results data (results.csv from the classify/train run directory)
results_csv_path = '/kaggle/input/sign-language/kaggle/working/runs/classify/train/results.csv'
train_results = pd.read_csv(results_csv_path)

In [None]:
# Plot training and validation loss by epochs
# Read from train_results (the frame loaded from results.csv). The name `results`
# is only bound later in the notebook, to the prediction outputs, so using it
# here fails on a fresh kernel.
# Column names keep the leading padding exactly as it appears in results.csv.
fig, ax = plt.subplots()
ax.plot(train_results['                  epoch'], train_results['             train/loss'], label='train loss')
ax.plot(train_results['                  epoch'], train_results['               val/loss'], label='val loss', c='red')
ax.grid()
ax.set_title('Loss vs Epochs')
ax.set_ylabel('Loss')
ax.set_xlabel('Epochs')
ax.legend()
plt.show()

In [None]:
# Plot validation accuracy by epochs
# Read from train_results (the frame loaded from results.csv). The name `results`
# is only bound later in the notebook, to the prediction outputs.
# The top-1 accuracy column is scaled by 100 to display it as a percentage.
fig, ax = plt.subplots()
ax.plot(train_results['                  epoch'], train_results['  metrics/accuracy_top1'] * 100)
ax.grid()
ax.set_title('Validation Accuracy vs Epochs')
ax.set_ylabel('Accuracy (%)')
ax.set_xlabel('Epochs')
plt.show()

In [None]:
# Create directory for processed ASL MNIST Dataset
# makedirs creates the parent directory as well; exist_ok=True keeps this cell
# re-runnable instead of raising FileExistsError on the second execution.
test_image_dir = "/kaggle/working/sign-language-data/test"
os.makedirs(test_image_dir, exist_ok=True)

# Import ASL MNIST Dataset
# MNIST_test is 20 (presumably ~20% of the full dataset -- TODO confirm)
sl_df_val=pd.read_csv('/kaggle/input/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv')

# Split into validation and testing, 10:10
# The second half of the rows becomes the test set. .copy() detaches it from
# sl_df_val so that later column assignments do not write into a slice.
sl_df_test=sl_df_val[len(sl_df_val.index)//2:].copy()

# Transform raw pixel-level data to images for testing
# Column 0 is skipped (it is read as 'label' later in the notebook); the remaining
# values are reshaped to a 28x28 uint8 array, which Pillow decodes as 8-bit
# grayscale ('L'). Files are named after the row's index label.
for row_index, row in sl_df_test.iterrows():
    pixels = np.reshape(np.uint8(row.iloc[1:]), (28, 28))
    Image.fromarray(pixels).save("{dir}/{name}.png".format(dir=test_image_dir, name=row_index))

In [None]:
# Generate predictions
results=sl_df_test.apply(lambda x: model('/kaggle/working/sign-language-data/test/{index}.png'.format(index=x.name)),axis=1)

In [None]:
model = YOLO('/kaggle/input/sign-language/kaggle/working/runs/classify/train/weights/best.pt')

In [None]:
# Names dict (class index -> class name) taken from the first prediction.
# The original picked an arbitrary row by a hard-coded index label; taking it
# positionally works for any test-set size. Assumes every Results object
# carries the same mapping.
names_dict = results.iloc[0][0].names

# Most probable class index per image (index of the max activation)
probs = [int(r[0].probs.top1) for r in results]

# Original class from most probable
preds = [names_dict[class_index] for class_index in probs]

# Save results
# assign() builds a new frame instead of writing into a slice of sl_df_val,
# which avoids SettingWithCopyWarning and keeps the cell re-runnable.
sl_df_test = sl_df_test.assign(preds=preds)
sl_df_test.to_csv('preds.csv')
FileLink('preds.csv')

In [None]:
# Reformatting data
# Ground truth comes from the 'label' column and predictions from the 'preds'
# column. The original read 'label' for both, which made every metric trivially
# perfect.
# Both are cast to str so that sklearn compares like with like.
# NOTE(review): assumes the model's class names are the numeric labels written
# as strings -- confirm against the training class folder names.
y_test=sl_df_test['label'].astype(str).tolist()
y_pred=sl_df_test['preds'].astype(str).tolist()

In [None]:
# Print metrics
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Precision and recall are reported once per averaging strategy, each line
# labelled with the strategy so the outputs can be told apart. The three values
# coincide only when the predictions are perfect.
for average in ('macro', 'micro', 'weighted'):
    precision = precision_score(y_test, y_pred, average=average)
    print("Precision ({}):".format(average), precision)

for average in ('macro', 'micro', 'weighted'):
    recall = recall_score(y_test, y_pred, average=average)
    print("Recall (Sensitivity, {}):".format(average), recall)

f1 = f1_score(y_test, y_pred, average='macro')
print("F1-Score (macro):", f1)

# Per-class precision / recall / F1 / support table
print(classification_report(y_test, y_pred))