In [None]:
from models.LSTM.BiLSTM_LSE import LSE_Bidirectional_LSTM
from datalaoader.csv_sequence_dataset import CSVSequenceDataset
from datalaoader.windowed_csv_dataset import WindowedCSVDataset

import pandas as pd

from collections import defaultdict


import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [13]:
# Dataset implementation used for every split in this notebook. The windowing
# arguments defined later are only passed when this is WindowedCSVDataset.
DatasetClass = CSVSequenceDataset

def collate_fn(batch):
    """Pad a batch of variable-length sequences and order it longest-first.

    Args:
        batch: Iterable of ``(sequence, label, length)`` triples.

    Returns:
        Tuple ``(sequences_padded, labels, lengths)``. Sequences are
        zero-padded to the longest one (batch dimension first) and all three
        tensors are permuted so that ``lengths`` is in descending order.
    """
    seqs, targets, seq_lens = zip(*batch)

    # Zero-pad every sequence up to the longest one in the batch.
    padded = pad_sequence(seqs, batch_first=True, padding_value=0)

    # `order` is the permutation that sorts the lengths from longest to
    # shortest; it is applied to the sequences and labels as well.
    sorted_lens, order = torch.tensor(seq_lens).sort(descending=True)

    return padded[order], torch.tensor(targets)[order], sorted_lens

def load_file_list(csv_path):
    """Return the non-null entries of the first column of a CSV file as a list."""
    first_column = pd.read_csv(csv_path).iloc[:, 0]
    return first_column.dropna().tolist()

def load_dataset(dataset_class, root_dir, drop_columns, file_list, **kwargs):
    """Instantiate ``dataset_class`` with the shared arguments plus any extras.

    Args:
        dataset_class: Callable invoked with keyword arguments only.
        root_dir: Forwarded as ``root_dir``.
        drop_columns: Forwarded as ``drop_columns``.
        file_list: Forwarded as ``file_list``.
        **kwargs: Additional keyword arguments forwarded unchanged.

    Returns:
        Whatever ``dataset_class(...)`` returns.
    """
    init_args = {
        'root_dir': root_dir,
        'drop_columns': drop_columns,
        'file_list': file_list,
    }
    init_args.update(kwargs)
    return dataset_class(**init_args)

In [14]:
def extract_class_embeddings_from_dataloader(encoder, dataloader, device):
    """Compute the mean encoder output for every label seen in ``dataloader``.

    Args:
        encoder: Module called as ``encoder(x_batch, lengths)``; it is put in
            eval mode and run without gradient tracking.
        dataloader: Iterable yielding ``(x_batch, labels, lengths)`` batches.
        device: Device the batch tensors are moved to before encoding.

    Returns:
        Dict mapping each label value to a NumPy array holding the mean of
        the encoder outputs of all samples with that label.
    """
    encoder.eval()
    per_class = defaultdict(list)

    with torch.no_grad():
        for x_batch, labels, lengths in dataloader:
            # Samples whose reported length is zero are dropped before
            # encoding; a batch with no remaining samples is skipped.
            keep = lengths > 0
            if not keep.any():
                continue

            inputs = x_batch[keep].to(device)
            kept_lengths = lengths[keep].to(device)
            kept_labels = labels[keep].to(device)

            embeddings = encoder(inputs, kept_lengths)

            # Collect each embedding on the CPU under its label.
            for vector, label in zip(embeddings, kept_labels):
                per_class[label.item()].append(vector.cpu())

    return {
        label: torch.stack(vectors).mean(dim=0).numpy()
        for label, vectors in per_class.items()
    }

In [15]:
# File lists for the three splits, read from the first column of each CSV.
train_files = load_file_list('/home/gerardo/LSE_HEALTH/LSE_TFG/train_test_val_split/train_weigthed_samples.csv')
val_files   = load_file_list('/home/gerardo/LSE_HEALTH/LSE_TFG/train_test_val_split/val_weigthed_samples.csv')
test_files  = load_file_list('/home/gerardo/LSE_HEALTH/LSE_TFG/train_test_val_split/test_weigthed_samples.csv')

# Windowing parameters are only passed to the windowed dataset variant.
if DatasetClass == WindowedCSVDataset:
    extra_args = {'target_frames': 11, 'stride': 5}
else:
    extra_args = {}

# Arguments shared by all splits; only 'file_list' changes between them.
common_args = {'drop_columns': ['frame'], 'file_list': train_files}
train_dataset = load_dataset(DatasetClass, '/home/gerardo/FEATURES_POSE_DATASET', **common_args, **extra_args)

common_args.update(file_list=val_files)
val_dataset = load_dataset(DatasetClass, '/home/gerardo/FEATURES_POSE_DATASET', **common_args, **extra_args)

common_args.update(file_list=test_files)
test_dataset = load_dataset(DatasetClass, '/home/gerardo/FEATURES_POSE_DATASET', **common_args, **extra_args)

# Only the training loader shuffles; all loaders use the padding collate_fn.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

In [16]:
# Sizes derived from the data: the input width is the second dimension of the
# first training sample, and the hidden size is a third of it (floored).
sample_sequence, _, _ = train_dataset[0]
input_size = sample_sequence.shape[1]
hidden_size = input_size // 3
num_classes = len(train_dataset.class_to_idx)

# Fixed architecture settings.
num_layers, dropout, bidirectional = 2, 0.5, True

In [17]:
# Build the classifier from the settings defined earlier and move it to `device`.
model = LSE_Bidirectional_LSTM(
    input_size,
    hidden_size,
    num_layers,
    num_classes,
    dropout=dropout,
    bidirectional=bidirectional,
).to(device)

In [18]:

# Restore the trained weights, mapping stored tensors onto the active device.
checkpoint_path = '/home/gerardo/LSE_HEALTH/LSE_TFG/models/LSTM/BiLSTM-PADDED-NORMALICED-WEIGTHED-FOCAL-LOSS-GAMMA-3-79.pth'
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the module.
model.eval()

LSE_Bidirectional_LSTM(
  (lstm): LSTM(37, 12, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=24, out_features=43, bias=True)
)

## Windows

In [None]:
import os
import torch
import pandas as pd
import numpy as np
import re
from torch.utils.data import Dataset, DataLoader
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class EvaluationCSVDataset(Dataset):
    """Fixed-size sliding windows over the rows of a single CSV file.

    Each item is ``(sequence, length, frame_range)``: a float32 tensor with
    the window's rows, the number of rows in it, and the ``(first, last)``
    values of the CSV's ``'frame'`` column for that window, or
    ``(None, None)`` when the file has no ``'frame'`` column.

    Args:
        csv_path: Path of the CSV file to read.
        drop_columns: Optional column names to remove before building the
            windows; names absent from the file are ignored.
        window_size: Number of rows per window.
        stride: Row offset between the starts of consecutive windows.
    """

    def __init__(self, csv_path, drop_columns=None, window_size=22, stride=11):
        self.window_size = window_size
        self.stride = stride

        table = pd.read_csv(csv_path)

        # Capture the frame numbers first: 'frame' may be among the columns
        # dropped below.
        frame_values = table['frame'].values if 'frame' in table.columns else None

        if drop_columns:
            present = [name for name in drop_columns if name in table.columns]
            table = table.drop(columns=present)

        # Only complete windows are produced; trailing rows that cannot fill
        # a whole window are not used.
        window_starts = range(0, len(table) - window_size + 1, stride)

        self.sequences = [
            torch.tensor(table.iloc[first:first + window_size].values, dtype=torch.float32)
            for first in window_starts
        ]

        if frame_values is None:
            self.frame_ranges = [(None, None) for _ in window_starts]
        else:
            self.frame_ranges = [
                (int(frame_values[first]), int(frame_values[first + window_size - 1]))
                for first in window_starts
            ]

    def __len__(self):
        """Number of windows extracted from the file."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return ``(sequence, length, frame_range)`` for window ``idx``."""
        window = self.sequences[idx]
        return window, window.size(0), self.frame_ranges[idx]

def custom_collate(batch):
    """Collate ``(sequence, length, frame_range)`` items into a batch.

    Sequences and lengths go through PyTorch's default collation; the frame
    ranges are returned as a plain list, one entry per item, so they are not
    converted to tensors.
    """
    collate = torch.utils.data.dataloader.default_collate
    data = collate([sample[0] for sample in batch])
    lengths = collate([sample[1] for sample in batch])
    frame_ranges = [sample[2] for sample in batch]
    return data, lengths, frame_ranges

def frames_to_ms(frame, fps):
    """Convert a frame index to a timestamp in milliseconds.

    The previous implementation returned ``frame / fps``, which is a value in
    seconds despite the function's name; the result is now scaled to
    milliseconds.

    Args:
        frame: Frame index (or frame count).
        fps: Frames per second of the video.

    Returns:
        Time in milliseconds corresponding to ``frame``.

    Raises:
        ZeroDivisionError: If ``fps`` is zero.
    """
    # Multiply before dividing so integer inputs give exact results.
    return frame * 1000 / fps


def label_to_word(label):
    """Return the list of words whose encoded label equals ``label``.

    The mapping is rebuilt from the labeled-words CSV on every call.

    Raises:
        KeyError: If ``label`` does not appear in the CSV.
    """
    vocabulary = pd.read_csv('/home/gerardo/LSE_HEALTH/LSE_TFG/labeled_words.csv')

    # Several words may share one encoded label, so each label maps to a list.
    words_per_label = vocabulary.groupby('Encoded_Label')['Word'].apply(list)

    return words_per_label.to_dict()[label]

data_dir = '/home/gerardo/LSE_SPOT_FEATURES_DATASET'

# Maps each video name to a list of (predicted_label, (first_frame, last_frame))
# tuples, one per sliding window.
preds = {}

# Inference only: switch to eval mode once instead of once per file.
model.eval()

# os.listdir returns entries in arbitrary order; sorting makes the run
# reproducible.
# NOTE(review): files that differ only in the numeric suffix before
# '_features.csv' map to the same key below, so the last one processed
# replaces the earlier ones -- confirm this is intended.
for file in sorted(os.listdir(data_dir)):
    # Files whose name ends in '4_features.csv' are excluded.
    if file.endswith('4_features.csv'):
        continue

    csv_path = os.path.join(data_dir, file)
    dataset = EvaluationCSVDataset(
        csv_path=csv_path,
        drop_columns=['frame'],
        window_size=51,
        stride=5,
    )
    dataloader = DataLoader(dataset, batch_size=4, shuffle=False, collate_fn=custom_collate)

    pred_list = []
    with torch.no_grad():
        for x_batch, lengths, frame_ranges in dataloader:
            x_batch = x_batch.to(device)
            lengths = lengths.to(device)

            outputs = model(x_batch, lengths)
            # Index of the highest-scoring class for every window in the batch.
            _, predicted = torch.max(outputs, 1)

            pred_list.extend(zip(predicted.tolist(), frame_ranges))

    # Strip the '_<digits>_features.csv' suffix to obtain the video name.
    filename = re.split(r'_[0-9]*_features\.csv', file)[0]
    preds[filename] = pred_list


In [20]:
def extract_true_gestures(xlsx_path, video_filename, accepted_words, label_encoder):
    """Collect the annotated gestures of one video from the glosses spreadsheet.

    Reads the 'GlossesContent' sheet, keeps the rows whose 'File' equals
    ``video_filename`` and whose 'Gloss' is in ``accepted_words``, and converts
    the 'Start(ms)' / 'End(ms)' columns to seconds.

    Args:
        xlsx_path: Path of the Excel workbook.
        video_filename: Value matched against the 'File' column.
        accepted_words: Glosses to keep.
        label_encoder: Mapping from gloss to encoded label.

    Returns:
        List of ``[encoded_label, start_seconds, end_seconds]`` lists, in
        sheet order.

    Raises:
        KeyError: If a kept gloss is missing from ``label_encoder``.
    """
    glosses = pd.read_excel(xlsx_path, sheet_name='GlossesContent')

    wanted = (glosses['File'] == video_filename) & glosses['Gloss'].isin(accepted_words)
    selected = glosses[wanted]

    # Iterate the needed columns directly rather than row by row, and avoid
    # naming a local `list`, which shadowed the builtin.
    return [
        [label_encoder[gloss], start_ms / 1000, end_ms / 1000]
        for gloss, start_ms, end_ms in zip(
            selected['Gloss'], selected['Start(ms)'], selected['End(ms)']
        )
    ]


# Maps each video name to its list of [encoded_label, start_s, end_s] entries.
truth = {}
spot_dir = '/home/gerardo/LSE_SPOT_DATASET'
xlsx_path = '/home/gerardo/LSE_HEALTH/LSE-Health-UVigo.xlsx'

# Read the vocabulary once and derive both the word -> label mapping and the
# list of accepted words from it.
labeled_words = pd.read_csv('/home/gerardo/LSE_HEALTH/LSE_TFG/labeled_words.csv')
label_encoder = labeled_words.set_index('Word')['Encoded_Label'].to_dict()
words = labeled_words['Word'].tolist()

# Sorted for a reproducible order (os.listdir order is arbitrary).
for file in sorted(os.listdir(spot_dir)):
    # Files whose name ends in '_4.csv' are excluded.
    if file.endswith('_4.csv'):
        continue

    # Only the file name is needed; the CSV contents are not read because
    # nothing used them.
    video_filename = re.split(r'_[0-9]\.csv', file)[0]

    # Several files can map to the same video; the annotations depend only on
    # the video name, so the spreadsheet lookup is done once per video.
    if video_filename in truth:
        continue

    truth[video_filename] = extract_true_gestures(xlsx_path, video_filename, words, label_encoder)

In [21]:
# Matching slack, in frames, added on both sides of each annotated gesture.
tolerance = 15
# Frame rate used to convert annotated times (seconds) to frame indices.
fps = 25


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def _match_gestures(pred_gestures, true_gestures, fps, tolerance):
    """Greedily pair each annotated gesture with one same-label prediction.

    A prediction matches when its label equals the annotation's and its frame
    range overlaps the annotated range widened by ``tolerance`` frames on each
    side. Each prediction is used at most once, and each annotation takes the
    first eligible prediction in list order.

    Returns:
        Tuple ``(matched_preds, matched_truths)`` of index sets.
    """
    matched_preds, matched_truths = set(), set()

    for truth_idx, (true_label, start_s, end_s) in enumerate(true_gestures):
        window_start = int(start_s * fps) - tolerance
        window_end = int(end_s * fps) + tolerance

        for pred_idx, (pred_label, (pred_start, pred_end)) in enumerate(pred_gestures):
            if pred_idx in matched_preds or pred_label != true_label:
                continue

            # Two closed intervals overlap when the larger start does not
            # exceed the smaller end.
            if max(pred_start, window_start) <= min(pred_end, window_end):
                matched_preds.add(pred_idx)
                matched_truths.add(truth_idx)
                break

    return matched_preds, matched_truths


total_precision = total_recall = total_f1 = total_accuracy = 0
count = 0

# Only videos that have both predictions and annotations are scored.
for key, pred_gestures in preds.items():
    if key not in truth:
        continue

    true_gestures = truth[key]
    matched_preds, matched_truths = _match_gestures(pred_gestures, true_gestures, fps, tolerance)

    true_positive = len(matched_preds)
    false_positive = len(pred_gestures) - true_positive
    false_negative = len(true_gestures) - len(matched_truths)

    precision = _safe_ratio(true_positive, true_positive + false_positive)
    recall = _safe_ratio(true_positive, true_positive + false_negative)
    f1_score = _safe_ratio(2 * (precision * recall), precision + recall)
    # "Accuracy" here is TP / (TP + FP + FN).
    accuracy = _safe_ratio(true_positive, true_positive + false_positive + false_negative)

    total_precision += precision
    total_recall += recall
    total_f1 += f1_score
    total_accuracy += accuracy
    count += 1

if count == 0:
    print("No matching videos to evaluate.")
else:
    # Average the per-video metrics over the scored videos.
    avg_precision = total_precision / count
    avg_recall = total_recall / count
    avg_f1 = total_f1 / count
    avg_accuracy = total_accuracy / count

    print("========== AVERAGE VIDEO-LEVEL METRICS ==========")
    print(f"Average Precision: {avg_precision:.2f}")
    print(f"Average Recall:    {avg_recall:.2f}")
    print(f"Average F1 Score:  {avg_f1:.2f}")
    print(f"Average Accuracy:  {avg_accuracy:.2f}")


Average Precision: 0.01
Average Recall:    0.15
Average F1 Score:  0.02
Average Accuracy:  0.01
