<a href="https://colab.research.google.com/github/ValentinaEmili/Sign_language/blob/main/ASL_recognition_100.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The glossary contains 100 different words, but the instances available for each word do not exactly match those listed in the WLASL_v0.3 file: some video links were broken, so the corresponding instances have been removed. Every word still has at least one instance.

In [2]:
# Mount Google Drive at /content/drive (Colab only); the call's output is shown below the cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import pandas as pd
import cv2
from google.colab.patches import cv2_imshow
from tqdm import tqdm
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_sequence, pad_packed_sequence
from torch.nn import LSTM
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Dataset
import shutil

In [6]:
# JSON description of the 100-word subset, loaded as a DataFrame.
js_file = pd.read_json("/content/drive/MyDrive/NLP/WLASL100.json")

# Root of the full dataset and root of the 100-word subset.
original_folder = "/content/drive/MyDrive/NLP/dataset/"
folder = "/content/drive/MyDrive/NLP/dataset/subset_100/"

# One root directory per split inside the subset folder.
training_folder, validation_folder, test_folder = (
  folder + split_dir for split_dir in ("train/", "val/", "test/")
)

# Path of the "video/" sub-directory of each split (not created here).
training_video, validation_video, test_video = (
  split_root + "video/"
  for split_root in (training_folder, validation_folder, test_folder)
)

# Path of the "images/" sub-directory of each split.
training_images, validation_images, test_images = (
  split_root + "images/"
  for split_root in (training_folder, validation_folder, test_folder)
)

# Create the image directories (and their parents); existing ones are left as they are.
for images_dir in (training_images, validation_images, test_images):
  os.makedirs(images_dir, exist_ok=True)

In [None]:
# Copy the .npy file of every instance listed in WLASL100.json from the full
# dataset into the matching split directory of the 100-word subset.
js_100 = pd.read_json("/content/drive/MyDrive/NLP/WLASL100.json")

# For every split name handled here: (directory to copy from, directory to copy to).
split_dirs = {
  'train': (os.path.join(original_folder, 'train/images/'), training_images),
  'val': (os.path.join(original_folder, 'val/images/'), validation_images),
  'test': (os.path.join(original_folder, 'test/images/'), test_images),
}

# Sources listed in the JSON but absent on disk; reported once at the end
# instead of aborting the whole copy on the first missing file.
missing_sources = []

for word, instances in zip(tqdm(list(js_100['gloss']), desc='glosses'), js_100['instances']):
  for instance in instances:
    split = instance['split']
    if split not in split_dirs:
      # Any other split value is ignored, exactly as before.
      continue

    # Files are named "<gloss>_<video_id>.npy".
    filename = f"{word}_{instance['video_id']}.npy"
    source_dir, dest_dir = split_dirs[split]
    source = os.path.join(source_dir, filename)
    dest = os.path.join(dest_dir, filename)

    if not os.path.isfile(source):
      missing_sources.append(source)
      continue

    # Re-running the cell should be cheap: skip files that were already copied
    # completely (same size); a truncated copy is overwritten.
    if os.path.isfile(dest) and os.path.getsize(dest) == os.path.getsize(source):
      continue

    shutil.copy(source, dest)

if missing_sources:
  print(f"Warning: {len(missing_sources)} source file(s) not found and skipped:")
  for missing_path in missing_sources:
    print(f"  {missing_path}")

Preprocess the data

In [None]:
def _collect_glosses(image_dir):
  """Return the set of glosses that have at least one sample file in ``image_dir``.

  Sample files are named "<gloss>_<video_id>.npy". The gloss is everything
  before the LAST underscore, so a gloss that itself contains an underscore is
  handled correctly. Entries that are not ".npy" files or that have no
  underscore (e.g. ".ipynb_checkpoints") are ignored instead of being turned
  into a bogus gloss or raising on tuple unpacking.
  """
  glosses = set()
  for image in os.listdir(image_dir):
    word, separator, _ = image.rpartition("_")
    if not image.endswith(".npy") or not separator:
      continue
    glosses.add(word)
  return glosses


train_gloss = _collect_glosses(training_images)
val_gloss = _collect_glosses(validation_images)
test_gloss = _collect_glosses(test_images)

# Sorted union of the glosses of all three splits, so the mapping below is deterministic.
gloss = sorted(train_gloss | val_gloss | test_gloss)

# gloss -> integer class index (position in the sorted list).
label_map = {label: num for num, label in enumerate(gloss)}

In [None]:
class SignLanguageDataset(Dataset):
  """Dataset over a directory of "<gloss>_<video_id>.npy" sample files.

  Each item is a ``(sequence, label)`` pair: ``sequence`` is the array stored in
  the file as a float32 tensor, ``label`` is ``label_map[gloss]`` as a long
  tensor, where the gloss is the part of the file name before the last
  underscore.

  Args:
    image_dir: directory containing the ".npy" files.
    label_map: dict mapping each gloss to its integer class index.
    feature_dim: expected size of the second dimension of every array
      (defaults to 258, the value previously hard-coded).

  Raises (from ``__getitem__``):
    KeyError: if the gloss of a file is not in ``label_map``.
  """

  def __init__(self, image_dir, label_map, feature_dim=258):
    self.image_dir = image_dir
    self.label_map = label_map
    self.feature_dim = feature_dim
    # Keep only sample files; other directory entries (e.g. ".ipynb_checkpoints")
    # would otherwise be counted as samples and fail on load.
    self.files = sorted(
      entry for entry in os.listdir(image_dir) if entry.endswith(".npy")
    )

  def __len__(self):
    return len(self.files)

  def __getitem__(self, idx):
    file_name = self.files[idx]
    np_array = np.load(os.path.join(self.image_dir, file_name))

    # An empty array, or one that is not 2-D with ``feature_dim`` columns, is
    # replaced by a single all-zero row so the batch can still be collated.
    if np_array.size == 0 or np_array.ndim != 2 or np_array.shape[1] != self.feature_dim:
      print(f"Warning: Empty or invalid shape for file: {file_name}")
      np_array = np.zeros((1, self.feature_dim), dtype=np.float32)

    # Split on the LAST underscore only, so glosses containing "_" still work.
    gloss_name = file_name.rsplit("_", 1)[0]
    label = self.label_map[gloss_name]

    return torch.tensor(np_array, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

def collate_fn(batch):
  """Turn a list of (sequence, label) pairs into one packed batch.

  The sequences are zero-padded to the length of the longest one in the batch
  and then packed, so the padding carries its true lengths with it. Sequences do
  not need to be sorted by length.

  Returns:
    A ``(packed_sequences, labels)`` tuple: a ``PackedSequence`` built from the
    padded batch and a 1-D tensor holding the labels in batch order.
  """
  sequences, labels = map(list, zip(*batch))

  # True (unpadded) length of every sequence, in batch order.
  lengths = list(map(len, sequences))

  # Zero-pad to a common length, batch dimension first.
  padded_sequences = pad_sequence(sequences, batch_first=True)

  packed_sequences = pack_padded_sequence(
    padded_sequences,
    lengths,
    batch_first=True,
    enforce_sorted=False,
  )
  return packed_sequences, torch.tensor(labels)

# Number of samples per batch, shared by all three loaders.
batch_size = 16

# One dataset per split, all using the same gloss -> index mapping.
train_dataset, val_dataset, test_dataset = (
  SignLanguageDataset(split_images, label_map)
  for split_images in (training_images, validation_images, test_images)
)

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(
  train_dataset,
  batch_size=batch_size,
  shuffle=True,
  collate_fn=collate_fn,
)
val_loader, test_loader = (
  DataLoader(split_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
  for split_dataset in (val_dataset, test_dataset)
)