In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm

In [24]:
import glob
from torchvision import transforms as T
from PIL import Image
import os
from tqdm.notebook import tqdm

# Create model & load weights
- Create pytorch module from code
- Load weights to model

In [2]:
class HandSignClassify(nn.Module):
    def __init__(self, num_classes=29, architecture='tf_efficientnetv2_b2'):
        super(HandSignClassify, self).__init__()
        self.base_model = timm.create_model(architecture, pretrained=True)
        self.base_model.classifier = nn.Sequential(
            nn.Linear(1408, 625),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(625, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )
    
    def forward(self, x):
        return self.base_model(x)

In [4]:
model = HandSignClassify()
model.load_state_dict(torch.load('weights/effnet_v2_b2.pth'))

model.to('cuda');

# Inference code

In [19]:
labels = sorted([os.path.basename(folder_path) for folder_path in glob.glob('data/asl_alphabet_train/asl_alphabet_train/*')])
index_label = dict([(index, label) for index, label in enumerate(labels)])

In [11]:
test_transform = T.Compose([
    T.Resize(size=(224, 224)),
    T.ToTensor(),
    T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

In [7]:
images_path = glob.glob('data/asl_alphabet_test/asl_alphabet_test/*.jpg')

In [33]:
for image_path in tqdm(images_path):
    input_tensor = test_transform(Image.open(image_path))
    logits = model(input_tensor.to('cuda').unsqueeze(0))

    preds = index_label[logits.argmax().detach().cpu().item()]
    
    print(preds, os.path.basename(image_path).split('_')[0])

pred, label


HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))

Z Z
Q J
T T
A B
Q Q
A A
D D
Q C
I I
Y W
space H
G G
space space
T S
M M
K K
Y Y
R R
Q N
L V
L L
F F
M E
Q nothing
U U
Q P
O O
X X

