In [1]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision

In [2]:
import os
from PIL import Image
from tqdm import tqdm
import numpy as np

In [50]:
trainData_Dir = "D:/project/dataset/sing/asl_alphabet_train/asl_alphabet_train"

# Define the transformations
train_transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.RandomRotation(degrees=15),  # Resize images to a fixed size
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize the images
])

dataset = datasets.ImageFolder(trainData_Dir, transform=train_transform)

train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
t , v = random_split(dataset, [train_size, val_size])

batch_size = 32
shuffle_data = True


val_loader = DataLoader(v, batch_size=batch_size, shuffle=False)

train_loader = DataLoader(t, batch_size=batch_size, shuffle=shuffle_data)



In [353]:
mapper

{0: 'A',
 1: 'B',
 2: 'C',
 3: 'D',
 4: 'del',
 5: 'E',
 6: 'F',
 7: 'G',
 8: 'H',
 9: 'I',
 10: 'J',
 11: 'K',
 12: 'L',
 13: 'M',
 14: 'N',
 15: 'nothing',
 16: 'O',
 17: 'P',
 18: 'Q',
 19: 'R',
 20: 'S',
 21: 'space',
 22: 'T',
 23: 'U',
 24: 'V',
 25: 'W',
 26: 'X',
 27: 'Y',
 28: 'Z'}

In [9]:
mapper = {}
mapper_ = {}
for i, dir_name in enumerate(os.listdir(trainData_Dir)):
    mapper[i] = dir_name
    mapper_[dir_name] = i

In [51]:
test_transform = transforms.Compose([
    transforms.Resize((100, 100)),  # Resize images to a fixed size
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize the images
])

In [74]:
testData_Dir = "D:\project\dataset\ASL_Dataset\Test\W"
images = []
lables = []
for file_name in os.listdir(testData_Dir):
    # Get the full path of the file
    image_path =os.path.join(testData_Dir, file_name)
    image = Image.open(image_path)
    transformed_image = test_transform(image)
    images.append(transformed_image)
    #lable =mapper_[file_name.split("_")[0]]
    lables.append(25)


In [75]:
images_t = torch.stack(images)
lables_t = torch.tensor(lables, dtype=torch.long)

test_dataset = TensorDataset(images_t, lables_t)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [13]:
model = torchvision.models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\Yuriy/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
 11%|█▏        | 11.2M/97.8M [00:01<00:09, 9.21MB/s]


KeyboardInterrupt: 

In [57]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(20000, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )
    
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

model = SimpleCNN(29)

In [58]:
import torchvision.models as models
import torch.nn.functional as F
resNet = models.resnet34(pretrained=True)
class MyResNet(nn.Module):
    def __init__(self, model, froze):
        super(MyResNet, self).__init__()
        if(froze):
          for param in model.parameters():
              param.requires_grad = False
        self.model = torch.nn.Sequential(*list(model.children())[:-1])
        self.relu = nn.ReLU()
        self.l = nn.Linear(512, 29)
        self.dropout = nn.Dropout2d(0.2)

    def forward(self, x):
        batch, _, _, _ = x.shape
        x = self.model(x)
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        x = self.dropout(x)
        res = self.l(x)
        return res

myResNet = MyResNet(resNet, False)



In [59]:
checkpoint_path = "D:/project/asl/checkpoint.pt"

def save_model(model):
    torch.save(model.state_dict(), checkpoint_path)

In [60]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [61]:

model.to(device)
opt_myCNN = optim.Adam(model.parameters(), lr = 0.0001)
criterion = nn.CrossEntropyLoss()
num_epochs=2

In [62]:
model.to("cuda")

SimpleCNN(
  (features): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=20000, out_features=256, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=256, out_features=29, bias=True)
  )
)

In [48]:
def validate(model, dataloader):
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    val_running_errors = 0
    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader), total=int(len(dataloader.dataset)/dataloader.batch_size)):
            #print(data[0].shape)
            data, labels = data[0].to(device), data[1].to(device)

            outputs = model(data)
            #print(outputs)
            #print(labels)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item()* data.size(0)
            _, preds = torch.max(outputs.data, 1)
            val_running_correct += torch.sum(preds == labels)
            val_running_errors += torch.sum(preds != labels)

        val_loss = val_running_loss/len(dataloader.dataset)
        val_accuracy = val_running_correct/len(dataloader.dataset)
        val_errore = val_running_errors/len(dataloader.dataset)
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy}')
        
        return val_loss, val_accuracy, val_errore
# train part
def train(model,optimizer, dataloader):
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    train_running_errors = 0
    for i, data in tqdm(enumerate(dataloader), total=int(len(dataloader.dataset) /dataloader.batch_size)):
        #print(data[1])
        data, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(data)

        loss = criterion(outputs, labels)

        _, preds = torch.max(outputs.data, 1)
        train_running_errors += torch.sum(preds != labels)
        train_running_correct += torch.sum(preds == labels)
        train_running_loss += loss.item()* data.size(0)
        loss.backward()
        optimizer.step()
        
    train_loss = train_running_loss/len(dataloader.dataset)
    train_accuracy = train_running_correct/len(dataloader.dataset)
    train_errore = train_running_errors/len(dataloader.dataset)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy}")
    return train_loss, train_accuracy, train_errore
 #  main 
def model_train(model, optimizer, train_dataloader ,test_dataloader, epochs):
  err_history = {"train" : [], "val" : []}
  acc_history =  {"train" : [], "val" : []}
  loss_history = {"train" : [], "val" : []}
  for epoch in range(epochs):
      print(f"Epoch {epoch+1} of {epochs}")
      phase = "train"
      print(phase)
      loss, accuracy, error = train(model, optimizer, train_dataloader)
      err_history[phase].append(error)
      acc_history[phase].append(accuracy)
      loss_history[phase].append(loss)
      phase = "val"
      print(phase)
      loss, accuracy, error = validate(model, test_dataloader)
      err_history[phase].append(error)
      acc_history[phase].append(accuracy)
      loss_history[phase].append(loss)
  save_model(model)
  return err_history, acc_history, loss_history

In [63]:
myCNN_err_history, myCNN_acc_history , myCNN_loss_history = model_train(model, opt_myCNN, train_loader, val_loader ,num_epochs )

Epoch 1 of 2
train


2447it [10:39,  3.83it/s]                          


Train Loss: 2.2818, Train Acc: 0.30832695960998535
val


272it [01:00,  4.47it/s]                         


Val Loss: 1.3559, Val Acc: 0.6277011632919312
Epoch 2 of 2
train


2447it [03:53, 10.50it/s]                          


Train Loss: 1.3443, Train Acc: 0.5569859743118286
val


272it [00:18, 14.54it/s]                         

Val Loss: 0.8091, Val Acc: 0.7822988629341125





In [65]:
def validate_(model, optimizer, dataloader):
    model.eval()
    print("1")
    val_running_loss = 0.0
    val_running_correct = 0
    val_running_errors = 0
    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader), total=int(len(dataloader.dataset)/dataloader.batch_size)):
            #print(data[0].shape)
            data, labels = data[0].to(device), data[1].to(device)

            outputs = model(data)
            #print(labels)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item()* data.size(0)
            _, preds = torch.max(outputs.data, 1)
            #print(preds)
            val_running_correct += torch.sum(preds == labels)
            val_running_errors += torch.sum(preds != labels)

        val_loss = val_running_loss/len(dataloader.dataset)
        val_accuracy = val_running_correct/len(dataloader.dataset)
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy}')
        
        return outputs, labels, preds

In [383]:
validate_(model, _ , val_loader)

1


272it [00:51,  5.30it/s]                         


Val Loss: 0.2687, Val Acc: 0.9111494421958923


(tensor([[-1.3707e+01, -7.3577e+00, -2.8976e+01, -2.1046e+01, -1.2819e+01,
          -1.6865e+01, -1.5723e+01, -2.9239e+01, -1.4102e+01, -2.7500e+01,
          -5.8300e+00, -2.3414e+01, -2.1396e+01, -2.4804e+01, -1.4026e+01,
          -3.5189e+01, -2.5664e+01, -6.8277e+00, -7.8694e+00, -1.8882e+00,
          -4.1162e+00,  4.0520e+00,  4.9129e+00,  3.8504e+00,  1.7671e+00,
           1.3478e+00, -1.7756e+01, -1.1727e+01, -6.2857e+00],
         [-3.0261e+01, -3.3771e+01, -1.6869e+01, -2.1579e+01, -2.0277e+01,
          -1.8472e+01,  8.7345e+00,  1.0297e+01, -1.0017e+01,  7.2036e+00,
          -1.7011e+01, -3.1334e+01, -2.6127e+01, -1.3679e+01, -2.6391e+01,
           5.7676e+00, -4.4864e+00, -1.5943e+01, -2.2357e+01, -3.1205e+01,
          -1.9504e+01, -1.4486e+01, -2.5741e+01, -1.0493e+01, -3.1590e+00,
          -3.1030e+01, -1.3413e+00, -1.9823e+01,  5.0195e-01],
         [-4.7278e+00, -2.9115e+01, -1.4662e+00, -1.2794e+01, -1.7192e+01,
          -1.6715e+01, -1.9474e+01, -2.7534e+01, 

1


1it [00:00,  3.14it/s]

Val Loss: 20.5002, Val Acc: 0.1785714328289032





In [66]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [326]:
count = [0]*29
for e in val_loader.dataset:
    count[e[1]] +=1

In [None]:
count

In [76]:
outputs, leble, pred  = validate_(model, _ , test_dataloader)

1


1it [00:00, 66.92it/s]

Val Loss: 8.1486, Val Acc: 0.0





In [78]:
torch.max(pred)

tensor(22, device='cuda:0')

In [70]:
def plot_confusion_matrix(cm, 
                      normalized=False, 
                      title=None, 
                      cmap=plt.cm.Blues,
                      size=(16,12)):
    fig, ax = plt.subplots(figsize=size)
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)

In [312]:
cm

array([[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0],
       [0, 0, 0, 0, 

In [231]:
model_test = SimpleCNN(29)

In [142]:
saved_state_dict = torch.load(checkpoint_path)

model_test.load_state_dict(saved_state_dict)
model_test.to(device)

SimpleCNN(
  (features): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=29, bias=True)
  )
)

In [185]:
len(train_data_loader.dataset)

87000

In [342]:
out, lab, preds =  validate_(model, "8" ,test_dataloader)

1


1it [00:00,  6.56it/s]

Val Loss: 3.3891, Val Acc: 0.0357142873108387





In [343]:
preds

tensor([28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
        28, 28, 28, 28, 28, 28, 28, 28, 28, 28], device='cuda:0')

In [344]:
lab

tensor([ 0,  1,  2,  3,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 14, 16, 17, 18,
        19, 21, 20, 22, 23, 24, 25, 26, 27, 28], device='cuda:0')

In [168]:
test_dataset[:,0]

IndexError: too many indices for tensor of dimension 1

In [176]:
out_R  =model_test(images_t.to(device))
loss = criterion(out_R, lables_t.to(device))

In [180]:
out_R

tensor([[  25.1246,  -17.7779,  -30.0222,  -46.3044,  -13.8914,  -13.6885,
          -33.4458,  -59.6534,  -17.4975,  -46.3703,  -45.5736,   -0.7717,
          -10.1311,  -16.1549,   -4.7489,  -39.6303,  -25.6147,  -51.9624,
            8.8614,   27.1093,  -26.5826,  -34.8662,  -16.8425,   10.4190,
            0.4968,   -6.2862,  -45.1497,   -7.3668,  -32.7714],
        [  -3.4972,   31.2964,  -28.4469,   -3.5423,   19.0190,  -17.2941,
          -26.2976,  -15.2658,    5.8858,  -37.4065,  -36.9119,  -33.4379,
          -15.3079,  -18.1010,  -20.4883,  -24.0730,  -25.1254,  -32.8994,
           -3.7259,  -26.4858,  -11.4578,  -49.4884,  -27.8632,  -28.3417,
          -25.3667,  -53.3120,  -26.9595,   -9.6482,  -48.6743],
        [ -37.7241,  -78.8876,   92.2101,   29.9073,    7.8852,  -19.0678,
          -12.7656,  -54.3811,  -25.9852,  -99.3926, -131.8276,  -40.5023,
          -76.2138,  -68.5968,   17.5785,  -41.7020,    5.9910, -155.1064,
           -2.1584,  -23.8685, -105.0168, -13

In [186]:
_, preds = torch.max(out_R.data, 1)

In [187]:
_

tensor([ 27.1093,  31.2964,  92.2101,  57.4406,  38.3035, 124.1692,  50.7408,
         67.6636,  45.2949,  37.1401,  25.1132,  33.7056,  26.8946,  21.5302,
         46.9998,  51.6844,  68.3438,  59.0578,   5.4448,  16.1544,  19.0294,
         35.4986,  25.2949,  20.5789,  47.1341,  14.0050,  31.2637,  24.2863],
       device='cuda:0')

In [188]:
preds

tensor([19,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 27, 13, 14, 15, 16,
        17, 28, 18, 19, 20, 21, 22, 23, 24, 25], device='cuda:0')

In [189]:
lables_t

tensor([ 0,  1,  2,  3,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 14, 16, 17, 18,
        19, 21, 20, 22, 23, 24, 25, 26, 27, 28])

In [177]:
loss

tensor(45.9709, device='cuda:0', grad_fn=<NllLossBackward0>)