In [1]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)
print('Location:', torch.__file__) # /home/emre/.local/lib/python3.7/site-packages

1.12.0+cu102
True
10.2
Location: /home/emre/anaconda3/envs/emre_venv/lib/python3.9/site-packages/torch/__init__.py


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from typing import Union, Tuple
from torch_geometric.typing import OptPairTensor, Adj, Size # Optional[Tensor], Union[Tensor, SparseTensor], Optional[Tuple[int, int]], all about data type

from torch import Tensor
from torch.nn import Linear
import torch.nn.functional as F
from torch_sparse import SparseTensor, matmul
from torch_geometric.nn.conv import MessagePassing

In [3]:
import warnings
warnings.filterwarnings("ignore")
from joblib import Parallel, delayed

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as gnn
from torchsummary import summary
from torch_geometric.nn import MessagePassing
from torch_geometric.data import Dataset, Data, DataLoader
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.utils import softmax

In [25]:
import pickle
from tqdm import tqdm
import numpy as np
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '2'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 

In [5]:
def load_pkl(filename):
    with open(filename, 'rb') as input:
        data = pickle.load(input)
    return data

In [6]:
[train_images, train_vids, train_skels, train_labels,
 test_images, test_vids, test_skels, test_labels] = load_pkl('datasets/WLASL_10_vid_skel_mDsim.pkl')
train_images = np.swapaxes(train_images.astype(np.float64), 1, 3)
test_images = np.swapaxes(test_images.astype(np.float64), 1, 3)
train_labels = np.argmax(train_labels, -1)
test_labels = np.argmax(test_labels, -1)

print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)
print(test_labels.shape)

(224, 3, 128, 128)
(224,)
(56, 3, 128, 128)
(56,)


## Dataloader

In [7]:
class CustomImageDataset(Dataset):
    def __init__(self, x, y):
        super().__init__()
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        temp_x = torch.tensor(self.x[idx], dtype=torch.float)
        temp_y = torch.tensor(self.y[idx], dtype=torch.long)
#         self.x = self.x[idx]
#         self.y = self.y[idx]
#         return self.x[idx], self.y[idx] 
        return temp_x, temp_y

## Hyperparams

In [8]:
batch_size = 32

## Classify mD

In [13]:
training_data = CustomImageDataset(torch.from_numpy(train_images), torch.from_numpy(train_labels))
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)
testing_data = CustomImageDataset(torch.from_numpy(test_images), torch.from_numpy(test_labels))
test_loader = DataLoader(testing_data, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)

In [14]:
training_features, training_labels = next(iter(train_loader))
print(f"Feature batch shape: {training_features.size()}")
print(f"Labels batch shape: {training_labels.size()}")

Feature batch shape: torch.Size([32, 3, 128, 128])
Labels batch shape: torch.Size([32])


In [11]:
# iterate through the dataset:
for i, batch in enumerate(test_loader):
    print(f'{i}', '. batch length:', f'{len(batch[0])}')

0 . batch length: 32
1 . batch length: 24


In [15]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 128, 3)
        self.conv3 = nn.Conv2d(128, 128, 3)
        self.conv4 = nn.Conv2d(128, 128, 3)
        self.fc1 = nn.Linear(14*14*128, 256) # 3 layers: 14*14*128, 4 layers: 6*6*128
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.drop = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
#         x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = F.relu(self.fc2(x))
        x = self.drop(x)
        x = F.softmax(self.fc3(x))
        return x

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
summary(model, (3, 128, 128))
print('device:', device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 126, 126]           1,792
         MaxPool2d-2           [-1, 64, 63, 63]               0
            Conv2d-3          [-1, 128, 61, 61]          73,856
         MaxPool2d-4          [-1, 128, 30, 30]               0
            Conv2d-5          [-1, 128, 28, 28]         147,584
         MaxPool2d-6          [-1, 128, 14, 14]               0
            Linear-7                  [-1, 256]       6,422,784
           Dropout-8                  [-1, 256]               0
            Linear-9                  [-1, 128]          32,896
          Dropout-10                  [-1, 128]               0
           Linear-11                   [-1, 10]           1,290
Total params: 6,680,202
Trainable params: 6,680,202
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forw

In [8]:
def train(prog_bar = True):
    
    max_test_acc = 0.0
    
    for epoch in range(epochs):
        train_acc = []
        test_acc = []
        if prog_bar:
            pbar = tqdm(train_loader,position=0)
        else:
            pbar = train_loader
            
        # train
        for data in pbar:
            x, labels = data
            x = x.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, labels)
            loss.backward()
            optimizer.step()
            preds = np.argmax(out.cpu().detach().numpy(), -1)
            labels = labels.cpu().detach().numpy()
            acc = np.sum(preds == labels) / len(labels) * 100
            train_acc.append(acc)
            if prog_bar:
                pbar.set_description('Train_acc: '+str(round(acc,2)))
                
        # test
        for data in test_loader:
            x, labels = data
            x = x.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, labels)
            preds = np.argmax(out.cpu().detach().numpy(), -1)
            labels = labels.cpu().detach().numpy()
            acc = np.sum(preds == labels) / len(labels) * 100
            test_acc.append(acc)
            
        if np.mean(test_acc) > max_test_acc:
            max_test_acc = np.mean(test_acc)
            torch.save(model.state_dict(), 'models\model_cnn_mDsim.pth')
        
#         lr_scheduler.step(test_loss)
        print('Epoch: ', str(epoch+1)+'/'+str(epochs),'| Training Acc: ', round(np.mean(train_acc), 2), '| Testing Acc: ', round(np.mean(test_acc), 2))
#         train_losses.append(train_loss)
#         test_losses.append(test_loss)

        
#         if not prog_bar:
#             plt.plot(train_losses, label="Train Loss")
#             plt.plot(test_losses, label="Validation Loss")
#             plt.xlabel("# Epoch")
#             plt.ylabel("Loss")
#             plt.legend(loc='upper right')
#             plt.show()

In [15]:
epochs = 100
train(prog_bar=False)

Epoch:  1/100 | Training Acc:  10.71 | Testing Acc:  15.62
Epoch:  2/100 | Training Acc:  13.39 | Testing Acc:  14.58
Epoch:  3/100 | Training Acc:  14.29 | Testing Acc:  15.62
Epoch:  4/100 | Training Acc:  17.41 | Testing Acc:  21.88
Epoch:  5/100 | Training Acc:  16.52 | Testing Acc:  16.15
Epoch:  6/100 | Training Acc:  26.34 | Testing Acc:  8.85
Epoch:  7/100 | Training Acc:  23.21 | Testing Acc:  29.69
Epoch:  8/100 | Training Acc:  20.09 | Testing Acc:  20.83
Epoch:  9/100 | Training Acc:  23.21 | Testing Acc:  25.52
Epoch:  10/100 | Training Acc:  28.57 | Testing Acc:  28.65
Epoch:  11/100 | Training Acc:  28.12 | Testing Acc:  18.75
Epoch:  12/100 | Training Acc:  22.77 | Testing Acc:  20.31
Epoch:  13/100 | Training Acc:  25.45 | Testing Acc:  20.83
Epoch:  14/100 | Training Acc:  25.45 | Testing Acc:  26.04
Epoch:  15/100 | Training Acc:  25.89 | Testing Acc:  25.0
Epoch:  16/100 | Training Acc:  26.34 | Testing Acc:  20.83
Epoch:  17/100 | Training Acc:  31.25 | Testing Acc

# Classify video

In [9]:

batch_size = 1

In [10]:
max_seq_len = np.max([len(t) for t in train_vids])
print('max_seq_len:', max_seq_len)

for idx, t in enumerate(train_vids):
    num_pad = max_seq_len - len(t)
    if num_pad != 0:
        last_frame = np.expand_dims(t[-1], 0)
        train_vids[idx] = np.concatenate([t, np.tile(last_frame, [num_pad, 1, 1, 1])])
train_vids = np.array(list(train_vids)) 
train_vids = np.swapaxes(train_vids.astype(np.float64), 1, -1)
# train_vids = train_vids.reshape(train_vids.shape[0], train_vids.shape[1],
#                                 train_vids.shape[2], train_vids.shape[3]*train_vids.shape[4])
# [batch_size, channels, height, width, depth].
print(train_vids.shape)

for idx, t in enumerate(test_vids):
    num_pad = max_seq_len - len(t)
    if num_pad != 0:
        last_frame = np.expand_dims(t[-1], 0)
        test_vids[idx] = np.concatenate([t, np.tile(last_frame, [num_pad, 1, 1, 1])])
test_vids = np.array(list(test_vids))
test_vids = np.swapaxes(test_vids.astype(np.float64), 1, -1)
# test_vids = test_vids.reshape(test_vids.shape[0], test_vids.shape[1],
#                               test_vids.shape[2], test_vids.shape[3]*test_vids.shape[4])
print(test_vids.shape)

max_seq_len: 153
(224, 3, 128, 128, 153)
(56, 3, 128, 128, 153)


In [11]:
training_data = CustomImageDataset(torch.from_numpy(train_vids), torch.from_numpy(train_labels))
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)
testing_data = CustomImageDataset(torch.from_numpy(test_vids), torch.from_numpy(test_labels))
test_loader = DataLoader(testing_data, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)

In [12]:
training_features, training_labels = next(iter(train_loader))
print(f"Feature batch shape: {training_features.size()}")
print(f"Labels batch shape: {training_labels.size()}")

Feature batch shape: torch.Size([1, 3, 128, 128, 153])
Labels batch shape: torch.Size([1])


In [35]:
class CNN3D(nn.Module):
    def __init__(self):
        super().__init__()
        # 2D
#         self.conv = nn.Sequential(nn.Conv2d(3, 32, 3),
#                                   nn.MaxPool2d(2, 2),
                                  
#                                   nn.Conv2d(32, 64, 3),
#                                   nn.MaxPool2d(2, 2),
                                  
#                                   nn.Conv2d(64, 128, 3),
#                                   nn.MaxPool2d(2, 2)
#                                   )
        
                                  
#         self.fc = nn.Sequential(nn.Linear(14*128*2446, 16),
#                                 nn.Dropout(0.5),
                                
#                                 nn.Linear(16, 10)
#                                )
        
        self.conv = nn.Sequential(nn.Conv3d(3, 32, 3),
                                  nn.ReLU(),
                                  nn.MaxPool3d((2, 2, 2)),
                                  
                                  nn.Conv3d(32, 64, 3),
                                  nn.ReLU(),
                                  nn.MaxPool3d((2, 2, 2)),
                                  
                                  nn.Conv3d(64, 128, 3),
                                  nn.ReLU(),
                                  nn.MaxPool3d((2, 2, 1)),
                                  
                                  nn.Conv3d(128, 128, 3),
                                  nn.ReLU(),
                                  nn.MaxPool3d((2, 2, 1))
                                  )
        
                                  
        self.fc = nn.Sequential(nn.LSTM(32, 256, num_layers=1),
                                
                                nn.Linear(256, 128),
                                nn.ReLU(),
#                                 nn.Dropout(0.5),
                                
                                nn.Linear(128, 64),
                                nn.ReLU(),
#                                 nn.Dropout(0.5),
                                
                                nn.Linear(64, 10)
                               )
        
#         self.pool = nn.MaxPool3d(2, 2)
#         self.conv2 = nn.Conv3d(32, 64, 3)
#         self.conv3 = nn.Conv3d(64, 64, 3)
        self.lstm = nn.LSTM(32, 128, num_layers=1)
        self.fc1 = nn.Linear(589824, 16) # 3 layers: 14*14*128, 4 layers: 6*6*128
#         self.fc2 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(16, 10)
        self.drop = nn.Dropout(0.5)

    def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = self.pool(F.relu(self.conv3(x)))
#         x = self.pool(F.relu(self.conv4(x)))
        x = self.conv(x)
        x = torch.flatten(x, 1, -2) # flatten all dimensions except batch
#         x = x.view()
#         x = F.relu(self.fc1(x))
#         x = self.fc(x)
#         x = self.drop(x)
        x, hidden = self.lstm(x)
        x = F.relu(self.fc1(torch.flatten(x, 1)))
        x = self.drop(x)
        x = F.relu(self.fc2(x))
#         x = F.relu(self.fc2(x))
#         x = self.drop(x)
        x = F.softmax(x)
        return x

In [41]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(2)
model = CNN3D().to(device)
# summary(model, (3, 128, 128, max_seq_len))
print('device:', device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

device: cuda


In [39]:
epochs = 100
batch_size = 8
train(prog_bar=False)

Epoch:  1/100 | Training Acc:  11.16 | Testing Acc:  5.36
Epoch:  2/100 | Training Acc:  8.93 | Testing Acc:  10.71
Epoch:  3/100 | Training Acc:  8.93 | Testing Acc:  8.93
Epoch:  4/100 | Training Acc:  9.82 | Testing Acc:  12.5
Epoch:  5/100 | Training Acc:  11.61 | Testing Acc:  12.5
Epoch:  6/100 | Training Acc:  4.02 | Testing Acc:  10.71
Epoch:  7/100 | Training Acc:  9.38 | Testing Acc:  7.14


KeyboardInterrupt: 