In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time
import numpy as np
import os
import shutil
import  opennmt.inputters.record_inputter as inpu
import tensorflow as tf
import torch.utils.data as data
from PIL import Image
import os
import os.path
import pickle
from scipy.spatial.distance import cosine, euclidean,correlation
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
class I(torch.nn.Module):
    """Pass-through module: ``forward`` hands its input back untouched.

    ``VideoClass`` assigns an instance of this class to the backbone's ``fc``
    attribute, replacing that layer with a no-op.
    """

    def __init__(self):
        super().__init__()

    def extra_repr(self):
        """Text shown inside the parentheses of this module's ``repr``."""
        return 'identity'

    def forward(self, x):
        """Return ``x`` as-is."""
        return x


class VideoClass(torch.nn.Module):
    """torchvision ``r2plus1d_18`` backbone followed by a linear output layer.

    The backbone's own ``fc`` attribute is replaced by the pass-through module
    ``I``; the backbone output is then fed to ``fc_out``, a linear layer taking
    512 input features and producing 61 outputs.
    """

    def __init__(self):
        super().__init__()
        backbone = torchvision.models.video.r2plus1d_18(pretrained=True)
        backbone.fc = I()
        self.model = backbone
        self.fc_out = torch.nn.Linear(in_features=512, out_features=61)

    def get_feature(self, x):
        """Return the backbone output for ``x`` without applying ``fc_out``."""
        return self.model(x)

    def forward(self, x):
        """Return ``fc_out`` applied to the backbone output for ``x``."""
        return self.fc_out(self.get_feature(x))
    

In [3]:
class ImageFilelist(data.Dataset):
    """Dataset of fixed-length image clips described by lists of file paths.

    Each entry of ``image_list`` is a ``(paths, target)`` pair, where ``paths``
    is a sequence of image file paths that together form one clip.

    Args:
        image_list: sequence of ``(paths, target)`` pairs.
        seq_size: number of frame slots in every returned clip tensor.
        transform: callable applied to each loaded PIL image. Its result is
            written into one frame slot, so it must be assignable to a
            ``(3, img_size, img_size)`` tensor slice. The default ``None`` is
            not callable, so a transform has to be supplied before items can
            be fetched.
        img_size: height and width of each frame slot.
    """

    def __init__(self, image_list, seq_size=4, transform=None, img_size=112):
        self.imgs = image_list
        self.transform = transform
        self.seq_size = seq_size
        self.img_size = img_size

    def __getitem__(self, index):
        """Return ``(clip, target)`` for the entry at ``index``.

        ``clip`` is a tensor of shape ``(3, seq_size, img_size, img_size)``.
        Frame slots beyond the number of paths in the entry stay zero; an
        entry with more than ``seq_size`` paths raises ``IndexError``.
        """
        impaths, target = self.imgs[index]
        # Size the buffer from the configured clip length and frame size so
        # that the ``seq_size`` argument is honoured instead of a fixed 4x112x112.
        clip = torch.zeros(3, self.seq_size, self.img_size, self.img_size)
        for slot, impath in enumerate(impaths):
            clip[:, slot, :, :] = self.transform(self.img_loader(impath))
        return clip, target

    def __len__(self):
        """Number of clips in the dataset."""
        return len(self.imgs)

    def img_loader(self, path):
        """Open the image at ``path`` and convert it to RGB."""
        return Image.open(path).convert('RGB')

In [4]:
# Path of the serialized model that is loaded with torch.load further down.
model_path = '/home/alptekin/Desktop/CNN_Train/StableModels/3D-CNN-Sign/model.pth'
# Dataset split: appended to the dataset root to form data_path and used as
# the stem of the output '<data_type>.tfrecord' file name.
data_type = 'test' 
# Free-form tag; not referenced by any other visible cell.
description = 'right-hand'

In [5]:
def get_images_from_folder(path,masked=False):
    """Collect ``(image_path, class_index)`` pairs for the ``.png`` files under ``path``.

    The sub-directories of ``path`` are visited in sorted order and the n-th
    one gets class index n. Images are read, also in sorted order, from
    ``<path>/<folder>/right/``.

    With ``masked=True`` an image is looked up in the mapping ``confs`` under
    the key formed by the last four ``/``-separated components of its path.
    Images whose value is above 0.4 are kept; if a folder has no such image,
    every image of that folder that has a key in ``confs`` is kept instead.

    NOTE(review): ``confs`` is not a parameter and is not defined in this
    function; it is resolved as a global, so it must exist before calling
    with ``masked=True``.

    Returns:
        list of ``(image_path, class_index)`` tuples.
    """
    collected = []
    for class_idx, folder in enumerate(sorted(os.listdir(path))):
        right_dir = path+'/'+folder + '/' + 'right/'
        accepted = []   # images that are kept for this folder
        fallback = []   # masked mode only: images that merely have a key in confs
        for fname in sorted(os.listdir(right_dir)):
            if not fname.endswith('.png'):
                continue
            entry = (right_dir + fname, class_idx)
            if not masked:
                accepted.append(entry)
                continue
            key = '/'.join((right_dir + fname).split('/')[-4:])
            if key in confs:
                fallback.append(entry)
                if confs[key] > 0.4:
                    accepted.append(entry)
        # Nothing passed for this folder: use the fallback images instead.
        collected.extend(accepted if accepted else fallback)
    return collected

In [6]:
# Directory of the selected split: dataset root followed by data_type.
data_path = '/media/alptekin/ssd_data/Hand-Translation-Dataset/' + data_type

In [7]:
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the notebook still runs (slowly) on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Side length in pixels that frames are resized to by the transform pipeline.
img_size = 112
# Number of clips per DataLoader batch; also the row stride used when the
# extracted features are written into the feature array.
batch_size = 128

In [8]:
# Per-frame preprocessing: resize to img_size x img_size, convert to a tensor,
# then normalise each of the three channels with the given mean and std.
# NOTE(review): the mean/std values look like the Kinetics-400 statistics that
# torchvision documents for its pretrained video models - confirm.
trans = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989])])

In [9]:
# All (image_path, class_index) pairs of the split, without confidence masking.
image_list = get_images_from_folder(data_path,masked=False)

In [10]:
# Group frame paths by the folder they come from, keeping first-seen order.
# get_images_from_folder builds every path as <root>/<folder>/right/<file>, so
# the folder name is always the third component from the end. Indexing from
# the end works for any dataset root, whereas a fixed index counted from the
# front only matches one particular directory depth.
videos = dict()
for v, _ in image_list:
    vn = v.split('/')[-3]
    videos.setdefault(vn, []).append(v)

In [11]:
# Split every video's frame list into consecutive clips of `clip_len` paths.
# A trailing clip that comes up short is padded by repeating its last frame,
# so every clip holds exactly `clip_len` paths. Each clip is labelled with the
# key of the video it was cut from.
clip_len = 4  # must not exceed the number of frame slots of the dataset (4 by default)
vs = list()
for label, frames in videos.items():
    for start in range(0, len(frames), clip_len):
        # Slicing copies, so padding the clip never touches `frames` itself.
        clip = frames[start:start + clip_len]
        clip.extend([clip[-1]] * (clip_len - len(clip)))
        vs.append((clip, label))

In [12]:
# Wrap the clip list in the dataset and iterate it in its stored order
# (shuffle=False); features are later addressed by row position.
data_folder = ImageFilelist(image_list=vs, transform=trans)
data_loader = torch.utils.data.DataLoader(
    dataset=data_folder,
    batch_size=batch_size,
    shuffle=False,
    num_workers=8,
)

In [13]:
# Distinct class indices present in image_list (np.unique returns them sorted).
counts = np.unique([entry[1] for entry in image_list])

In [14]:
# Number of entries (clips) held by the dataset; used as the row count of the
# feature array.
num_img = len(data_folder.imgs)

In [15]:
# Load the serialized model onto `device` and switch it to evaluation mode.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. The loaded object must provide get_feature (called by the
# extraction loop) - presumably it is a VideoClass instance, which would need
# VideoClass and I to be defined before this cell runs; confirm.
model = torch.load(model_path,map_location=device)
model = model.eval()
model.to(device)
# One 512-wide row per clip, filled in by the extraction loop.
features_array = np.zeros((num_img,512))

In [16]:
# Sanity check: fraction of entries of features_array that are exactly zero.
(features_array == 0).sum() / features_array.reshape(-1).shape[0]

1.0

In [17]:
# Run every batch through model.get_feature and store the result in the rows
# matching the batch's position in the loader (batch i -> rows starting at
# i * batch_size). Gradients are disabled for the whole pass.
with torch.no_grad():
    for i, (x, y) in enumerate(data_loader):
        row_from = i * batch_size
        row_to = row_from + batch_size
        batch_features = model.get_feature(x.to(device))
        features_array[row_from:row_to, :] = batch_features.cpu().numpy()
        # Overwrite a single progress line every 100 batches.
        if (i + 1) % 100 == 0:
            print(i/len(data_loader),end='\r')

0.8461538461538461

In [18]:
# Label of every clip in dataset order, and the number of clips per distinct
# label. np.unique reports the counts in sorted-label order.
# NOTE(review): the interval computation built on these counts only lines up
# with the feature rows if that sorted order equals the order in which the
# labels appear in the dataset - confirm.
label_list = [clip[1] for clip in data_folder.imgs]
_,label_cnts = np.unique(label_list,return_counts=True)

In [19]:
# Row boundaries per label: a leading 0 followed by the running total of the
# per-label clip counts, so intervals[k]:intervals[k+1] spans the k-th label.
intervals = np.insert(np.cumsum(label_cnts), 0, 0)

In [21]:
# Write one sequence record per label interval to '<data_type>.tfrecord'.
file = tf.python_io.TFRecordWriter(data_type+'.tfrecord')
# NOTE(review): dis_list is not used by any visible cell.
dis_list = list()
try:
    for ind in range(intervals.shape[0]-1):
        start,end = intervals[ind:ind+2]
        # Feature rows of this interval, written in reversed row order.
        # NOTE(review): confirm the reversal is intended.
        f = features_array[start:end][::-1]
        inpu.write_sequence_record(f,file)
finally:
    # Always release the writer, even if writing a record raises, so the
    # output file handle is not leaked.
    file.close()