In [1]:
import csv
from collections import namedtuple
import os
import glob
import numpy as np
import torch
import time
import cv2
#import json
import numpy as np
from PIL import Image
from torchvision.transforms import *
import torch.utils.data
import torch

In [2]:
# --- sampling configuration -------------------------------------------------

# File suffixes treated as video frames when a folder is scanned.
IMG_EXTENSIONS = ['.jpg', '.JPG', '.jpeg', '.JPEG']

# get_frame_names() reads clip_size * nclips * step_size consecutive frames and
# keeps every step_size-th one; nclips = -1 means "use every frame found".
clip_size, nclips, step_size = 18, 1, 2

# True -> the window always starts at the first frame (no random offset).
is_val = True

# --- per-frame preprocessing ------------------------------------------------

# Centre-crop to 84x84, convert to a tensor, then normalise each channel
# with the given (mean, std) pair.
transform = Compose([
    CenterCrop(84),
    ToTensor(),
    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


###################################################

# root = './test_video/9223/%05d.jpg'%i     % path of frames

# define function to load image from a path of image file 
def image_loader(image_path):
    """Open the image file at ``image_path`` and return it as an RGB PIL image.

    No tensor conversion or device transfer happens here; the caller is
    expected to apply its own transform to the returned image.

    Args:
        image_path: path of the image file to read.

    Returns:
        A ``PIL.Image.Image`` in mode ``'RGB'``.
    """
    # convert() returns a new, fully loaded image, so the file opened by
    # Image.open() can be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        return source.convert('RGB')

# define function to collect the frame file names in a folder

def get_frame_names(path, *, clip_size=None, nclips=None, step_size=None,
                    is_val=None, extensions=None):
    """Return the sorted, sub-sampled list of frame files found in ``path``.

    The folder is scanned for files ending in one of ``extensions``. A window
    of ``clip_size * nclips * step_size`` consecutive frames is selected and
    every ``step_size``-th frame of that window is returned. When the folder
    holds fewer frames than the window needs, the last frame is repeated to
    fill it. When it holds more, the window starts at frame 0 if ``is_val`` is
    true and at a random offset otherwise.

    Args:
        path: directory that contains the frame images.
        clip_size, nclips, step_size, is_val: optional overrides. Any value
            left as ``None`` falls back to the module-level setting of the
            same name, looked up at call time. ``nclips == -1`` means "use
            every frame that was found".
        extensions: optional iterable of file suffixes; defaults to the
            module-level ``IMG_EXTENSIONS``.

    Returns:
        list of frame paths (str).

    Raises:
        IndexError: if frames must be padded but ``path`` contains none.
    """
    # A parameter shadows the global of the same name, so unset options are
    # resolved through globals() rather than by bare name.
    settings = globals()
    if clip_size is None:
        clip_size = settings["clip_size"]
    if nclips is None:
        nclips = settings["nclips"]
    if step_size is None:
        step_size = settings["step_size"]
    if is_val is None:
        is_val = settings["is_val"]
    if extensions is None:
        extensions = settings["IMG_EXTENSIONS"]

    frame_names = []
    for ext in extensions:
        frame_names.extend(glob.glob(os.path.join(path, "*" + ext)))
    # De-duplicate: on case-insensitive filesystems "*.jpg" and "*.JPG" match
    # the same files, which would otherwise list every frame twice.
    frame_names = sorted(set(frame_names))
    num_frames = len(frame_names)

    # set number of necessary frames
    if nclips > -1:
        num_frames_necessary = clip_size * nclips * step_size
    else:
        num_frames_necessary = num_frames

    # pick frames
    offset = 0
    if num_frames_necessary > num_frames:
        if not frame_names:
            raise IndexError(
                "no frames with extensions %r found in %r"
                % (list(extensions), path))
        # Pad with the last frame if the video is shorter than necessary.
        frame_names += [frame_names[-1]] * (num_frames_necessary - num_frames)
    elif num_frames_necessary < num_frames and not is_val:
        # Temporal augmentation: sample the starting offset. randint's upper
        # bound is exclusive, so +1 keeps the last valid window reachable.
        diff = num_frames - num_frames_necessary
        offset = np.random.randint(0, diff + 1)

    return frame_names[offset:num_frames_necessary + offset:step_size]

In [3]:

# One entry of the input CSV: the video id, its label string, and the
# directory path built from data_root and the id.
ListDataJpeg = namedtuple('ListDataJpeg', ['id', 'label', 'path'])


class JpegDataset(object):
    """Video index and label lookup loaded from two CSV files.

    Attributes:
        csv_data: list of ``ListDataJpeg`` items, one per row of the input CSV.
        classes: list of label strings, in the order of the labels CSV.
        classes_dict: two-way mapping, label -> index and index -> label.
    """

    def __init__(self, csv_path_input, csv_path_labels, data_root):
        """Read both CSV files and build the label lookup.

        Args:
            csv_path_input: ';'-separated file with ``id;label`` rows.
            csv_path_labels: file with one label per row (first column used).
            data_root: directory joined with each id to form the item path.
        """
        self.csv_data = self.read_csv_input(csv_path_input, data_root)
        self.classes = self.read_csv_labels(csv_path_labels)
        self.classes_dict = self.get_two_way_dict(self.classes)

    def read_csv_input(self, csv_path, data_root):
        """Return a list of ``ListDataJpeg`` built from the rows of ``csv_path``.

        Blank lines are skipped. A non-blank row with fewer than two fields
        still raises ``IndexError``.
        """
        csv_data = []
        # newline='' is what the csv module expects for files it parses.
        with open(csv_path, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=';'):
                if not row:
                    # csv.reader yields [] for a blank line; nothing to index.
                    continue
                video_id, label = row[0], row[1]
                csv_data.append(
                    ListDataJpeg(video_id, label,
                                 os.path.join(data_root, video_id)))
        return csv_data

    def read_csv_labels(self, csv_path):
        """Return the first column of every non-blank row of ``csv_path``."""
        with open(csv_path, newline='') as csvfile:
            return [row[0] for row in csv.reader(csvfile) if row]

    def get_two_way_dict(self, classes):
        """Map each label to its position in ``classes`` and each position back."""
        classes_dict = {}
        for i, item in enumerate(classes):
            classes_dict[item] = i
            classes_dict[i] = item
        return classes_dict


In [10]:
# Build the video index and the label lookup from the two CSV files.
dataset_object = JpegDataset(
    csv_path_input="csv_files1/jester-v7-test.csv",
    csv_path_labels="csv_files/jester-v1-labels.csv",
    data_root="/home/hoanganh/20bn-datasets/20bn-jester-v1/",
)

csv_data = dataset_object.csv_data
print(len(csv_data))

# Parallel lists: the frame folder of every video and its numeric class index.
path_names, target_idx = [], []
for index, item in enumerate(csv_data):
    path_names.append(item.path)
    # classes_dict maps the label string to its integer index.
    target_index = dataset_object.classes_dict[item.label]
    target_idx.append(target_index)

99


In [11]:
# Show the collected folders and their class indices, one list per line.
print(path_names, target_idx, sep="\n")

['/home/hoanganh/20bn-datasets/20bn-jester-v1/100006', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100021', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100048', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100059', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100075', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100090', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100126', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100132', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100135', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100153', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100183', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100208', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100214', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100226', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100232', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100264', '/home/hoanganh/20bn-datasets/20bn-jester-v1/10033', '/home/hoanganh/20bn-datasets/20bn-jester-v1/100339', '/home/hoanganh/20bn-dataset

In [25]:
### Load every video folder into one array of clips and time the whole run.
train_dataset = []
start = time.time()
for path in path_names:
    # Frame files selected for this video.
    img_paths = get_frame_names(path)

    # One transformed frame per entry, each with a leading axis of size 1 so
    # the frames can be concatenated along it.
    imgs = []
    for img_path in img_paths:
        img = transform(image_loader(img_path))
        imgs.append(img.unsqueeze(0))

    # Concatenate along the frame axis, then swap the first two axes so the
    # channel axis comes before the frame axis.
    data = torch.cat(imgs).permute(1, 0, 2, 3).numpy()
    train_dataset.append(data)

train_dataset = np.array(train_dataset)
print(train_dataset.shape)
print(time.time() - start)

(99, 3, 18, 84, 84)
0.8612079620361328
