In [122]:
import random
import torch
from   torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from   torchvision import datasets, transforms

import pandas as pd
import os
import cv2
from os import path as osp
import numpy as np
from torch.utils.data import Dataset, DataLoader

import subprocess
import os
import os.path as osp
from tqdm import tqdm

In [123]:
target_path = '/hdd/datasets/Moments_in_Time_Mini/training'

'''
for action_folder in os.listdir(target_path):
    action_example_folder = osp.join(target_path, action_folder)

    for action_example in tqdm(os.listdir(action_example_folder)):
        if not action_example.endswith('.mp4'):
            continue
        action_example_name = action_example.split('.')[0]

        write_dir = osp.join(action_example_folder, action_example_name)
        action_example_path = osp.join(action_example_folder, action_example)

        if not osp.exists(write_dir):
            os.makedirs(write_dir)

        run_cmd = 'ffmpeg -loglevel panic -i %s %s' % (action_example_path, write_dir + '/image%d.png')
        os.system(run_cmd)
'''

'''
#open csv
fileIn = open("/hdd/datasets/Moments_in_Time_Mini/trainingSet.csv", "r")
#create new csv for writing to 
fileOut = open("/hdd/datasets/Moments_in_Time_Mini/trainingSet_filtered.csv", "w")
for row in fileIn:
    #get string from row
    split = row.split(".")
    #delimit directory name from string
    directory = split[0]
    #if it exists in /hdd/datasets/Moments_in_Time_Mini/training, write to filtered csv
    if os.path.isdir("/hdd/datasets/Moments_in_Time_Mini/training/" + directory):
        #write row to trainingSet_filtered.csv
        fileOut.write(row)
    #else, skip over the row
    
fileIn.close()
fileOut.close()
'''

In [124]:
def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

In [125]:
class MomentsDataset(Dataset):
    def __init__(self, data_path):
        self.data_path = data_path
        self.df = pd.read_csv(self.data_path + 'trainingSet_filtered.csv', header=None)
        self.class_to_index = {}
        actions = [file_name.split('/')[0] for file_name in
                list(self.df[self.df.columns[1]])]

        actions = set(actions)

        for i, action in enumerate(actions):
            self.class_to_index[action] = i


    def get_num_classes(self):
        return len(self.class_to_index)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        print('---- df loc-----')
        print(self.df.iloc[idx])
        print('idx', idx)
        file_loc, label, _, _ = self.df.iloc[idx]
        print('file loc', file_loc)

        full_path = osp.join(self.data_path, 'training', file_loc)
        dir_path = full_path.split('.')[0]

        im_data = []
        images = os.listdir(dir_path)
        for image in images:
            im = cv2.imread(osp.join(dir_path, image))
            resized_image = cv2.resize(im, (128, 128))
            #gray_im = rgb2gray(resized_image)

            im_data.append(resized_image)

        im_data = np.array(im_data)
        im_data = im_data / 255.0
        im_data = im_data[:90]
        if len(im_data) < 90:
            paste_im_data = np.zeros((90, 128, 128, 3))
            paste_im_data[:len(im_data)] = im_data
            paste_im_data[len(im_data):] = im_data[-1]
            im_data = paste_im_data

        index = self.class_to_index[label]

        ret_info = {
                'images': im_data[0],
                'label': index
                }

        return ret_info

In [126]:
class AutoEncoder(nn.Module):
    
    def __init__(self, code_size):
        super().__init__()
        self.code_size = code_size
        
        # Encoder specification
        self.enc_cnn_1 = nn.Conv2d(3, 6, kernel_size=5) #1, 10
        self.pool = nn.MaxPool2d(2, 2)
        self.enc_cnn_2 = nn.Conv2d(6, 16, kernel_size=5) #10, 20
        self.enc_linear_1 = nn.Linear(16 * 29 * 29, 120) #4 * 4 * 20, 50
        self.enc_linear_2 = nn.Linear(120, 84)
        self.enc_linear_3 = nn.Linear(50, self.code_size) #50
        
        # Decoder specification
        self.dec_linear_1 = nn.Linear(self.code_size, 160)
        self.dec_linear_2 = nn.Linear(160, IMAGE_SIZE)
        
    def forward(self, images):
        code = self.encode(images)
        out = self.decode(code)
        return out, code
    
    def encode(self, images):
        code = self.enc_cnn_1(images)
        code = F.selu(F.max_pool2d(code, 2))
        
        code = self.enc_cnn_2(code)
        code = F.selu(F.max_pool2d(code, 2))
        
        code = code.view([images.size(0), -1])
        code = F.selu(self.enc_linear_1(code))
        code = self.enc_linear_2(code)
        return code
    
    def decode(self, code):
        out = F.selu(self.dec_linear_1(code))
        out = F.sigmoid(self.dec_linear_2(out))
        out = out.view([code.size(0), 1, IMAGE_WIDTH, IMAGE_HEIGHT])
        return out

In [127]:
IMAGE_SIZE = 784
IMAGE_WIDTH = IMAGE_HEIGHT = 28

In [128]:
# Hyperparameters
#code_size = 20 #num_classes
num_epochs = 10 #4
batch_size = 4 #128
lr = 0.002
optimizer_cls = optim.Adam

In [129]:
dataset_path = '/hdd/datasets/Moments_in_Time_Mini/'

print('data set ')
ds = MomentsDataset(dataset_path)

code_size = ds.get_num_classes()
print(code_size)

#label_count = ds.get_num_classes()
#batch_size = 4
print('data loader')
dataloader = DataLoader(ds, batch_size=batch_size, shuffle=True, num_workers=1)

# Load data
#train_data = datasets.MNIST('~/data/mnist/', train=True , transform=transforms.ToTensor())
#test_data  = datasets.MNIST('~/data/mnist/', train=False, transform=transforms.ToTensor())
#train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, batch_size=batch_size, num_workers=4, drop_last=True)

data set 
44
data loader


In [130]:
# Instantiate model
autoencoder = AutoEncoder(code_size)
loss_fn = nn.BCELoss()
optimizer = optimizer_cls(autoencoder.parameters(), lr=lr)

In [136]:
# Training loop
for epoch in range(num_epochs):
    print("Epoch %d" % epoch)
    
    print('before')
    for sample_data in dataloader:    # train_loader
        #print(sample_data)
        #raise ValueError()
        out, code = autoencoder(Variable(sample_data['images'])) #images
        
        optimizer.zero_grad()
        loss = loss_fn(out, images)
        loss.backward()
        optimizer.step()
        
    print("Loss = %.3f" % loss.data[0])

Epoch 0
before
---- df loc-----
0    stirring/yt-SDSV9SXJ4MY_100.mp4
1                           stirring
2                                  4
3                                  0
Name: 3043, dtype: object
idx 3043
file loc stirring/yt-SDSV9SXJ4MY_100.mp4
---- df loc-----
0    steering/yt-gpaFEA4CQj0_83.mp4
1                          steering
2                                 4
3                                 0
Name: 9836, dtype: object
idx 9836
file loc steering/yt-gpaFEA4CQj0_83.mp4
---- df loc-----
0    resting/getty-artist-relaxing-with-coffee-vide...
1                                              resting
2                                                    4
3                                                    0
Name: 2151, dtype: object
idx 2151
file loc resting/getty-artist-relaxing-with-coffee-video-id645458562_7.mp4
---- df loc-----
0    feeding/yt-0oong-24ino_15.mp4
1                          feeding
2                                5
3                                0
Name

RuntimeError: Given groups=1, weight of size [6, 3, 5, 5], expected input[4, 128, 128, 3] to have 3 channels, but got 128 channels instead

In [132]:
# Try reconstructing on test data
test_image = random.choice(test_data)
test_image = Variable(test_image.view([1, 1, IMAGE_WIDTH, IMAGE_HEIGHT]))
test_reconst, _ = autoencoder(test_image)

NameError: name 'test_data' is not defined

In [None]:
torchvision.utils.save_image(test_image.data, 'orig.png')
torchvision.utils.save_image(test_reconst.data, 'reconst.png')