In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
import torchvision.models as models

In [2]:
rootpath = "../data"   ## subject to change
resultpath = os.path.join(rootpath, 'results')

In [3]:
import torch
import torchvision.transforms as trn
from torch.utils.data import Dataset
import glob
import os
from PIL import Image

def buildIndexLabelMapping() :
    idx2label = os.listdir(os.path.join(rootpath, 'Moments_in_Time_Mini/jpg/validation'))
    label2idx = {}
    for i, label in enumerate(idx2label) :
        label2idx[label] = i
    return idx2label, label2idx

idx2label, label2idx = buildIndexLabelMapping()
    

class Moments(Dataset) :
    """
    A customized data loader for Moments-In-Time dataset.
    """    
    def __init__(self, subset='validation', use_frames=16) :
        super().__init__()
        root = os.path.join(rootpath, 'Moments_in_Time_Mini/jpg', subset)     
        self.use_frames = use_frames
        
        self.filenames = []

        for video_path in glob.glob(os.path.join(root, "*/*")) :
            label = video_path.split('/')[-2]
            self.filenames.append((video_path, label2idx[label]))
        self.len = len(self.filenames)
        
        self.tf = trn.Compose([trn.Resize((224, 224)), 
                               trn.ToTensor(), 
                               trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ## subject to change
                              ])
    
    def __getitem__(self, index) :
        video_path, label = self.filenames[index]
        tot_frames = len(os.listdir(video_path)) - 1
        video = []
        time_spacing = (tot_frames-1)//(self.use_frames-1)
        for i in range(1, 1+self.use_frames * time_spacing, time_spacing) :
            img = Image.open(os.path.join(video_path, 'image_{:05d}.jpg'.format(i))).convert('RGB')
            video.append(self.tf(img))
        return torch.stack(video, dim=1), label

    def __len__(self) :
        return self.len

In [4]:
use_cuda = torch.cuda.is_available()
torch.manual_seed(123)
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

cuda


In [5]:
trainset = Moments(subset='training')
valset = Moments(subset='validation')
print("Number of training videos:", len(trainset))
print("Number of validation videos:", len(valset))

Number of training videos: 100000
Number of validation videos: 10000


In [6]:
# video_info = trainset.__getitem__(9263)
# print(video_info[0].shape)
# print(video_info[1])

In [7]:
trainset_loader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=8)
valset_loader = DataLoader(valset, batch_size=64, shuffle=True, num_workers=8)

In [8]:
class FrameResNet50(nn.Module) :
    def __init__(self, use_pretrain=-1, num_classes=200) :
        super().__init__()
        self.frame_model = models.resnet18(num_classes=num_classes) ## back to 50
        if (use_pretrain >= 0) :
            self.loadPretrainedParam(use_pretrain)
        
    def forward(self, x) :
        B, C, T, H, W = x.shape
        h = x.permute(0, 2, 1, 3, 4).contiguous().view(-1, C, H, W)
        logits = self.frame_model(h)
        log_prob = F.log_softmax(logits, dim=1).view(B, T, -1).mean(dim=1)
        return log_prob     
    
    def loadPretrainedParam(self, n_levels) :
        assert(n_levels <= 4)
        # resnet_imgnet_checkpoint = torch.load(os.path.join(rootpath, 'models/resnet50-19c8e357.pth'))
        resnet_imgnet_checkpoint = torch.load(os.path.join(rootpath, 'models/resnet18-5c106cde.pth'))  ## back to 50
        states_to_load = {}
        for name, param in resnet_imgnet_checkpoint.items() :
            if name.startswith('fc') :
                continue
            if name.startswith('layer') :
                if int(name[5]) <= n_levels :
                    states_to_load[name]=param
            else :
                states_to_load[name]=param
        model_state = self.frame_model.state_dict()
        model_state.update(states_to_load)
        self.frame_model.load_state_dict(model_state)
        

In [9]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'state_dict': model.state_dict(),
             'optimizer' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to %s' % checkpoint_path)
    
def load_checkpoint(checkpoint_path, model, optimizer):
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    print('model loaded from %s' % checkpoint_path)

In [10]:
def test():
    model.eval()  # set evaluation mode
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in testset_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, size_average=False).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(testset_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(testset_loader.dataset),
        100. * correct / len(testset_loader.dataset)))

In [11]:
def train_save(epoch, model, optimizer, log_interval=100):
    model.train()  # set training mode
    iteration = 0
    for ep in range(epoch):
        for batch_idx, (data, target) in enumerate(trainset_loader):
            print("iteration =", iteration)
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            if iteration % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    ep, batch_idx * len(data), len(trainset_loader.dataset),
                    100. * batch_idx / len(trainset_loader), loss.item()))
            iteration += 1
        save_checkpoint(os.path.join(resultPath, '2d_resnet-%i.pth'%ep+1), model, optimizer)
        test()
    
    # save the final model
    save_checkpoint(os.path.join(resultPath, '2d_resnet-final.pth'), model, optimizer)

In [12]:
model = FrameResNet50(use_pretrain=-1).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
train_save(1, model, optimizer)

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1524590031827/work/aten/src/THC/generic/THCStorage.cu:58