In [19]:
import torch.nn as nn
import torch.optim as optim

from torchvision.transforms import ToTensor, Normalize
from torch.utils.data import DataLoader

from torch.optim.adam import Adam
from gulpio import transforms
from gulpio.dataset import GulpVideoDataset 

In [None]:
batch_size = 16
step_size = 1
num_frames = 24
num_workers = 8

frame_size = (96, 96)

In [4]:
# -- Datasets (Objects: transformation, Config: data_dir, custom_options)
train_data = GulpVideoDatasetBoundingBoxes(data_path='/mnt/data02/data/20bn-jester/gulp/training',
                                           num_frames=num_frames,
                                           step_size=step_size,
                                           is_val=False,
                                           stack=False,
                                           transform=train_transform,
                                           target_transform=lambda label_tuple: label_tuple[0])

val_data = GulpVideoDatasetBoundingBoxes(data_path='/mnt/data02/data/20bn-jester/gulp/validation',
                                         num_frames=num_frames,
                                         step_size=step_size,
                                         is_val=True,
                                         stack=False,
                                         transform=validation_transform,
                                         target_transform=lambda label_tuple: label_tuple[0])

NameError: name 'GulpVideoDatasetBoundingBoxes' is not defined

In [5]:
# -- Data loaders (Objects: dataset, Config: batch_size, num_workers, shuffle)
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    drop_last=True
)
val_loader = DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    drop_last=True)

NameError: name 'train_data' is not defined

In [22]:
# -- Transformations
train_transform = transforms.ComposeVideo(
    img_transforms=[
        # transforms.Scale(config['frame_size']),
        transforms.Scale((128, 128)),
        transforms.RandomCrop(frame_size),
        ToTensor(),
        Normalize(
            mean=(0.485, 0.456, 0.406),  # default values for imagenet
            std=(0.229, 0.224, 0.225)
        )
    ],
    video_transforms=[
        lambda frames: torch.stack(frames),
        lambda frames_tensor: frames_tensor.permute(1, 0, 2, 3)  # channels first
    ]
)
validation_transform = transforms.ComposeVideo(
    img_transforms=[
        # transforms.Scale(config['frame_size']),
        transforms.Scale((128, 128)),
        transforms.CenterCrop(frame_size),
        ToTensor(),
        Normalize(
            mean=(0.485, 0.456, 0.406),  # default values for imagenet
            std=(0.229, 0.224, 0.225)
        )
    ],
    video_transforms=[
        lambda frames: torch.stack(frames),
        lambda frames_tensor: frames_tensor.permute(1, 0, 2, 3)  # channels first
    ]
)

NameError: name 'config' is not defined

# define network

In [6]:
class GlobalSpatialMaxPooling(nn.Module):
    """
    Global maxpooling operation on the last two dimensions.
    # Input shape
        5D tensor with shape: `(samples, channels, steps, height, width)`.
    # Output shape
        5D tensor with shape: `(samples, channels, steps, 1, 1)`.
    """

    def forward(self, x):
        kernel_size = (1,) + tuple(x.size()[-2:])
        return F.max_pool3d(x, kernel_size=kernel_size)

In [14]:
class TimeDistributedLinear(nn.Module):
    def __init__(self, num_in, num_out, activation):
        super(TimeDistributedLinear, self).__init__()
        self.linear = nn.Linear(num_in, num_out)
        self.activation = activation

    def forward(self, x):
        if x.ndimension() == 2:
            return self.activation(self.linear(x))
        batch_size, num_steps, num_in = x.size()
        x = x.contiguous().view(-1, num_in)
        x = self.activation(self.linear(x))
        return x.view(batch_size, num_steps, x.size()[1])

In [23]:
class MyNet(nn.Module):
    #def _create_conv(self, i, l, p):         
    #    setattr(self, 'conv'+str(i),  nn.Conv3d(in_channels=3, out_channels, kernel_size, bias=False))
    #    setattr(self, 'bnorm'+str(i), nn.BatchNorm3d())
    #    if p:
    #        setattr(self, 'pool'+str(i),  nn.MaxPool3d((1, 2, 2)))
    #    setattr(self, 'act1'+str(i),  nn.ReLU())

    #def _conv_forward(self, i, x):
    #    h = self.
        
        
    def __init__(self):
        super(MyNet, self).__init__()
        
        self.use_cuda = False
        self.gpus = [0]
        #self.initialization_scheme = initialization_scheme
        
        num_features = 256
        num_classes = 30
        
        kernel_base = 32
        layers = [kernel_base, 2*kernel_base, 4*kernel_base, 8*kernel_base, 8*kernel_base, 8*kernel_base]
        self._pool = [True, True, True, False, False, False]
        
        # L1
        self.conv1  = nn.Conv3d(in_channels=3, out_channels=layers[0], kernel_size=(3,3,3), bias=False)
        self.bnorm1 = nn.BatchNorm3d(layers[0])
        self.pool1  = nn.MaxPool3d((1, 2, 2))
        self.act1   = nn.ReLU()
        
        # L2
        self.conv2  = nn.Conv3d(in_channels=layers[0], out_channels=layers[1], kernel_size=(3,3,3), bias=False)
        self.bnorm2 = nn.BatchNorm3d(layers[1])
        self.pool2  = nn.MaxPool3d((1, 2, 2))
        self.act2   = nn.ReLU()
        
        # L3
        self.conv3  = nn.Conv3d(in_channels=layers[1], out_channels=layers[2], kernel_size=(3,3,3), bias=False)
        self.bnorm3 = nn.BatchNorm3d(layers[2])
        self.pool3  = nn.MaxPool3d((1, 2, 2))
        self.act3   = nn.ReLU()
        
        # L4
        self.conv4  = nn.Conv3d(in_channels=layers[2], out_channels=layers[3], kernel_size=(3,3,3), bias=False)
        self.bnorm4 = nn.BatchNorm3d(layers[3])
        self.act4   = nn.ReLU()
        
        # L5
        self.conv5  = nn.Conv3d(in_channels=layers[3], out_channels=layers[4], kernel_size=(3,3,3), bias=False)
        self.bnorm5 = nn.BatchNorm3d(layers[4])
        self.act5   = nn.ReLU()
        
        # L6
        self.conv6  = nn.Conv3d(in_channels=layers[4], out_channels=layers[5], kernel_size=(3,3,3), bias=False)
        self.bnorm6 = nn.BatchNorm3d(layers[5])
        self.pool6 = GlobalSpatialMaxPooling()
        self.act6   = nn.ReLU()
        
        # L7
        self.lstm = nn.LSTM(8 * kernel_base, num_features, 2, batch_first=True,
                    bidirectional=False)
        
        self.logsoftmax = TimeDistributedLinear(num_features, num_classes,
                                                nn.LogSoftmax())
        
    def extract_features(self, x):
        # L1
        h = self.conv1(x)
        h = self.bnorm1(h)
        h = self.pool1(h)
        h = self.act1(h)
    
        # L2
        h = self.conv2(x)
        h = self.bnorm2(h)
        h = self.pool2(h)
        h = self.act2(h)
    
        # L3
        h = self.conv3(x)
        h = self.bnorm3(h)
        h = self.pool3(h)
        h = self.act3(h)
    
        # L4
        h = self.conv4(x)
        h = self.bnorm4(h)
        h = self.act4(h)
    
        # L5
        h = self.conv5(x)
        h = self.bnorm5(h)
        h = self.act5(h)
    
        # L6
        h = self.conv6(x)
        h = self.bnorm6(h)
        h = self.pool6(h)
        h = self.act6(h)
        
        h = h.view(h.size()[0:3])
        h = h.permute(0, 2, 1)
        
        # Batch, Time, Channel, Col, Row
        
        self.lstm.flatten_parameters()
        features = self.lstm(h, None)[0]
        
        return features
    
    def forward(self, x):
        features = self.extract_features(x)
        
        probs = self.logsoftmax(features)
        if probs.ndimension() == 3:
            probs = probs.mean(dim=1)
        return probs
        
    
        
        
        
        
    
    

In [24]:
model = MyNet()


optimizer_params={'lr': 0.01, 'momentum': 0.9, 'output_dir': './out'}
optimizer = optim.Adam(model.parameters(), **optimizer_params)

TypeError: __init__() missing 1 required positional argument: 'self'

In [21]:
n = 10
use_cuda = False
for epoch in range(n):
    
    for batch_idx, (x, target) in enumerate(data_loader):
        if use_cuda:
            x = x.cuda()
            target = target.cuda()
                    
            # optimization step  
            model.train(mode=True) # training mode
            optimizer.zero_grad()
            
            # turn tensors into autograd variables                                                                              
            x, target = Variable(x_batch), Variable(target_batch)
            # forward pass                                                                                                      
            output = self.model(x)
            loss = self.criterion(output, target)
            # backward pass                                                                                                     
            loss.backward()
            # update                                                                                                            
            self.optimizer.step()
                                                                                  
            train_loss = loss.data[0]
            pred_labels = get_pred_label_from_output(output, multiclass=self.multiclass).data
            pred_labels = self._cast_pred_labels(pred_labels)
            train_accuracy = ((pred_labels == target).sum() / target.size(0))
  


NameError: name 'data_loader' is not defined