In [6]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np

In [7]:
from sklearn.preprocessing import LabelEncoder

from data.dataset import VideoDataSet

# Training split of the video dataset.
train_dataset = VideoDataSet('train')

# Label values exposed by the dataset; printed for a quick visual check.
label = train_dataset.get_label()
print(label)

# Fit the encoder on those labels so collate code can map them to integer ids.
le = LabelEncoder()
le.fit(label)

['TennisSwing', 'Punch', 'CricketShot', 'ShavingBeard', 'PlayingCello']


In [8]:

def collate_fn(batch):
    """Collate variable-length samples into padded batch tensors.

    Each element of ``batch`` is indexed as ``sample[0]`` (a tensor whose
    first dimension is the sequence length) and ``sample[1]`` (its label).

    Returns:
        ``((frames, masks), labels)`` where
        * ``frames`` holds every ``sample[0]`` zero-padded along its first
          dimension to the longest sequence in the batch, batch dimension first;
        * ``masks`` is 1.0 at real positions and 0.0 at padded positions,
          shaped ``(batch, max_len)``;
        * ``labels`` is a tensor of the labels encoded with the module-level
          ``le`` encoder.
    """
    sequences, valid_flags, raw_labels = [], [], []
    for sample in batch:
        sequence = sample[0]
        sequences.append(sequence)
        # One "valid" flag per time step of this sample.
        valid_flags.append(torch.ones(sequence.shape[0]))
        raw_labels.append(sample[1])

    # batch_first=True yields (batch, time, ...) directly, no transpose needed.
    frames = torch.nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    masks = torch.nn.utils.rnn.pad_sequence(valid_flags, batch_first=True)

    labels = torch.tensor(le.transform(raw_labels))
    return (frames, masks), labels

# Training loader: shuffle every epoch so batches are not drawn in the fixed
# dataset order (which would give each epoch identical, possibly
# class-grouped, batches). Padding of variable-length clips is done by collate_fn.
data_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn,
)

In [9]:

import Model

# Number of full passes over the training loader.
NUM_EPOCHS = 100

# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

size = len(data_loader)  # number of batches per epoch
# NOTE(review): 5 presumably is the number of classes (five labels are fitted
# into `le`) — confirm against Model.CNNRNN's signature.
model = Model.CNNRNN(5)
model.to(device)
model.train()


loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(NUM_EPOCHS):
    correct = 0
    running_loss = 0.0
    seen = 0
    for X, y in data_loader:
        # NOTE(review): the padding mask is produced by the collate function
        # but never given to the model, so padded time steps are processed
        # like real frames — confirm whether Model.CNNRNN can accept it.
        frames, mask = X
        frames = frames.to(device)
        y = y.to(device).type(torch.int64)

        pred = model(frames)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # CrossEntropyLoss averages over the batch; weight by batch size so
        # the epoch figure is a true per-sample mean even if the last batch
        # is smaller.
        batch_size = y.size(0)
        running_loss += loss.item() * batch_size
        correct += (pred.argmax(1) == y).sum().item()
        seen += batch_size

    # Report epoch-level metrics (mean loss and accuracy over all samples
    # seen) instead of the loss of whichever batch happened to come last;
    # max(..., 1) avoids a crash when the loader yields nothing.
    denom = max(seen, 1)
    print(f'loss:{running_loss / denom:>7f} acc:{correct / denom :>2f}')

KeyboardInterrupt: 