In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision
import torchvision.transforms as T
import torchvision.datasets as Data
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import numpy as np
import scipy.io
import torchvision.models.inception as inception


In [6]:
label_mat = scipy.io.loadmat('../data/q3_2_data.mat')
label_train = label_mat['trLb']
print('train len:', len(label_train))
label_val = label_mat['valLb']
print('val len:', len(label_val))

train len: 7770
val len: 2230


In [16]:
class ActionDataset(Dataset):
    def __init__(self, root_dir, labels=[], transform = None):
        """
            root_dir (string): 整个数据的路径。
            labels(list): 图片的标签。
            transform (callable, optional): 想要对数据进行的处理函数。
        """
        self.root_dir = root_dir
        self.transform = transform
        self.length = len(os.listdir(self.root_dir))
        self.labels = labels
    def __len__(self):
        return self.length * 3
    def __getitem__(self, idx):
        folder = idx//3 + 1
        imidx = idx % 3 +1
        folder = format(folder, '05d')
        imgname = str(imidx) + '.jpg'
        img_path = os.path.join(self.root_dir, folder, imgname)
        image = Image.open(img_path)
        
        if len(self.labels)!=0:
            Label = self.labels[idx//3][0] - 1
        if self.transform:
            image = self.transform(image)
        if len(self.labels)!=0:
            sample = {'image':image,'img_path':img_path, 'Label':Label}
        else:
            sample = {'image':image, 'img_path':img_path}
        return sample

In [27]:
image_dataset = ActionDataset(root_dir = '../data/trainClips/', labels = label_train,transform = T.ToTensor())
for i in range(1):
    sample = image_dataset[i]
    print(sample['image'].shape)
    print(sample['Label'])
    print(sample['img_path'])

tensor([[[0.2980, 0.2353, 0.3569,  ..., 0.2588, 0.3373, 0.3961],
         [0.3686, 0.2353, 0.2353,  ..., 0.2627, 0.3216, 0.3608],
         [0.3961, 0.2471, 0.1490,  ..., 0.3647, 0.4078, 0.4353],
         ...,
         [0.3608, 0.3608, 0.3647,  ..., 0.2902, 0.2745, 0.2510],
         [0.3725, 0.3725, 0.3686,  ..., 0.2667, 0.2510, 0.2353],
         [0.3569, 0.3529, 0.3529,  ..., 0.2510, 0.2392, 0.2314]],

        [[0.3137, 0.2588, 0.3804,  ..., 0.3020, 0.3765, 0.4353],
         [0.3804, 0.2588, 0.2627,  ..., 0.3059, 0.3647, 0.4000],
         [0.4157, 0.2667, 0.1765,  ..., 0.4078, 0.4510, 0.4784],
         ...,
         [0.3608, 0.3608, 0.3647,  ..., 0.2235, 0.2157, 0.2078],
         [0.3725, 0.3725, 0.3686,  ..., 0.2000, 0.2000, 0.1922],
         [0.3569, 0.3529, 0.3529,  ..., 0.1843, 0.1882, 0.1882]],

        [[0.1843, 0.1255, 0.2392,  ..., 0.1843, 0.2706, 0.3294],
         [0.2588, 0.1255, 0.1294,  ..., 0.1804, 0.2471, 0.2941],
         [0.2980, 0.1412, 0.0431,  ..., 0.2824, 0.3333, 0.

In [23]:
image_dataloader = DataLoader(image_dataset, batch_size = 4, shuffle = True, num_workers = 4)

for i, sample in enumerate(image_dataloader):
    sample['image'] = sample['image']
    print(i, sample['image'].shape,sample['img_path'],sample['Label'])
    if i >5 :
        break

0 torch.Size([4, 3, 64, 64]) ['../data/trainClips/05009/3.jpg', '../data/trainClips/05970/3.jpg', '../data/trainClips/03846/3.jpg', '../data/trainClips/02949/3.jpg'] tensor([6., 7., 4., 3.], dtype=torch.float64)
1 torch.Size([4, 3, 64, 64]) ['../data/trainClips/06691/3.jpg', '../data/trainClips/00537/2.jpg', '../data/trainClips/06780/2.jpg', '../data/trainClips/03155/3.jpg'] tensor([8., 0., 8., 3.], dtype=torch.float64)
2 torch.Size([4, 3, 64, 64]) ['../data/trainClips/06603/3.jpg', '../data/trainClips/07057/3.jpg', '../data/trainClips/03539/1.jpg', '../data/trainClips/01237/3.jpg'] tensor([8., 8., 4., 1.], dtype=torch.float64)
3 torch.Size([4, 3, 64, 64]) ['../data/trainClips/05196/1.jpg', '../data/trainClips/01912/3.jpg', '../data/trainClips/00105/3.jpg', '../data/trainClips/02259/3.jpg'] tensor([6., 2., 0., 2.], dtype=torch.float64)
4 torch.Size([4, 3, 64, 64]) ['../data/trainClips/00468/3.jpg', '../data/trainClips/02278/2.jpg', '../data/trainClips/04430/3.jpg', '../data/trainClips/

In [24]:
image_dataset_train = ActionDataset(root_dir = '../data/trainClips/', labels = label_train,transform = T.ToTensor())
image_dataloader_train = DataLoader(image_dataset_train, batch_size = 64, shuffle = True, num_workers = 4)

image_dataset_val = ActionDataset(root_dir = '../data/valClips/', labels = label_val,transform = T.ToTensor())
image_dataloader_val = DataLoader(image_dataset_val, batch_size = 64, shuffle = True, num_workers = 4)

image_dataset_test = ActionDataset(root_dir = '../data/testClips/', labels = [],transform = T.ToTensor())
image_dataloader_test = DataLoader(image_dataset_test, batch_size = 64, shuffle = True, num_workers = 4)


In [25]:
dtype = torch.FloatTensor
print_every = 100
def reset(m):
    if hasattr(m, 'reset_parameters'):
        m.reset_parameters()


In [28]:
class Flatten(nn.Module):
    def forward(self,x):
        N, C, H, W = x.size()
        return x.view(N, -1)

In [72]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size = 7, stride = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2, stride = 2),
            nn.Conv2d(8, 16, kernel_size = 7, stride = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2, stride = 2),
            Flatten(),
            nn.ReLU(inplace = True),
            nn.Linear(1936, 10)
        )
    def forward(self,x):
        return self.model(x)

In [90]:
x = torch.randn(32, 3, 64,64).type(dtype).requires_grad_(True)
x_var = Variable(x.type(dtype))
x_var.shape

torch.Size([32, 3, 64, 64])

In [93]:
def train(model, loss_fn, optimizer, dataloader, num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' %(epoch+1, num_epochs))
        check_accuracy(fixed_model, image_dataloader_val)
        
        model.train() # 模型的.train()方法让模型进入训练模式，参数保留梯度，dropout层等部分正常工作。
        for t, sample in enumerate(dataloader):
            x_var = Variable(sample['image'])   # 取得一个batch的图像数据。
            y_var = Variable(sample['Label'].long()) # 取得对应的标签。

            scores = model(x_var)   # 得到输出。
            
            loss = loss_fn(scores, y_var)   # 计算loss。
            if (t + 1) % print_every == 0:  # 每隔一段时间打印一次loss。
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))

            # 三步更新参数。
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        

In [94]:
def check_accuracy(model, loader):
    num_correct = 0
    num_samples = 0
    model.eval()
    for t, sample in enumerate(loader):
        x_var = Variable(sample['image'])
        y_var = sample['Label']
        scores = model(x_var)
        _,preds = scores.data.max(1)
        num_correct+=(preds.numpy()==y_var.numpy()).sum()
        num_samples+=preds.size(0)
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f)' %(num_correct, num_samples, 100*acc))
    

In [95]:
optimizer = optim.RMSprop(net.parameters(), lr = 0.0001)
loss_fn = nn.CrossEntropyLoss()


In [96]:
torch.random.manual_seed(54321)
net.cpu()
net.apply(reset)
net.train()
train(net, loss_fn, optimizer, dataloader= image_dataloader_train, num_epochs=5)
check_accuracy(net, image_dataloader_val)

Starting epoch 1 / 5
Got 813 / 6690 correct (12.15)
t = 100, loss = 1.7234
t = 200, loss = 1.5962
t = 300, loss = 1.3262
Starting epoch 2 / 5
Got 813 / 6690 correct (12.15)
t = 100, loss = 1.2748
t = 200, loss = 1.1657
t = 300, loss = 1.3598
Starting epoch 3 / 5
Got 813 / 6690 correct (12.15)
t = 100, loss = 0.8892
t = 200, loss = 0.8928
t = 300, loss = 1.0902
Starting epoch 4 / 5
Got 813 / 6690 correct (12.15)
t = 100, loss = 1.0899
t = 200, loss = 0.8640
t = 300, loss = 1.0113
Starting epoch 5 / 5
Got 813 / 6690 correct (12.15)
t = 100, loss = 1.0518
t = 200, loss = 1.0053
t = 300, loss = 0.7456
Got 3105 / 6690 correct (46.41)
