In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import cv2

from collections import deque

from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader, random_split


In [2]:
import torch.backends.cudnn as cudnn
cudnn.benchmark = True

# Dataset

In [3]:
transform = transforms.Compose([
    transforms.Resize((224,244)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0),
    transforms.RandomCrop(size=(200,200))
])
mydata = ImageFolder(root='../../data/Sports/data', transform=transform)
labels_ = mydata.classes

print('Number of data : ', len(mydata))
print('Labels of classes : ', labels_)

Number of data :  2789
Labels of classes :  ['fencing', 'shooting', 'swimming', 'tennis', 'weight_lifting']


In [None]:
# idxs = np.arange(0, len(mydata))
# np.random.shuffle(idxs)

trainset, test_set = random_split(mydata, [2500, 289])

train_set, val_set = random_split(trainset, [2200, 300])

train_loader = DataLoader(train_set, batch_size=32)
val_loader = DataLoader(val_set, batch_size=8)
test_loader = DataLoader(test_set, batch_size=32)

In [None]:
img_ex = iter(train_loader).next()
img_ex[0].shape

In [None]:
def show_img(tensor):
    
    print(tensor[1][5])
    tensor = tensor[0][5].permute(1,2,0)
    arr = np.array(tensor)
    
    plt.imshow(arr)
    
show_img(img_ex)

# Model

In [None]:
resnet = models.resnet50(pretrained=True)
feature_layers = nn.Sequential(*list(resnet.children())[:-1])

class MyModel(nn.Module):
    
    def __init__(self, num_class):
        super(MyModel, self).__init__()
        
        self.features = feature_layers
        
        self.flat = nn.Flatten()
        
        self.fc1 = nn.Linear(2048, 512)
        self.fc2 = nn.Linear(512, num_class)
        self.drop = nn.Dropout(p=0.5)
        
    def forward(self, x):
        x = self.features(x)
        x = self.flat(x)
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        
        return x

model = MyModel(num_class=6)

for p in model.features.parameters():
    p.requires_grad = False

# Model2

In [None]:
model = models.vgg16(pretrained=True)
model.classifier[6] = nn.Linear(in_features=4096, out_features=len(labels_), bias=True)

for p in model.features.parameters():
    p.requires_grad = False

In [None]:
with torch.no_grad():
    model.eval()
    
    output = model(img_ex[0])
    print(output.shape)

# Model3

In [11]:
model3 = models.resnet152(pretrained=True)
model3

Linear(in_features=2048, out_features=1000, bias=True)

In [15]:
classifier = nn.Sequential([
    nn.Linear(2048, 1024, bias=True),
    nn.Linear(1024, 512, bias=True),
    nn.Linear(512, 5, bias=True)
])

model3.fc = classifier

TypeError: list is not a Module subclass

In [12]:
type(model3.fc)

torch.nn.modules.linear.Linear

# loss, optimizer

In [None]:
import torch.optim as optim

loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1, verbose=True)

print(optimizer)
print(scheduler)

# GPU

In [None]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
    
print(device)

# Train

In [None]:
import time

time.time()

In [None]:
EPOCH = 60
train_loss_list, val_loss_list = [], []

for e in range(EPOCH):
    start_time = time.time()
    train_loss = 0
    train_correct = 0
    
    model.to(device)
    model.train()
    
    for i, data in enumerate(train_loader):
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        output = model(images)
        
        loss = loss_func(output, labels)
        
        loss.backward()
        
        optimizer.step()
        
        train_loss += loss.cpu().item()
        
        pred = output.argmax(1)
        train_correct += (pred == labels).cpu().sum().item()
        
    train_acc = train_correct / len(trainset)
    train_loss = train_loss / len(trainset)
    train_loss_list.append(train_loss)
    
    val_loss = 0
    val_correct = 0
    
    model.eval()
    
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            
            output = model(images)
            
            loss = loss_func(output, labels)
            
            val_loss += loss.cpu().item()
            
            val_pred = output.argmax(1)
            val_correct += (val_pred == labels).cpu().sum().item()
            
        val_acc = val_correct / len(val_set)
        val_loss = val_loss / len(val_set)
        val_loss_list.append(val_loss)
        
    scheduler.step()
    end_time = time.time()
    info = '[EPOCH {}/{}] : train-loss = {:0.4f} | train-acc = {:0.4f} | val-loss = {:0.4f} | val-acc = {:0.4f} | time = {:0.2f}'
    print(info.format(e+1, EPOCH, train_loss, train_acc, val_loss, val_acc, end_time-start_time))
    
    torch.save(model.state_dict(), './vgg16_class6.pt')

In [None]:
test_loader = DataLoader(test_set, batch_size=4)

model.cpu()
model.eval()

test_correct = 0

with torch.no_grad():
    for data in test_loader:
        images, labels = data

        prediction = model(images)
        
        test_correct += (prediction.argmax(1) == labels).sum().item()
        
    
    test_acc = test_correct / len(test_set)
    print('Accuracy of Test Data : %0.2f%%' %(test_acc*100))

In [None]:
plt.plot(train_loss_list)
plt.plot(val_loss_list)

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train-loss', 'Val-loss'])

plt.show()

# Load Model

In [None]:
resnet = models.resnet50(pretrained=True)
feature_layers = nn.Sequential(*list(resnet.children())[:-1])

class MyModel(nn.Module):
    
    def __init__(self, num_class):
        super(MyModel, self).__init__()
        
        self.features = feature_layers
        
        self.flat = nn.Flatten()
        
        self.fc1 = nn.Linear(2048, 512)
        self.fc2 = nn.Linear(512, num_class)
        self.drop = nn.Dropout(p=0.5)
        
    def forward(self, x):
        x = self.features(x)
        x = self.flat(x)
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        
        return x

model = MyModel(num_class=6)
model.load_state_dict(torch.load('./resnet50_normalized_1.pt'))

for p in model.features.parameters():
    p.requires_grad = False
    
model.eval()

In [6]:
model_trained = models.vgg16(pretrained=True)
model_trained.classifier[6] = nn.Linear(in_features=4096, out_features=len(labels_), bias=True)
model_trained.load_state_dict(torch.load('./weights/vgg16.pt'))
model_trained

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
def test_image(img_pil, frame=None):
    
    plt.imshow(img_pil)
    
    if type(img_pil) == torch.Tensor:
        img_pil = img_pil.squeeze(0)
        img_pil = img_pil.permute(1,2,0)
        img_pil = Image.fromarray(np.array(img_pil))
        print(img_pil.shape)
    
    elif type(img_pil) == np.ndarray:
        img_pil = Image.fromarray(img_pil)  
    
    tran = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
    ])
    
    transed = tran(img_pil)
    image_tensor = transed.unsqueeze(0)
    
    model_trained.eval()
    pred = model_trained(image_tensor)
    label = pred.argmax(1)
    cls = labels_[label]
    
    print(cls)
    
    return transed

In [None]:
from PIL import Image

example_img = Image.open('../../data/Sports/test/weightlift.jpg')
output = test_image(example_img)

In [7]:
def normalize(frame):
    norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
    tensor = torch.from_numpy(frame.astype(np.float32)).permute(2,0,1)
    
    normalized = norm(tensor)
    
    result = normalized.unsqueeze(0)
    
    return result

In [8]:
Q = deque(maxlen=64)

vs = cv2.VideoCapture('../../data/Sports/weight.mp4')
writer = None
(W, H) = (None, None)

while True:
    (grabbed, frame) = vs.read()
    
    if not grabbed:
        break
        
    if W is None or H is None:
        (H, W) = frame.shape[:2]
        
    output = frame.copy()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, (224, 224)).astype('float32')
    frame = normalize(frame)
    
    preds = model_trained.cpu()(frame)
    print('-----')
    preds = F.softmax(preds, dim=1)[0]
    preds = preds.detach().numpy()
    
    Q.append(preds)
    
    results = np.array(Q).mean(axis=0)
    i = np.argmax(results)
    
    label = labels_[i]
    
    text = "activity: {}".format(label)
    cv2.putText(output, text, (35, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.25, (0, 255, 0), 5)
    
    if writer is None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter('weight_1frame1.avi', fourcc, 30, (W, H), True)
        
    writer.write(output)
            
print("[INFO] cleaning up...")
writer.release()
vs.release()

-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
-----
----

In [None]:
a = model.cpu()(frame)
a.shape

In [None]:
F.softmax()

In [None]:
Q[0].shape

In [None]:
preds

In [None]:
i