In [6]:
!pip install pytorchvideo 
!pip install mediapipe

[0m

In [7]:
import os
import math
import cv2
import pickle
import random
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import transforms

import mediapipe as mp
import matplotlib.pyplot as plt

In [8]:
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.data.encoded_video_pyav import EncodedVideoPyAV
from torchvision.transforms import Compose, Lambda

from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample,
    UniformCropVideo
)

In [9]:
path = '/kaggle/input/mixkit'
frame_width = 1920
frame_height = 1080

# Concatenate every readable video found under `path` into a single 30 fps
# XVID file, resizing each frame to frame_width x frame_height.
out = cv2.VideoWriter('outpy.avi', cv2.VideoWriter_fourcc(*'XVID'), 30, (frame_width, frame_height))

try:
    for root, dirs, files in os.walk(path):
        for video_path in files:
            print(video_path)
            cap = cv2.VideoCapture(os.path.join(root, video_path))
            try:
                # A file that cannot be opened skips the loop entirely;
                # ret turns False once the stream is exhausted.
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    out.write(cv2.resize(frame, (frame_width, frame_height)))
            finally:
                # Free the decoder for this file before moving on to the next one.
                cap.release()
finally:
    # Close the writer so the output file is flushed, even if a read fails.
    out.release()
            
    

In [10]:
# path = '/kaggle/input/mixkit'
# frame_width = 1920
# frame_height = 1080

# list_path = ['/kaggle/input/mixkit/surprise1.mp4',
#             '/kaggle/input/mixkit/anger1.mp4',
#             '/kaggle/input/mixkit/sadness2.mp4',
#             '/kaggle/input/mixkit/happy1.mp4']

# out = cv2.VideoWriter('outpy6.mp4',cv2.VideoWriter_fourcc(*'mp4v'), 30, (frame_width,frame_height))

# total = 0
# print('video_path frames total_frames')
# for video_path in list_path:
#     counter = 0
#     cap = cv2.VideoCapture(os.path.join(root, video_path))
#     while (cap.isOpened()):
#         ret, frame = cap.read()
#         if ret == True:
#             counter += 1
#             total += 1
#             out.write(cv2.resize(frame, (frame_width,frame_height)))
#         else:
#             break
#     print(video_path, counter, total)

# # out.release()

In [11]:
def add_g(image_array, mean=0.0, var=30):
    """Return ``image_array`` with additive Gaussian noise applied.

    The noise is drawn from ``np.random.normal`` with the given ``mean`` and a
    standard deviation of ``sqrt(var)``, one sample per array element. The sum
    is clipped to [0, 255] and cast to ``uint8``.
    """
    sigma = var ** 0.5
    noise = np.random.normal(mean, sigma, image_array.shape)
    noisy = np.clip(image_array + noise, 0, 255)
    return noisy.astype(np.uint8)

def flip_image(image_array):
    """Mirror an image left-to-right.

    Delegates to ``cv2.flip`` with flip code 1 (flip around the vertical axis).
    """
    horizontal_flip_code = 1
    mirrored = cv2.flip(image_array, horizontal_flip_code)
    return mirrored

def setup_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), then configures cuDNN for repeatable runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Auto-tuning may pick a different convolution algorithm from run to run,
    # so it has to be switched off as well for results to be repeatable.
    torch.backends.cudnn.benchmark = False
    
def generate_flip_grid(w, h, device):
    """Build a horizontally mirrored, normalised coordinate grid.

    Used to flip attention maps (presumably as the sampling grid of
    ``F.grid_sample`` -- confirm against the caller).

    Args:
        w: Grid width (number of columns).
        h: Grid height (number of rows).
        device: Device the resulting tensor is moved to.

    Returns:
        Float tensor of shape (1, 2, h, w). Channel 0 holds x coordinates
        running from +1 in the first column to -1 in the last (the mirror);
        channel 1 holds y coordinates running from -1 in the first row to +1
        in the last.
    """
    def _axis(size):
        # A one-sample axis would evaluate 0 / 0 = NaN; put it at the centre.
        if size == 1:
            return torch.zeros(1)
        return 2 * torch.arange(size).float() / (size - 1) - 1

    # Negating x is what mirrors the grid horizontally.
    x_ = (-_axis(w)).view(1, -1).expand(h, -1)
    y_ = _axis(h).view(-1, 1).expand(-1, w)
    grid = torch.stack([x_, y_], dim=0).to(device)
    return grid.unsqueeze(0)

In [12]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single axis."""

    def forward(self, input):
        batch_size = input.size(0)
        return input.view(batch_size, -1)
    


def conv3x3(in_planes, out_planes, stride=1):
    """Create a bias-free 3x3 ``nn.Conv2d`` with a padding of 1."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    The block input (optionally passed through ``downsample``) is added to the
    output of the second batch norm before the final ReLU.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Only the first convolution applies `stride`.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Shortcut branch: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        y += shortcut
        return self.relu(y)


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last 1x1 convolution widens the output to ``planes * 4`` channels.
    ``stride`` is applied by the first 1x1 convolution. The block input
    (optionally passed through ``downsample``) is added before the final ReLU.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Shortcut branch: identity unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        y += shortcut
        return self.relu(y)


class ResNet(nn.Module):
    """ResNet backbone assembled from a configurable residual block type.

    Args:
        block: Residual block class exposing an ``expansion`` attribute and
            accepting ``(inplanes, planes, stride, downsample)``.
        layers: Four integers, the number of blocks in each stage.
        num_classes: Output size of the final linear layer.
        include_top: When False, ``forward`` returns the pooled feature map
            instead of the class scores.

    Child modules are registered in a fixed order (conv1, bn1, relu, maxpool,
    layer1..layer4, avgpool, fc); ``Model`` in this notebook slices
    ``children()`` by position, so that order must not change.
    """

    def __init__(self, block, layers, num_classes=8631, include_top=True):
        super().__init__()
        # Channel count entering the next stage; updated by _make_layer.
        self.inplanes = 64
        self.include_top = include_top

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # Floor-mode pooling without padding (a ceil_mode=True variant was
        # left commented out by the original author).
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1,
                                    return_indices=False, ceil_mode=False)

        # (planes, number of blocks, stride) for layer1 .. layer4.
        stage_specs = (
            (64, layers[0], 1),
            (128, layers[1], 2),
            (256, layers[2], 2),
            (512, layers[3], 2),
        )
        for number, (planes, depth, stride) in enumerate(stage_specs, start=1):
            setattr(self, f'layer{number}', self._make_layer(block, planes, depth, stride=stride))

        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Convolutions: zero-mean normal with std sqrt(2 / (kh * kw * out_channels)).
        # Batch norms: weight 1, bias 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks; only the first one uses ``stride``."""
        out_planes = planes * block.expansion

        # A 1x1 projection is needed whenever the shortcut changes shape.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)

        if self.include_top:
            x = self.fc(x.view(x.size(0), -1))
        return x

In [13]:
class Model(nn.Module):
    """ResNet-50 classifier that also returns per-class activation maps.

    Args:
        pretrained: Accepted for interface compatibility; not used by this
            class (weights are loaded by the caller via ``load_state_dict``).
        num_classes: Number of output classes of the linear classifier.
    """

    def __init__(self, pretrained=True, num_classes=7):
        super(Model, self).__init__()
        resnet50 = ResNet(Bottleneck, [3, 4, 6, 3])
        backbone = list(resnet50.children())
        # Everything up to (not including) the average pool and the original fc.
        self.features = nn.Sequential(*backbone[:-2])
        # The average pool on its own, so the un-pooled map stays available.
        self.features2 = nn.Sequential(*backbone[-2:-1])
        self.fc = nn.Linear(resnet50.fc.in_features, num_classes)

    def forward(self, x):
        """Return ``(output, hm)``: class scores and per-class activation maps.

        ``hm`` has shape N x num_classes x H x W, where H x W is the spatial
        size of the backbone feature map.
        """
        x = self.features(x)
        #### 1, 2048, 7, 7
        feature = self.features2(x)
        #### 1, 2048, 1, 1

        feature = feature.view(feature.size(0), -1)
        output = self.fc(feature)

        # Classifier weights are read directly from the layer and detached, so
        # the maps do not back-propagate into them.
        fc_weights = self.fc.weight.detach().view(
            1, self.fc.out_features, self.fc.in_features, 1, 1)

        # attention
        feat = x.unsqueeze(1)  # N * 1 * C * H * W
        hm = feat * fc_weights
        hm = hm.sum(2)  # N * num_classes * H * W

        return output, hm

In [14]:
# Validation split: space-separated index file and the directory holding its clips.
# The `name` column of each index is read by the inference cell.
VAL_CSV = pd.read_csv('/kaggle/input/ckp-video/val.txt', sep=' ')
VAL_DIR = '/kaggle/input/ckp-video/Val'

# Training split, same layout as the validation split.
TRAIN_CSV = pd.read_csv('/kaggle/input/ckp-video/train.txt', sep=' ')
TRAIN_DIR = '/kaggle/input/ckp-video/Train'

# Checkpoint loaded into Model by the inference cell.
weight_path = '/kaggle/input/eac-raf-weight/epoch55_acc_0.897001.pth'

# Value passed to torch.device() by the inference cell.
device_name = 0
workers = 2


In [15]:
# One (class name, plot colour) pair per class. The position in this table is
# the index plot_probability uses to colour and label each curve (presumably
# the model's class-index order -- confirm against the training labels).
_CLASS_STYLES = [
    ('Surprise', 'blue'),
    ('Fear', 'orange'),
    ('Disgust', 'green'),
    ('Happiness', 'red'),
    ('Sadness', 'darkviolet'),
    ('Anger', 'brown'),
    ('Neutral', 'pink'),
]

label = {
    'name': [class_name for class_name, _colour in _CLASS_STYLES],
    'color': [colour for _class_name, colour in _CLASS_STYLES],
}

In [16]:
def plot_probability(data, title,  figsize, save_dir):
    """Plot ``data`` as line curves and save the figure as ``<save_dir>/<title>.png``.

    Curves are coloured and labelled, in order, from the module-level
    ``label`` table. The figure is closed after saving.
    """
    file_name = os.path.join(save_dir, title + '.png')

    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(data)
    ax.set_title(title)

    # The i-th plotted curve takes the i-th colour of the table.
    palette = label['color']
    for position, curve in enumerate(ax.get_lines()):
        curve.set_color(palette[position])

    ax.legend(label['name'], loc='upper left')

    fig.savefig(file_name)
    plt.close(fig)
        
def plot_classes(data, title,  figsize):
    """Plot ``data`` on a new figure with the y axis fixed to [-1, 7].

    The figure is neither saved nor closed here.
    """
    y_limits = [-1, 7]
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(data)
    ax.set_title(title)
    ax.set_ylim(y_limits)
    
def save_data(data, title, save_dir):
    """Write the rows of ``data`` to ``<save_dir>/<title>.txt``.

    The file starts with the header line ``index probabilities``; each
    following line holds the row index and the row's values, separated by
    single spaces.

    Args:
        data: Iterable of rows, each row an iterable of values.
        title: File name without extension.
        save_dir: Existing directory the file is written to.
    """
    file_name = os.path.join(save_dir, title + '.txt')
    # The context manager closes the file; no explicit close() is needed.
    with open(file_name, 'w') as open_file:
        open_file.write('index probabilities\n')
        for index, row in enumerate(data):
            values = ' '.join(str(value) for value in row)
            open_file.write(f'{index} {values}\n')
    

In [17]:
def face_detection(image, mp_face_detection):
    """Locate the first detected face in a BGR image.

    Args:
        image: H x W x 3 array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before detection).
        mp_face_detection: Object exposing ``FaceDetection``; this notebook
            passes ``mp.solutions.face_detection``.

    Returns:
        ``[x1, y1, x2, y2]`` pixel coordinates clamped to the image bounds, or
        ``None`` when no face is detected or the clamped box has no area.
    """
    h, w, _ = image.shape
    # NOTE(review): a new detector is constructed on every call; if this is
    # too slow per frame, consider creating it once in the caller.
    with mp_face_detection.FaceDetection(
            model_selection=1, min_detection_confidence=0.5) as detector:

        results = detector.process(
            cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        detections = results.detections
        if not detections:
            return None

        box = detections[0].location_data.relative_bounding_box

        # Clamp both corners on both sides. A negative x2 / y2 would otherwise
        # be read by the caller's slice as an offset from the end of the axis.
        x1 = min(max(int(w * box.xmin), 0), w)
        y1 = min(max(int(h * box.ymin), 0), h)
        x2 = min(max(int(w * box.xmin + w * box.width), 0), w)
        y2 = min(max(int(h * box.ymin + h * box.height), 0), h)

        # A box with no area cannot be cropped; report it as "no face".
        if x2 <= x1 or y2 <= y1:
            return None
        return [x1, y1, x2, y2]

In [18]:
def detect(model, video_path, mp_face_detection, save_dir):
    """Classify every frame of a video and save the per-frame probabilities.

    For each frame the first detected face is cropped (the whole frame is used
    when no usable face box is found), resized to 224x224, normalised and
    passed through ``model``. The softmax of the first model output is
    collected per frame, then plotted with ``plot_probability`` and written
    with ``save_data`` under ``save_dir``, named after the video file.

    Relies on the module-level ``softmax`` and ``device`` defined in the
    inference cell.

    Args:
        model: Network whose first output holds the class scores. It is
            switched to eval mode by this function.
        video_path: Path of the video to process.
        mp_face_detection: Passed through to ``face_detection``.
        save_dir: Existing directory receiving the ``.png`` and ``.txt`` files.
    """
    eval_transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])

    # Batch norm must use its running statistics: frames are fed one at a
    # time, so batch statistics would distort the predictions.
    model.eval()

    probability = []
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            face = frame
            coord = face_detection(frame, mp_face_detection)
            if coord:
                (x1, y1, x2, y2) = coord
                crop = frame[y1:y2, x1:x2, :]
                # An empty crop would make cvtColor fail; keep the full frame.
                if crop.size:
                    face = crop

            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = eval_transforms(face)
            face = face.reshape((1, 3, 224, 224))
            face = face.to(device)

            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                outputs = model(face)
                sm = softmax(outputs[0])
            probability.append(sm.cpu().numpy()[0].tolist())
    finally:
        # Release the decoder even if a frame fails to process.
        cap.release()

    probability = np.array(probability)

    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]

    plot_probability(probability, video_name, (8, 5), save_dir)
    save_data(probability, video_name, save_dir)

In [19]:
mp_face_detection = mp.solutions.face_detection

# Fall back to the CPU when no CUDA device is available, so the notebook also
# runs on CPU-only sessions.
device = torch.device(device_name if torch.cuda.is_available() else 'cpu')
softmax = nn.Softmax(dim=1)  # read as a global by detect()

model = Model()
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted
# source. map_location lets a checkpoint saved on GPU load on the chosen device.
checkpoint = torch.load(weight_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
# Inference only: batch norm must use its running statistics.
model.eval()

# Run the same per-video pipeline over both splits; results are written to a
# directory named after the split.
for save_dir, clip_dir, clip_index in (('val', VAL_DIR, VAL_CSV),
                                       ('train', TRAIN_DIR, TRAIN_CSV)):
    os.makedirs(save_dir, exist_ok=True)
    for name in clip_index['name']:
        video_path = os.path.join(clip_dir, name)
        detect(model, video_path, mp_face_detection, save_dir)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [20]:
import shutil

# Zip the working directory for download. root_dir is all of /kaggle/working,
# so the archive holds everything stored there, not only the `train` folder.
# NOTE(review): the archive itself is also written into root_dir -- confirm
# that this is intended.
shutil.make_archive(
    base_name='/kaggle/working/train',
    format='zip',
    root_dir='/kaggle/working',
)

'/kaggle/working/train.zip'