In [1]:
!pip install pytorchvideo 
!pip install mediapipe

Collecting pytorchvideo
  Downloading pytorchvideo-0.1.5.tar.gz (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.7/132.7 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting av
  Downloading av-10.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting parameterized
  Downloading parameterized-0.9.0-py2.py3-none-any.whl (20 kB)
Collecting iopath
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.8 MB/s

In [2]:
import os
import math
import cv2
import pickle
import random
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.init as init
from torchvision import models

import mediapipe as mp
import matplotlib.pyplot as plt

In [3]:
label_path='list_patition_label.txt'

weight_path = '/kaggle/input/dan-def-weight/rafdb_epoch21_acc0.897_bacc0.8275.pth'

workers=2
batch_size=64
w=7
h=7
device_name=0
lam=5
epochs=100

In [4]:
class DAN(nn.Module):
    def __init__(self, num_class=7,num_head=4, pretrained=True):
        super(DAN, self).__init__()
        
        resnet = models.resnet18(pretrained)
        
        if pretrained:
            checkpoint = torch.load('/kaggle/input/dan-def-weight/resnet18_msceleb.pth')
            resnet.load_state_dict(checkpoint['state_dict'],strict=True)

        self.features = nn.Sequential(*list(resnet.children())[:-2])
        self.num_head = num_head
        for i in range(num_head):
            setattr(self,"cat_head%d" %i, CrossAttentionHead())
        self.sig = nn.Sigmoid()
        self.fc = nn.Linear(512, num_class)
        self.bn = nn.BatchNorm1d(num_class)


    def forward(self, x):
        x = self.features(x)
        heads = []
        for i in range(self.num_head):
            heads.append(getattr(self,"cat_head%d" %i)(x))
        
        heads = torch.stack(heads).permute([1,0,2])
        if heads.size(1)>1:
            heads = F.log_softmax(heads,dim=1)
            
        out = self.fc(heads.sum(dim=1))
        out = self.bn(out)
   
        return out, x, heads

class CrossAttentionHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.sa = SpatialAttention()
        self.ca = ChannelAttention()
        self.init_weights()


    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
    def forward(self, x):
        sa = self.sa(x)
        ca = self.ca(sa)

        return ca


class SpatialAttention(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1x1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=1),
            nn.BatchNorm2d(256),
        )
        self.conv_3x3 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3,padding=1),
            nn.BatchNorm2d(512),
        )
        self.conv_1x3 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=(1,3),padding=(0,1)),
            nn.BatchNorm2d(512),
        )
        self.conv_3x1 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=(3,1),padding=(1,0)),
            nn.BatchNorm2d(512),
        )
        self.relu = nn.ReLU()


    def forward(self, x):
        y = self.conv1x1(x)
        y = self.relu(self.conv_3x3(y) + self.conv_1x3(y) + self.conv_3x1(y))
        y = y.sum(dim=1,keepdim=True) 
        out = x*y
        
        return out 

class ChannelAttention(nn.Module):

    def __init__(self):
        super().__init__()
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.attention = nn.Sequential(
            nn.Linear(512, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 512),
            nn.Sigmoid()    
        )


    def forward(self, sa):
        sa = self.gap(sa)
        sa = sa.view(sa.size(0),-1)
        y = self.attention(sa)
        out = sa * y
        
        return out

In [5]:
def test(model, test_loader, device):
    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        iter_cnt = 0
        correct_sum = 0
        data_num = 0

        for batch_i, (imgs1, labels) in enumerate(test_loader):
            
            if batch_i%5 == 0:
                print('Batch: {}/{}'.format(batch_i, len(test_loader)))
            imgs1 = imgs1.to(device)
            labels = labels.to(device)

#             outputs, _ = model(imgs1)
            outputs,feat,heads = model(imgs1)
            
            loss = nn.CrossEntropyLoss()(outputs, labels)

            iter_cnt += 1
            _, predicts = torch.max(outputs, 1)

            correct_num = torch.eq(predicts, labels).sum()
            correct_sum += correct_num

            running_loss += loss
            data_num += outputs.size(0)

        running_loss = running_loss / iter_cnt
        test_acc = correct_sum.float() / float(data_num)
    return test_acc, running_loss

In [6]:
class ImageData(data.Dataset):
    def __init__(self, df, data_dir, transform=None):
        super().__init__()
        self.df = df
        self.data_dir = data_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        img_name = f'{self.data_dir}/{self.df.iloc[index]["name"]}'
        label = self.df.iloc[index]["label"]-1
        
        image = cv2.imread(img_name)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        
        if self.transform is not None:
            image = self.transform(image)
        
        return image, label

In [7]:
labels = {'name': ['Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger', 'Neutral'],
         'color': ['blue', 'orange', 'green', 'red', 'darkviolet', 'brown', 'pink']}

In [8]:
def plot_probability(data, title,  figsize, save_dir):
    file_name = os.path.join(save_dir, title + '.png')    
    fig, ax = plt.subplots(figsize=figsize)
    
    
#     print([x for x in range(0, len(data))])
    ylabel = [x for x in range(0, len(data))]
#     print(data)
    ax.plot(ylabel, data)
    ax.set_title(title)
    
    for index, line in enumerate(plt.gca().get_lines()):
        line.set_color(label['color'][index])
    
    ax.legend(label['name'],
                  loc='upper left')
    
    fig.savefig(file_name)
    plt.close(fig)
        
def plot_classes(data, title,  figsize):
#     index = 0
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(data)
    ax.set_title(title)
    ax.set_ylim([-1, 7])
    
def save_data(data, title, save_dir):
    file_name = os.path.join(save_dir, title + '.txt')
    with open(file_name, 'w') as open_file:
        open_file.write('index probabilities\n')
        for index, row in enumerate(data):
            open_file.write(str(index) + ' '+ ' '.join(str(r) for r in row) + '\n')
        open_file.close()

In [9]:
def detect(model, video_path, mp_face_detection, save_dir=''):
    with torch.no_grad():
        softmax = nn.Softmax(dim=1)
        model.eval()
        probability = []
        class_list = []

        eval_transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])

        cap = cv2.VideoCapture(video_path)
        while (cap.isOpened()):
            ret, frame = cap.read()
            if ret == True:
                coord = face_detection(frame, mp_face_detection)
                if coord:
                    (x1, y1, x2, y2) = coord
                    face = frame[y1:y2, x1:x2, :]
                else:
                    face = frame
                val2_loader = torch.utils.data.DataLoader(face, batch_size=1,
                                                  shuffle=False,
                                                  num_workers=workers,
                                                  pin_memory=True)
                face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                face = cv2.resize(face, (224, 224))
    #             plt.imshow(face)
                face = eval_transforms(face)
                face = face.reshape((1, 3, 224, 224))
                face = face.to(device)

                outputs = model(face)
                sm = softmax(outputs[0])
                am = torch.argmax(outputs[0])
                probability.append(sm.detach().cpu().numpy()[0].tolist())
                class_list.append(am.detach().cpu().tolist())
            else:
                break
        probability = np.array(probability)
        class_ar = np.array(class_list)

        video_name = video_path.split('/')[-1]
        video_name = video_name.split('.')[0]

#         plot_probability(probability, video_name, (8, 5), save_dir)
#         save_data(probability, video_name, save_dir)

In [10]:
def face_detection(image, mp_face_detection):
    h, w, _ = image.shape
    with mp_face_detection.FaceDetection(
        model_selection=1, min_detection_confidence=0.5) as face_detection:

        results = face_detection.process(
            cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        detections = results.detections
        if detections:
            detection = detections[0]
            x1 = int(w * detection.location_data.relative_bounding_box.xmin)
            y1 = int(h * detection.location_data.relative_bounding_box.ymin)

            if x1 < 0:
                x1 = 0
            if y1 < 0:
                y1 = 0

            width = w * detection.location_data.relative_bounding_box.width
            height = h * detection.location_data.relative_bounding_box.height
            x2 = int(w * detection.location_data.relative_bounding_box.xmin + width)
            y2 = int(h * detection.location_data.relative_bounding_box.ymin + height)
            if x2 > w:
                x2 = w
            if y2 > h:
                y2 = h
            return [x1, y1, x2, y2]
    return None

In [11]:
mp_face_detection = mp.solutions.face_detection

video_path = '/kaggle/input/ckp-video/Train/S503_001.avi'

VAL_CSV = pd.read_csv(
    '/kaggle/input/ckp-video/val.txt',
    sep=' ',
)

VAL_DIR = '/kaggle/input/ckp-video/Val'

TRAIN_CSV = pd.read_csv(
    '/kaggle/input/ckp-video/train.txt',
    sep=' ',
)

TRAIN_DIR = '/kaggle/input/ckp-video/Train'

model = DAN(num_head=4)
model.load_state_dict(torch.load(weight_path)['model_state_dict'])
device = torch.device(device_name)
model.to(device)

# save_dir = 'val'
# if not os.path.exists(save_dir):
#     os.makedirs(save_dir)

# for name in VAL_CSV['name']:
#     video_path = os.path.join(VAL_DIR, name)
#     detect(model, video_path, mp_face_detection, save_dir)
    
    
# save_dir = 'train'
# if not os.path.exists(save_dir):
#     os.makedirs(save_dir)

# for name in TRAIN_CSV['name']:
#     video_path = os.path.join(TRAIN_DIR, name)
#     detect(model, video_path, mp_face_detection, save_dir)

  f"Using {sequence_to_str(tuple(keyword_only_kwargs.keys()), separate_last='and ')} as positional "
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

DAN(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [12]:
# import shutil
# from IPython.display import FileLink

# shutil.make_archive('/kaggle/working/val', 'zip', '/kaggle/working')

In [13]:
# shutil.make_archive('/kaggle/working/val', 'zip', '/kaggle/working')

In [14]:
def frame_count(video_path):
    with torch.no_grad():
        frame_counter = 0
        cap = cv2.VideoCapture(video_path)
        while (cap.isOpened()):
            ret, frame = cap.read()
            if ret == True:
                frame_counter += 1
            else:
                break
        return frame_counter

In [15]:
from statistics import mean

list_frames = []
for name in VAL_CSV['name']:
    video_path = os.path.join(VAL_DIR, name)
    list_frames.append(frame_count(video_path))
#     
for name in TRAIN_CSV['name']:
    video_path = os.path.join(TRAIN_DIR, name)
    list_frames.append(frame_count(video_path))
    
# print(list_frames)
    
print('min', min(list_frames))
print('max', max(list_frames))
print('average', int(mean(list_frames)))


min 6
max 71
average 17


In [16]:
def detect2(model, video_path, mp_face_detection, save_dir=''):
    with torch.no_grad():
        softmax = nn.Softmax(dim=1)
        model.eval()
        probability_list = []
        predictions_list = []
        counter = 0

        eval_transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])

        cap = cv2.VideoCapture(video_path)
        while (cap.isOpened()):
            ret, frame = cap.read()
            if ret == True:
                counter += 1
                coord = face_detection(frame, mp_face_detection)
                if coord:
                    (x1, y1, x2, y2) = coord
                    face = frame[y1:y2, x1:x2, :]
                else:
                    face = frame
                val2_loader = torch.utils.data.DataLoader(face, batch_size=1,
                                                  shuffle=False,
                                                  num_workers=workers,
                                                  pin_memory=True)
                face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                face = cv2.resize(face, (224, 224))
                face = eval_transforms(face)
                face = face.reshape((1, 3, 224, 224))
                face = face.to(device)

                outputs = model(face)
                sm = softmax(outputs[0])
                am = torch.argmax(outputs[0])
                probability_list.append(sm.detach().cpu().numpy()[0].tolist())
                predictions_list.append(am.detach().cpu().tolist())
            else:
                break
        probability_arr = np.array(probability_list)
        predictions_arr = np.array(predictions_list)
        return predictions_arr, probability_arr

In [17]:
def to_label(key):
# CKp 0=neutral, 1=anger, 2=contempt, 3=disgust, 4=fear, 5=happy, 6=sadness, 7=surprise
# RAF 0: Surprise 1: Fear 2: Disgust 3: Happiness 4: Sadness 5: Anger 6: Neutral
    labeling = {0:6, 1:5, 2:-1, 3:2, 4:1, 5:3, 6:4, 7:0}
    return labeling[key]

def list_to_class(arr):
    numer_of_classes = 7
    counter = np.zeros(numer_of_classes)
    for element in arr:
        counter[element] += 1
#     print(counter)
    return np.argmax(counter)

In [18]:
# equal_train = 0
# not_equal_train = 0
# equal_val = 0
# not_equal_val = 0

# for name, label in zip(TRAIN_CSV['name'], TRAIN_CSV['label']):
#     new_label = to_label(label)
#     if new_label == -1:
#         continue
#     video_path = os.path.join(TRAIN_DIR, name)
#     predictions, probability = detect2(model, video_path, mp_face_detection, save_dir)
    
#     if to_label(label) == list_to_class(predictions):
#         equal_train += 1
#     else:
#         not_equal_train += 1
    
# print('TRAIN: equal', equal_train, 'not equal', not_equal_train, 'accuracu', equal_train/(equal_train+not_equal_train))

# for name, label in zip(VAL_CSV['name'], VAL_CSV['label']):
#     new_label = to_label(label)
#     if new_label == -1:
#         continue
#     video_path = os.path.join(VAL_DIR, name)
#     predictions, probability = detect2(model, video_path, mp_face_detection, save_dir)
    
#     if to_label(label) == list_to_class(predictions):
#         equal_val += 1
#     else:
#         not_equal_val += 1
    
# print('VALIDATION: equal', equal_val, 'not equal', not_equal_val, 'accuracu', equal_val/(equal_val+not_equal_val))

In [19]:
def quantitative_analysis(predictions):
    counter = np.zeros(NUMBER_CLASSES)
    for element in predictions:
        counter[element] += 1
    if sum(counter==np.amax(counter)) > 1:
        return 1
    return 0

def outlier_analysis(predictions):
    single_counter = 0
    double_counter = 0
    triple_counter = 0
    for index in range(0, len(predictions)-2):
        if predictions[index] == predictions[index+2] and predictions[index] != predictions[index+1]:
            single_counter += 1
        if index < (len(predictions)-3) and predictions[index] == predictions[index+3] and predictions[index] != predictions[index+1] and predictions[index] != predictions[index+2]:
            double_counter += 1
        if index < (len(predictions)-4) and predictions[index] == predictions[index+4] and predictions[index] != predictions[index+1] and predictions[index] != predictions[index+2] and predictions[index] != predictions[index+3]:
            triple_counter += 1
    return single_counter, double_counter, triple_counter

def chatter_analysis(predictions):
    series_list = []
    counter = 2
    for index in range(0, len(predictions)-2):
        if predictions[index] != predictions[index+2] and predictions[index] != predictions[index+1] and predictions[index+1] != predictions[index+2]:
            counter += 1
        else:
            if counter > 3:
                series_list.append(counter+2)
            counter = 2
    return series_list
    

def predictions_analysis(predictions):
    outliers = np.zeros(3)
    
    frames_uncertainty = quantitative_analysis(predictions)
        
    outliers[0], outliers[1], outliers[2] = outlier_analysis(predictions)
    chatter_series = chatter_analysis(predictions)
        
    return frames_uncertainty, outliers, chatter_series

def average_plot(probabilities, average):
    
    return 0

def average(probabilities, window_size):
    out = []
    if len(probabilities) > window_size:
        out.append([sum(probabilities[x-window_size:x])/window_size for x in range(window_size, len(probabilities))])
    
    print(out)
    return out


def probability_analysis(probabilities):
    
    error_arr = np.zeros(1)
    series_list = []
    series = 0
    for  i, probability_frames in enumerate(probabilities):
        counter = 0
        for probability in probability_frames:
            if probability > MAX_PORABILITIES:
                counter += 1
        if counter > 1:
            error_arr[0] += 1
            series += 1
        else:
            if series > 1:
                series_list.append(series)
            series = 0

    return error_arr, series_list
    

In [20]:
NUMBER_CLASSES = 7
gol_frames = 71

equal_val = 0
not_equal_val = 0
validation_result = []

val_probabilities = []
val_predictions = []
val_label = []

val_video_predictions = []
val_covolution_matrix = np.zeros((7, 7))

true_positive = 0
true_negative = 0
false_positive = 0
false_negative = 0


for name, label in zip(VAL_CSV['name'], VAL_CSV['label']):
    new_label = to_label(label)
    if new_label == -1:
        continue
    video_path = os.path.join(VAL_DIR, name)
    predictions, probabilities = detect2(model, video_path, mp_face_detection)
    adepted_probabilities = np.zeros((gol_frames, 7))
    adepted_probabilities[0: len(probabilities)] = probabilities
    
    adepted_predictions = np.zeros((gol_frames))
    adepted_predictions[0: len(predictions)] = predictions
    for index in range(len(predictions)+1, gol_frames):
        adepted_probabilities[index] = probabilities[-1:]
        adepted_predictions[index] = predictions[-1:]
    
    
    frames_uncertainty, outliers, chatter_series = predictions_analysis(predictions)
    
    validation_result.append([name, frames_uncertainty, outliers, chatter_series])
    val_probabilities.append(adepted_probabilities)
    val_predictions.append(adepted_predictions)
    val_label.append(label)
    
    val_video_predictions.append(list_to_class(predictions))
    if new_label == list_to_class(predictions):
        true_positive += 1
        if frames_uncertainty or len(chatter_series) or sum(outliers):
            true_negative += 1
    else:
        false_positive += 1
        if (frames_uncertainty>0) or len(chatter_series) or sum(outliers):
            false_negative += 1
        
    val_covolution_matrix[list_to_class(predictions), new_label] += 1
      
precision = true_positive/(true_positive + false_positive)
recall = true_positive/(true_positive + false_negative)
f_score = (2 * recall * precision)/(recall + precision)

accuracy = (true_positive + true_negative)/(true_positive + true_negative + false_positive + false_negative)

pd.DataFrame([true_positive, true_negative, false_positive, false_negative, precision, recall, f_score, accuracy], 
             index=['true positive', 'true negative', 'false positive', 'false negative', 'precision', 'recall', 'F score', 'accuracy'])

# print('VALIDATION: equal', true_positive, 
#       ',not equal', false_positive, 
#       'precision', precision, 
#       'recall', recall,
#       'F_score', f_score)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Unnamed: 0,0
true positive,38.0
true negative,4.0
false positive,18.0
false negative,3.0
precision,0.678571
recall,0.926829
F score,0.783505
accuracy,0.666667


In [21]:
pd.DataFrame(val_covolution_matrix, columns=labels['name'], index=labels['name'])


Unnamed: 0,Surprise,Fear,Disgust,Happiness,Sadness,Anger,Neutral
Surprise,11.0,0.0,0.0,0.0,0.0,0.0,0.0
Fear,0.0,1.0,0.0,0.0,0.0,0.0,0.0
Disgust,0.0,0.0,4.0,0.0,0.0,1.0,0.0
Happiness,1.0,0.0,1.0,18.0,0.0,0.0,0.0
Sadness,0.0,1.0,0.0,0.0,4.0,4.0,0.0
Anger,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Neutral,3.0,1.0,3.0,0.0,0.0,3.0,0.0


In [22]:
# pd.DataFrame(validation_result, columns=['name', 'frames uncertainty', 'outliers[single double triple]', 'chatter series'])
# pd.DataFrame(validation_result, columns=['Имя', 'Неопределенность в кадрак', 'Выброс[один два три]', 'Шум'])
# вврппрпор

In [23]:
equal_train = 0
not_equal_train = 0
train_result = []

train_probabilities = []
train_predictions = []
train_label = []

for name, label in zip(TRAIN_CSV['name'], TRAIN_CSV['label']):
    new_label = to_label(label)
    if new_label == -1:
        continue
    video_path = os.path.join(TRAIN_DIR, name)
    predictions, probabilities = detect2(model, video_path, mp_face_detection)
    
    adepted_predictions = np.zeros((gol_frames))
    adepted_predictions[0: len(predictions)] = predictions
    for index in range(len(probabilities)+1, gol_frames):
        adepted_probabilities[index] = probabilities[-1:]
        adepted_predictions[index] = predictions[-1:]
    
    frames_uncertainty, outliers, chatter_series = predictions_analysis(predictions)
#     probability_error, series_uncertainty = probability_analysis(probabilities)
    
    train_result.append([name, frames_uncertainty, outliers, chatter_series])
    train_probabilities.append(adepted_probabilities)
    train_predictions.append(adepted_predictions)
    train_label.append(label)
    
    if to_label(label) == list_to_class(predictions):
        equal_train += 1
    else:
        not_equal_train += 1

    if new_label == list_to_class(predictions):
        true_positive += 1
        if frames_uncertainty or len(chatter_series) or sum(outliers):
            true_negative += 1
    else:
        false_positive += 1
        if (frames_uncertainty>0) or len(chatter_series) or sum(outliers):
            false_negative += 1
        
        
precision = true_positive/(true_positive + false_positive)
recall = true_positive/(true_positive + false_negative)
f_score = (2 * recall * precision)/(recall + precision)
accuracy = (true_positive + true_negative)/(true_positive + true_negative + false_positive + false_negative)

pd.DataFrame([true_positive, true_negative, false_positive, false_negative, precision, recall, f_score, accuracy], 
             index=['true positive', 'true negative', 'false positive', 'false negative', 'precision', 'recall', 'F score', 'accuracy'])
    



Unnamed: 0,0
true positive,194.0
true negative,32.0
false positive,115.0
false negative,33.0
precision,0.627832
recall,0.854626
F score,0.723881
accuracy,0.604278


In [24]:
print('TRAIN: equal', equal_train, 'not equal', not_equal_train, 'accuracu', equal_train/(equal_train+not_equal_train))

TRAIN: equal 156 not equal 97 accuracu 0.616600790513834


In [25]:
pd.DataFrame(train_result, columns=['name', 'frames uncertainty', 'outliers[single double triple]', 'chatter series'])


Unnamed: 0,name,frames uncertainty,outliers[single double triple],chatter series
0,S005_001.avi,0,"[0.0, 0.0, 0.0]",[]
1,S010_002.avi,0,"[0.0, 0.0, 0.0]",[]
2,S010_004.avi,0,"[0.0, 0.0, 0.0]",[]
3,S010_006.avi,0,"[0.0, 0.0, 0.0]",[]
4,S011_001.avi,0,"[1.0, 0.0, 1.0]",[]
...,...,...,...,...
248,S504_006.avi,0,"[3.0, 0.0, 0.0]",[]
249,S505_006.avi,0,"[0.0, 0.0, 0.0]",[]
250,S506_004.avi,0,"[0.0, 0.0, 0.0]",[]
251,S999_001.avi,0,"[1.0, 0.0, 0.0]",[]


In [26]:
# class AirModel(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.lstm = nn.LSTM(input_size=7, hidden_size=gol_frames, num_layers=1, batch_first=True)
#         self.linear = nn.Linear(gol_frames, 1)
#     def forward(self, x):
#         l1, _ = self.lstm(x)
#         out = self.linear(l1)
#         return out
    
# model = AirModel()
# optimizer = torch.optim.Adam(model.parameters())
# loss_fn = nn.MSELoss()

In [27]:
# # print(np.array(train_probabilities).shape)
# # print(np.array(train_label_list).shape)

# X_train = torch.tensor(train_probabilities)
# X_train = X_train.float()
# # print(X_train)
# train_label_list = np.zeros((len(train_label), 7))
# for index, label in enumerate(train_label):
#     train_label_list[index][label-1] = 1
# y_train = torch.tensor(train_label_list)
# y_train = y_train.float()

# X_val = torch.tensor(val_probabilities)
# X_val = X_val.float()
# val_label_list = np.zeros((len(val_label), 7))
# for index, label in enumerate(val_label):
#     val_label_list[index][label-1] = 1
# y_val = torch.tensor(val_label_list)
# y_val = y_val.float()

# loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=8)
 
# n_epochs = 2000
# for epoch in range(n_epochs):
#     model.train()
#     for X_batch, y_batch in loader:
        
#         y_pred = model(X_batch)
#         _, predicts = torch.max(y_pred, 1)
#         print(predicts, y_batch)
#         loss = loss_fn(predicts, y_batch)
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#     # Validation
#     if epoch % 100 != 0:
#         continue
#     model.eval()
#     with torch.no_grad():
#         y_train_pred = model(X_train)
#         train_rmse = np.sqrt(loss_fn(y_pred, y_train))
        
        
#         y_val_pred = model(X_val)
#         test_rmse = np.sqrt(loss_fn(y_pred, y_val))
        
        
#     print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse), len(y_train_pred), len(y_val_pred))

In [28]:
# # import torch.nn as nn

# class RNN_LSTM_Base(nn.Module):
#     def training_step(self, batch):
#         samples, targets = batch
#         outputs = self(samples.double())
#         loss = nn.functional.mse_loss(outputs, targets)
#         return loss

# class VanillaRNN(RNN_LSTM_Base):
#     def __init__(self, in_size, hid_size_rnn, hid_size_lin, out_size, n_layers=1):
#         super(VanillaRNN, self).__init__()
#         # Define dimensions for the layers
#         self.input_size = in_size
#         self.hidden_size_rnn = hid_size_rnn
#         self.hidden_size_lin = hid_size_lin
#         self.output_size = out_size
#         self.n_layers = n_layers
#         # Defining the RNN layer
#         self.rnn = nn.RNN(in_size, hid_size_rnn, n_layers, batch_first=True)
#         # Defining the linear layer
#         self.linear = nn.Linear(hid_size_lin, out_size)

#     def forward(self, x):
#         # x must be of shape (batch_size, seq_len, input_size)
#         xb = x.view(x.size(0), x.size(1), self.input_size).double()
#         # Initialize the hidden layer's array of shape (n_layers*n_dirs, batch_size, hidden_size_rnn)
#         h0 = torch.zeros(self.n_layers, x.size(0), self.hidden_size_rnn, requires_grad=True).double()
#         # out is of shape (batch_size, seq_len, num_dirs*hidden_size_rnn)
#         out, hn = self.rnn(xb, h0)
#         # out needs to be reshaped into dimensions (batch_size, hidden_size_lin)
#         out = out.reshape(x.size(0), self.hidden_size_lin)
#         out = nn.functional.relu(out)
#         # Finally we get out in the shape (batch_size, output_size)
#         out = self.linear(out)
#         return out


#     def fit(epochs, lr, model, train_loader, test_loader, opt_func=torch.optim.SGD):
#         optimizer = opt_func(model.parameters(), lr)
#         for epoch in range(epochs):
#             # Training phase
#             model.train()
#         for batch in train_loader:
#             loss = model.training_step(batch)
#             # Calculate gradients from chain rule
#             loss.backward()
#             # Apply gradient descent step
#             optimizer.step()
#             # Remove gradients for next iteration
#             optimizer.zero_grad()
#         return 'Trained for {} epochs'.format(epochs)