# Get cleaner annotations and save them as JSON

In [2]:
# Speaker names that are kept when ONLY_ACTORS is enabled.
ACTORS = [
    'Chandler',
    'Joey',
    'Monica',
    'Phoebe',
    'Rachel',
    'Ross',
]

# True  -> annotation rows whose Speaker is not in ACTORS are skipped.
ONLY_ACTORS = False

# True  -> the age/gender model is neither loaded nor run; age and gender
#          are stored as None for every detected face.
ONLY_FACE = False

import os
from glob import glob
import csv
import json
from tqdm.notebook import tqdm

# Root directory for everything this notebook writes
# (datasets.json plus one sub-directory of .npy files per split).
SAVE_AT = "/home/tk/datasets/MELD/visual-features/MELD-visual-features/"

# Annotation CSV of each split.
annotation_path = {
    'train': "/home/tk/repos/MELD/data/MELD/train_sent_emo.csv",
    'dev': "/home/tk/repos/MELD/data/MELD/dev_sent_emo.csv",
    'test': "/home/tk/repos/MELD/data/MELD/test_sent_emo.csv",
}

# Directory holding the dia<D>_utt<U>.mp4 clips of each split.
VIDS_DIR = {
    'train': "/home/tk/datasets/MELD/MELD.Raw/train/train_splits/",
    'dev': "/home/tk/datasets/MELD/MELD.Raw/dev/dev_splits_complete/",
    'test': "/home/tk/datasets/MELD/MELD.Raw/test/output_repeated_splits_test",
}

# Build the mapping  split -> clip id ("dia<D>_utt<U>") -> annotation record
# once and cache it as datasets.json under SAVE_AT; later runs only reload
# the cached file.
if not os.path.isfile(os.path.join(SAVE_AT, 'datasets.json')):
    annotations = {}
    for DATATYPE in ['train', 'dev', 'test']:
        # newline='' is what the csv module asks for, so that line breaks
        # inside quoted fields are parsed correctly.
        with open(annotation_path[DATATYPE], newline='') as f:
            annotations[DATATYPE] = list(csv.reader(f))

    vid2anno = {}
    weird = 0  # annotation rows without a matching .mp4 on disk
    for DATATYPE in tqdm(['train', 'dev', 'test']):
        vid2anno[DATATYPE] = {}

        # List the directory once per split (not once per row) and test exact
        # file names. A substring test against the stringified listing would
        # also accept look-alikes such as "._dia0_utt0.mp4".
        available_videos = set(os.listdir(VIDS_DIR[DATATYPE]))

        # [1:] skips the CSV header row.
        for row in tqdm(annotations[DATATYPE][1:]):
            # Strict unpack: a row with a different column count raises
            # ValueError instead of being silently mis-assigned.
            SrNo, Utterance, Speaker, Emotion, Sentiment, Dialogue_ID,\
                Utterance_ID, Season, Episode, StartTime, EndTime = row

            if ONLY_ACTORS and Speaker not in ACTORS:
                continue

            clip_id = f"dia{Dialogue_ID}_utt{Utterance_ID}"
            if f"{clip_id}.mp4" not in available_videos:
                weird += 1
                continue

            vid2anno[DATATYPE][clip_id] = {
                'SrNo': SrNo,
                'Utterance': Utterance,
                'Speaker': Speaker,
                'Emotion': Emotion,
                'Sentiment': Sentiment,
                'Dialogue_ID': Dialogue_ID,
                'Utterance_ID': Utterance_ID,
                'Season': Season,
                'Episode': Episode,
                'StartTime': StartTime,
                'EndTime': EndTime,
            }

    print(f"the number of video that didn't match is : {weird}")

    # Make sure the output directory exists so the work above is not lost
    # to a FileNotFoundError when the cache is written.
    os.makedirs(SAVE_AT, exist_ok=True)
    with open(os.path.join(SAVE_AT, 'datasets.json'), 'w', encoding='utf8') as stream:
        json.dump(vid2anno, stream, ensure_ascii=False)

else:
    with open(os.path.join(SAVE_AT, 'datasets.json'), 'r', encoding='utf8') as stream:
        vid2anno = json.load(stream)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=9989.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1109.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2610.0), HTML(value='')))



the number of video that didn't match is : 1


# Instantiate the models (face detection, ArcFace, and age/gender unless ONLY_FACE is set)

In [3]:

from cltl_face_all.face_alignment import FaceDetection
from cltl_face_all.arcface import ArcFace

# fd detects faces, predicts landmarks and crops/aligns the faces;
# af turns the aligned faces into embeddings.
fd = FaceDetection(face_detector='sfd', device='cpu')
af = ArcFace()

# The age/gender model is imported and built only when it will be used.
if not ONLY_FACE:
    from cltl_face_all.agegender import AgeGender

    ag = AgeGender(device='cpu')


[*] load ckpt from /home/tk/.virtualenvs/dev-python-3.7/lib/python3.7/site-packages/cltl_face_all/arcface/./pretrained_models/arc_res50/e_8_b_40000.ckpt


# Run over the video frames

In [4]:
import av
import numpy as np
import os

import signal
import time
from tqdm.notebook import tqdm

class Timeout(Exception):
    """Raised by the SIGALRM handler when an armed alarm expires."""

def handler(sig, frame):
    """Signal callback: turn the delivered signal into a Timeout exception.

    Both arguments (signal number and current stack frame) are supplied by
    the signal module and are not used.
    """
    raise Timeout()


# From now on every expired signal.alarm() raises Timeout in the main thread.
signal.signal(signal.SIGALRM, handler)

# Seconds allowed for opening a clip, and for processing one frame plus
# decoding the next one, before SIGALRM raises Timeout.
ALARM_SEC = 10

# For every annotated clip: decode it frame by frame, detect the faces and
# store bbox / landmark / age / gender / embedding of each face in
# <SAVE_AT>/<split>/<clip id>.npy.
for DATATYPE in tqdm(['train', 'dev', 'test']):
    os.makedirs(os.path.join(SAVE_AT, DATATYPE), exist_ok=True)

    for diautt in tqdm(vid2anno[DATATYPE]):
        vidpath = os.path.join(VIDS_DIR[DATATYPE], diautt) + '.mp4'
        save_full_path = os.path.join(SAVE_AT, DATATYPE, diautt) + '.npy'

        # Resume support: a clip whose output already exists and is larger
        # than 256 bytes is considered done. Checked before the video is
        # opened so finished clips cost nothing.
        if os.path.isfile(save_full_path) and os.path.getsize(save_full_path) > 256:
            continue

        to_dump = {}  # frame index -> list of per-face dicts
        try:
            signal.alarm(ALARM_SEC)
            # The context manager closes the container even when decoding
            # fails or times out.
            with av.open(vidpath) as container:
                for idx, frame in enumerate(container.decode(video=0)):
                    signal.alarm(ALARM_SEC)  # fresh budget for this frame
                    to_dump[idx] = []
                    try:
                        numpy_RGB = np.array(frame.to_image())
                        batch = numpy_RGB[np.newaxis, ...]
                        bboxes = fd.detect_faces(batch)

                        if len(bboxes[0]) == 0:
                            continue

                        landmarks = fd.detect_landmarks(batch, bboxes)

                        faces = fd.crop_and_align(batch, bboxes, landmarks)
                        faces = np.concatenate(faces, axis=0)
                        embeddings = af.predict(faces)

                        if ONLY_FACE:
                            ages, genders = [None] * len(embeddings), [None] * len(embeddings)
                        else:
                            ages, genders = ag.predict(faces)

                        for bb, lm, a, g, emb in zip(bboxes[0], landmarks[0], ages, genders, embeddings):
                            # The unpacked values are not used below; the
                            # unpack is kept because it fails loudly if a
                            # bbox does not hold exactly five values.
                            x1, y1, x2, y2, prob = bb

                            to_dump[idx].append({'bbox': bb,
                                                 'landmark': lm,
                                                 'age': a,
                                                 'gender': g,
                                                 'embedding': emb})
                    except Exception as e:
                        # Best effort per frame: keep whatever was collected
                        # for this frame and move on to the next one.
                        print(f"{vidpath} frame {idx}: {e!r}")
                        # The alarm may just have fired; re-arm it so that
                        # decoding the next frame stays guarded.
                        signal.alarm(ALARM_SEC)
        except Exception as e:
            # Opening or decoding failed or timed out: nothing is saved, so
            # the clip is retried on the next run.
            print(f"{vidpath}: {e!r}")
            continue
        finally:
            # Always cancel the pending alarm; otherwise it can fire later,
            # outside any try block (e.g. during np.save or after the cell
            # has finished), and raise an uncaught Timeout.
            signal.alarm(0)

        # to_dump is a dict, so np.save stores it as a pickled object array;
        # reading it back requires np.load(..., allow_pickle=True).
        np.save(save_full_path, to_dump)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=9989.0), HTML(value='')))

Format mov,mp4,m4a,3gp,3g2,mj2 detected only with low score of 1, misdetection possible!
moov atom not found
[Errno 1094995529] Invalid data found when processing input: '/home/tk/datasets/MELD/MELD.Raw/train/train_splits/dia125_utt3.mp4'; last error log: [mov,mp4,m4a,3gp,3g2,mj2] moov atom not found



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1108.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2610.0), HTML(value='')))



