# Import dependencies

In [1]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import time

# Import library with current code functions
sys.path.append(os.path.join("..", "lib"))
import manual_labeler_functions as man_lab_fun, automatic_labeler_functions as aut_lab_fun, general_functions as gf, files_paths as fp

## Get the list of measure files to be labeled and the list of available seeds

In [2]:
# Measure files to be labeled: searched under both the YT and the local dataset roots.
FILE_LIST_VD_MEASURE = gf.find_files_in_all_subdirectories(
    [fp.DATASET_YT, fp.DATASET_LOCAL],
    fp.VD_MEASURE_L0,
)

# Already-labeled seed files: searched under the seed dataset root only.
FILE_LIST_LABELED_SEED = gf.find_files_in_all_subdirectories(
    [fp.DATASET_SEED],
    fp.VD_LABELED_L0,
)

## Remove all VD_LABELED_L0 files

In [3]:
def remove_file(FILE_LIST_PATH, file_name):
    """Delete the sibling file `file_name` next to every path in `FILE_LIST_PATH`.

    For each entry, the target is `file_name` joined to the entry's parent
    directory. Existing targets are removed and reported; missing ones only
    produce a "File not found." message.

    Args:
        FILE_LIST_PATH: Iterable of file paths; only their directories are used.
        file_name: Name of the file to delete inside each of those directories.

    Returns:
        None. The function works through side effects (file deletion, prints).
    """
    for reference_path in FILE_LIST_PATH:
        target_path = os.path.join(os.path.dirname(reference_path), file_name)

        # Guard clause: nothing to delete in this directory.
        if not os.path.exists(target_path):
            print("File not found.")
            continue

        os.remove(target_path)
        print(f"The file was removed: {target_path}")

In [4]:
# WARNING: destructive step. Deletes the VD_LABELED_L0 file that sits next to
# each measure file, so the labeling below starts from a clean state.
remove_file(FILE_LIST_PATH=FILE_LIST_VD_MEASURE, file_name=fp.VD_LABELED_L0)

The file was removed: ..\Dataset\DD-Local\VD_D_0000000043\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000044\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000045\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000046\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000047\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000048\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000049\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000050\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000051\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000052\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000053\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000054\VD_LABELED_L0.CSV
The file was removed: ..\Dataset\DD-Local\VD_D_0000000055\VD_LABELED_L0.CSV
The file was

## Select reference SEED

In [5]:
# VD_INFO files found under the seed dataset root (read in the next cell).
FILE_LIST_SEED_VD_INFO = gf.find_files_in_all_subdirectories(
    [fp.DATASET_SEED],
    fp.VD_INFO,
)

In [6]:
# Count how many seed entries have a 'link_video' value containing
# "happy" / "neutral", across all seed VD_INFO files.
count_happy = count_neutral = 0

for vd_info_file in FILE_LIST_SEED_VD_INFO:
    vd_info_df = pd.read_csv(vd_info_file)

    # Test each link on its own. The previous version tested the string
    # representation of the whole column, so one matching row made every
    # row of that file count as a match.
    video_links = [str(link) for link in vd_info_df['link_video']]

    count_happy += sum('happy' in link for link in video_links)
    count_neutral += sum('neutral' in link for link in video_links)

print("Number of happy seeds:", count_happy)
print("Number of neutral seeds:", count_neutral)

Number of happy seeds: 54
Number of neutral seeds: 84


In [7]:
# Display the labeled seed files that the search loop below iterates over.
FILE_LIST_LABELED_SEED

['..\\Dataset\\REF-Gold-Label\\VD_R_0000000001\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000002\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000003\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000004\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000005\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000006\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000007\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000008\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000009\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000010\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000011\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000012\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000013\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000014\\VD_LABELED_L0.CSV',
 '..\\Dataset\\REF-Gold-Label\\VD_R_0000000015\\

In [8]:
# Automatic labeling: every labeled seed is used as a reference pattern and
# searched for in every measure file via aut_lab_fun.label_current_series.
#
# Result layout:
#   all_data_memory[seed_idx][measure_file_idx] -> list of
#   [frame, dist, occurrence_len] entries, one per match returned for that
#   (seed, measure file) pair.

# --- Parameters ---------------------------------------------------------
current_labeling_class = 0  # Index of the class to label within the seed's class list; change it to match your labeling class
frame_distance_threshold = 2
euclidean_distance_threshold = 1.2
current_total_saved_series = 0

# Accumulators declared here but not filled by this cell; kept so any
# cell that references them keeps working.
matches_memory = []
all_matches_memory = []
all_mass_memory = []
all_idxs_match_frame_seq_memory = []
all_seeds_occurrences_len = []

all_data_memory = []

for j, labeled_file in enumerate(FILE_LIST_LABELED_SEED[:]):
    RESUME_DT = pd.DataFrame()
    print(f'\nSearch with seed: {os.path.basename(os.path.dirname(labeled_file))}\n')

    current_seed_matches_memory = []
    current_seed_all_matches_memory = []
    current_seed_all_mass_memory = []

    current_seed_data_memory = []

    # Everything below depends only on the seed, so it is computed once per
    # seed instead of once per measure file (it used to re-read the seed CSV
    # on every inner iteration).
    # NOTE(review): assumes label_current_series does not rewrite the seed's
    # own labeled file between calls - confirm.
    path_dir = os.path.dirname(labeled_file)
    vd_labeled_path = os.path.join(path_dir, fp.VD_LABELED_L0)
    vd_labeled = pd.read_csv(vd_labeled_path).drop(columns=['Unnamed: 0'])

    all_class = man_lab_fun.GET_ALL_CLASSES(vd_labeled)
    label_name = all_class[current_labeling_class]

    reference_measures = man_lab_fun.GET_MEASURES_FROM_CLASS(vd_labeled, label_name)

    frames = man_lab_fun.GET_FRAMES_FROM_CLASS(vd_labeled, label_name)

    # NOTE(review): .loc slicing is label-inclusive, so with a default integer
    # index this keeps len(frames) + 1 rows - confirm this is intended.
    all_measures_in_frame_interval = vd_labeled.loc[0:len(frames)]

    for i, current_path_location in enumerate(FILE_LIST_VD_MEASURE):

        # Rebuilt for every call so the helper always receives fresh objects,
        # exactly as before, even if it modifies its arguments.
        selected_measures_in_frame_interval = all_measures_in_frame_interval[reference_measures].copy()

        dict_label_parameters = {'label_name': label_name, 'reference_measures': reference_measures}

        RESUME_DT, matches, all_matches, all_mass, idxs_match_frame_seq, occurrences_len = aut_lab_fun.label_current_series(
            current_path_location,
            RESUME_DT,
            selected_measures_in_frame_interval,
            dict_label_parameters,
            path_dir,
            LABELED_FILE_NAME=fp.VD_LABELED_L0,
            distance_threshold=euclidean_distance_threshold,
            frame_threshold=frame_distance_threshold,
        )

        # One [frame, dist, occurrence_len] entry per match in this measure file.
        current_seed_data_memory.append([
            [frame, dist, occurrences_len[k]]
            for k, (frame, dist) in enumerate(idxs_match_frame_seq)
        ])

    all_data_memory.append(current_seed_data_memory)

    # RESUME_DT stays an empty frame (no 'final' column) when there are no
    # measure files to search; report zero instead of raising KeyError.
    final_sum = RESUME_DT['final'].sum() if 'final' in RESUME_DT.columns else 0
    current_total_saved_series += final_sum
    print(f'Number of occurrences found for the current seed: {final_sum}')
    print(f'Total of occurrences: {current_total_saved_series}')


Search with seed: VD_R_0000000001

Number of Found Subseries: 24
Current total of saved subseries == 24

Search with seed: VD_R_0000000002

Number of Found Subseries: 11
Current total of saved subseries == 35

Search with seed: VD_R_0000000003

Number of Found Subseries: 7
Current total of saved subseries == 42

Search with seed: VD_R_0000000004

Number of Found Subseries: 20
Current total of saved subseries == 62

Search with seed: VD_R_0000000005

Number of Found Subseries: 31
Current total of saved subseries == 93

Search with seed: VD_R_0000000006

Number of Found Subseries: 25
Current total of saved subseries == 118

Search with seed: VD_R_0000000007

Number of Found Subseries: 32
Current total of saved subseries == 150

Search with seed: VD_R_0000000008

Number of Found Subseries: 32
Current total of saved subseries == 182

Search with seed: VD_R_0000000009

Number of Found Subseries: 18
Current total of saved subseries == 200

Search with seed: VD_R_0000000010

Number of Found 

## True matches

In [9]:
# Report every stored occurrence whose length (third field of the entry)
# differs from 30, and count how many there are. Seeds and videos are
# numbered from 1 in the printed report.
number_irregulars = 0
for seed_number, seed_videos in enumerate(all_data_memory, start=1):
    for video_number, video_occurrences in enumerate(seed_videos, start=1):
        for entry in video_occurrences:
            # Entries are [frame start, euclidean distance, occurrence length].
            if entry[2] == 30:
                continue
            print(f"Seed: {seed_number}, Video: {video_number}, Frame start: {entry[0]}, euclidean_dist: {entry[1]}, len occurrence: {entry[2]}")
            number_irregulars += 1
print("Number irregulars:", number_irregulars)

Seed: 5, Video: 3, Frame start: 325, euclidean_dist: 4.4592292994690865, len occurrence: 16
Seed: 5, Video: 14, Frame start: 6700, euclidean_dist: 1.9169064294619393, len occurrence: 24
Seed: 7, Video: 57, Frame start: 2211, euclidean_dist: 2.101794128426443, len occurrence: 28
Seed: 20, Video: 8, Frame start: 355, euclidean_dist: 5.0711307549862354, len occurrence: 18
Seed: 20, Video: 14, Frame start: 6296, euclidean_dist: 5.8089020111477865, len occurrence: 16
Seed: 21, Video: 20, Frame start: 3488, euclidean_dist: 4.334903618668159, len occurrence: 21
Seed: 21, Video: 34, Frame start: 4750, euclidean_dist: 3.7323127715735898, len occurrence: 12
Seed: 22, Video: 5, Frame start: 1819, euclidean_dist: 4.355284369910666, len occurrence: 19
Seed: 23, Video: 4, Frame start: 4858, euclidean_dist: 3.6295397083061522, len occurrence: 26
Seed: 23, Video: 4, Frame start: 356, euclidean_dist: 5.469529143955719, len occurrence: 26
Seed: 23, Video: 5, Frame start: 2435, euclidean_dist: 3.58814250