# Import dependencies

In [1]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import time

# Import library with current code functions
sys.path.append(os.path.join("..", "lib"))
import manual_labeler_functions as man_lab_fun, automatic_labeler_functions as aut_lab_fun, general_functions as gf, files_paths as fp

## Getting the list of measure files to be labeled and the list of available seeds

In [2]:
# Paths of the fp.VD_MEASURE_L0 files located from the fp.DATASET_YT and
# fp.DATASET_LOCAL roots; these are the series that still have to be labeled.
# (Presumably a recursive search, going by the helper's name - confirm in gf.)
FILE_LIST_VD_MEASURE = gf.find_files_in_all_subdirectories([fp.DATASET_YT, fp.DATASET_LOCAL], fp.VD_MEASURE_L0)
# Paths of the fp.VD_LABELED_L0 files located from the fp.DATASET_SEED root;
# each one is an already-labeled seed used as a query further down.
FILE_LIST_LABELED_SEED = gf.find_files_in_all_subdirectories([fp.DATASET_SEED], fp.VD_LABELED_L0)

## Remove all VD_LABELED_L0 files

In [3]:
def remove_file(FILE_LIST_PATH, file_name):
    """Delete the file called `file_name` that sits beside each listed path.

    For every path in `FILE_LIST_PATH` the target is built from that path's
    directory plus `file_name`. One line is printed per path: either the
    removed target or a "not found" notice.

    Args:
        FILE_LIST_PATH: Iterable of file paths; only their directories are used.
        file_name: Name of the file to delete inside each of those directories.
    """
    for listed_path in FILE_LIST_PATH:
        target = os.path.join(os.path.dirname(listed_path), file_name)

        # Nothing to delete in this directory: report it and move on.
        if not os.path.exists(target):
            print("File not found.")
            continue

        os.remove(target)
        print(f"The file was removed: {target}")

In [None]:
# Destructive: deletes the fp.VD_LABELED_L0 file found beside every measure file,
# so the labeling below starts without previously generated labels.
remove_file(FILE_LIST_VD_MEASURE, fp.VD_LABELED_L0)

## Select reference SEED

In [5]:
# Paths of the fp.VD_INFO files located from the fp.DATASET_SEED root; each is
# read as a CSV with a 'link_video' column in the next cell.
FILE_LIST_SEED_VD_INFO = gf.find_files_in_all_subdirectories([fp.DATASET_SEED], fp.VD_INFO)

In [None]:
# Count how many seed videos are tagged 'happy' or 'neutral', judging by the
# text of each entry in the 'link_video' column of the seed info files.
count_happy = count_neutral = 0

for vd_info_file in FILE_LIST_SEED_VD_INFO:
    vd_info_df = pd.read_csv(vd_info_file)

    # Test every link on its own. Searching str() of the whole column gives the
    # same answer for each row, so a file would count all of its rows or none.
    video_links = [str(link) for link in vd_info_df['link_video']]
    count_happy += sum('happy' in link for link in video_links)
    count_neutral += sum('neutral' in link for link in video_links)

print("Number of happy seeds:", count_happy)
print("Number of neutral seeds:", count_neutral)

In [None]:
# Display the labeled seed paths for a visual check before running the search.
FILE_LIST_LABELED_SEED

## Plot query series

In [None]:
# Index into the class list returned by GET_ALL_CLASSES for each seed; change it
# to the index of the class you want to label.
current_labeling_class = 0
frame_distance_threshold = 2
euclidian_distance_threshold = 1.2
current_total_saved_series = 0

# Results collected per seed. Each appended entry is a list with one item per
# measure file, in the order of FILE_LIST_VD_MEASURE.
matches_memory = []
all_matches_memory = []
all_mass_memory = []
all_idxs_match_frame_seq_memory = []  # kept for later cells; this cell never fills it
all_seeds_occurrences_len = []

all_data_memory = []

# Only the first 15 labeled seeds are used as queries.
for labeled_file in FILE_LIST_LABELED_SEED[:15]:
    RESUME_DT = pd.DataFrame()
    path_dir = os.path.dirname(labeled_file)
    print(f'\nSearch with seed: {os.path.basename(path_dir)}\n')

    # The seed data depends only on the seed, so it is loaded once here rather
    # than once per measure file.
    vd_labeled_path = os.path.join(path_dir, fp.VD_LABELED_L0)
    vd_labeled = pd.read_csv(vd_labeled_path)
    # 'Unnamed: 0' is the index column pandas writes by default; tolerate CSVs
    # that were saved without it.
    vd_labeled.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)

    all_class = man_lab_fun.GET_ALL_CLASSES(vd_labeled)
    label_name = all_class[current_labeling_class]

    reference_measures = man_lab_fun.GET_MEASURES_FROM_CLASS(vd_labeled, label_name)
    frames = man_lab_fun.GET_FRAMES_FROM_CLASS(vd_labeled, label_name)

    # NOTE(review): .loc slices by label and includes the end label, so this
    # takes len(frames) + 1 rows starting at label 0 - confirm that is intended
    # rather than the class's own frame range.
    all_measures_in_frame_interval = vd_labeled.loc[0:len(frames)]

    current_seed_matches_memory = []
    current_seed_all_matches_memory = []
    current_seed_all_mass_memory = []
    current_seed_occurrences_len = []

    current_seed_data_memory = []

    for current_path_location in FILE_LIST_VD_MEASURE:
        # Build a fresh query frame and parameter dict for every call, so each
        # search receives objects that no earlier call has touched.
        selected_measures_in_frame_interval = all_measures_in_frame_interval[reference_measures]
        dict_label_parameters = {'label_name': label_name, 'reference_measures': reference_measures}

        RESUME_DT, matches, all_matches, all_mass, idxs_match_frame_seq, occurences_len = aut_lab_fun.label_current_series(
            current_path_location,
            RESUME_DT,
            selected_measures_in_frame_interval,
            dict_label_parameters,
            path_dir,
            LABELED_FILE_NAME=fp.VD_LABELED_L0,
            distance_threshold=euclidian_distance_threshold,
            frame_threshold=frame_distance_threshold,
        )
        current_seed_matches_memory.append(matches)
        current_seed_all_matches_memory.append(all_matches)
        current_seed_all_mass_memory.append(all_mass)
        # Keep the occurrence lengths per video so that
        # all_seeds_occurrences_len[seed][video] is usable after this cell.
        current_seed_occurrences_len.append(list(occurences_len))

        # One [frame, distance, occurrence length] triple per matched sequence.
        current_seed_data_memory.append(
            [
                [frame, dist, occurences_len[k]]
                for k, (frame, dist) in enumerate(idxs_match_frame_seq)
            ]
        )

    matches_memory.append(current_seed_matches_memory)
    all_matches_memory.append(current_seed_all_matches_memory)
    all_mass_memory.append(current_seed_all_mass_memory)
    all_seeds_occurrences_len.append(current_seed_occurrences_len)

    all_data_memory.append(current_seed_data_memory)

    # RESUME_DT is still the empty frame when no measure file was processed;
    # count zero in that case.
    final_sum = RESUME_DT['final'].sum() if 'final' in RESUME_DT else 0
    current_total_saved_series += final_sum
    print(f'Number of Found Subseries: {final_sum}')
    print(f'Current total of saved subseries == {current_total_saved_series}')

## Matching analysis (CAN ALL BE DELETED LATER)

In [44]:
def analyse_data(memory, mass=0):
    """Print counts and summary statistics for the first entry of `memory`.

    The three nesting levels are reported as videos, measures and matches (the
    labels used in the printed messages). Only ``memory[0][0]`` is summarised:
    its mean, standard deviation, smallest and largest value are printed. If
    any level is empty, printing stops after that level's count.

    Args:
        memory: Nested sequence indexed as ``memory[a][b][c]``.
        mass: ``0`` if every element of ``memory[0][0]`` is itself a sequence
            whose first item is the value to summarise; ``1`` if every element
            already is that value.

    Raises:
        ValueError: If `mass` is neither 0 nor 1.
    """
    if mass not in (0, 1):
        raise ValueError(f"mass must be 0 or 1, got {mass!r}")

    # len() is used for the emptiness checks instead of truthiness because the
    # inner levels may be numpy arrays, whose truth value is ambiguous.
    print("Number of videos analyzed:", len(memory))
    if len(memory) == 0:
        return

    print("Number of measures [0]:", len(memory[0]))
    if len(memory[0]) == 0:
        return

    first_entry = memory[0][0]
    print("Number of matches [0][0]:", len(first_entry))
    if len(first_entry) == 0:
        print("No values to summarise.")
        return

    if mass == 0:
        memory_distances = [item[0] for item in first_entry]
    else:
        memory_distances = list(first_entry)

    first_value = min(memory_distances)
    last_value = max(memory_distances)

    memory_distance_mean = np.mean(memory_distances)
    memory_distance_std = np.std(memory_distances)
    print(f"Mean: {memory_distance_mean}, STD {memory_distance_std}")
    print(f"First value: {first_value}, Last value: {last_value}")

In [45]:
def analyse_per_seed(all_data, mass=0):
    """Run `analyse_data` on every seed's results, one labelled section per seed.

    Args:
        all_data: Sequence with one entry per seed; each entry is handed to
            `analyse_data` unchanged.
        mass: Forwarded to `analyse_data` to select how values are extracted.
    """
    seed_count = len(all_data)
    print(f"Number of seeds: {seed_count}\n")

    for seed_index, per_seed in enumerate(all_data):
        print(f"Seed number {seed_index}:")
        analyse_data(per_seed, mass)
        # Blank line between consecutive seed sections.
        print("")

In [None]:
# Per-seed summary of matches_memory; mass=0 reads the first item of each match.
analyse_per_seed(matches_memory, mass=0)

In [None]:
# Count every stored occurrence and print the ones whose length is not 99.
# all_seeds_occurrences_len is walked as seed -> video -> occurrence length.
total_number_of_occurrences = 0

for seed_idx, seed_videos in enumerate(all_seeds_occurrences_len):
    for video_idx, video_lengths in enumerate(seed_videos):
        total_number_of_occurrences += len(video_lengths)

        for occurrence_idx, length in enumerate(video_lengths):
            # 99 is treated as the normal length (presumably the expected
            # occurrence size - confirm); only the exceptions are printed.
            if length == 99:
                continue
            print(f"Seed: {seed_idx}, video: {video_idx}, occurrence: {occurrence_idx}, len occurrence: {length}")

print("total_number_of_occurrences", total_number_of_occurrences)

In [None]:
# Number of entries stored for the first seed (one per processed measure file).
len(matches_memory[0])

## All matches 

In [None]:
# Per-seed summary of all_matches_memory; mass=0 reads the first item of each match.
analyse_per_seed(all_matches_memory, mass=0)

## All mass 

In [None]:
# Per-seed summary of all_mass_memory; mass=1 uses each stored element directly as the value.
analyse_per_seed(all_mass_memory, mass=1)

## True matches

In [None]:
# Walk all_data_memory as seed -> video -> [frame, distance, length] and print
# every occurrence whose length is not 30. Seeds and videos are numbered from 1.
for seed_number, seed_videos in enumerate(all_data_memory, start=1):
    print(f"=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Seed {seed_number} =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=")

    for video_number, occurrences in enumerate(seed_videos, start=1):
        print(f" =-=-=-=-=-= Video: {video_number}, number of occurrences: {len(occurrences)} =-=-=-=-=-=")

        for entry in occurrences:
            # 30 is treated as the normal length (presumably the expected
            # occurrence size - confirm); only the exceptions are printed.
            if entry[2] == 30:
                continue
            print(f"Frame start: {entry[0]}, euclidean_dist: {entry[1]}, len occurrence: {entry[2]}")