In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path

import matplotlib.pyplot as plt

from managedata.videos import (VideoID, VideoSegment)
from managedata.videos_duplicates import (compute_dissimilarity_video_list, find_equal_sets_scipy, generate_new_id)
from managedata.handle_dataset import DataSet

In [2]:
# Absolute path to the folder with the data as downloaded.
# NOTE(review): machine-specific location; adjust when running on another machine.
folder_data = '/home/anaflo/MDMC/thesis/sensorium/data/'

# Root of the metadata folder, relative to the notebook's working directory.
folder_meta = os.path.join('..','metadata')

# Global metadata folders: the per-video JSON files are searched in "videos",
# the per-segment JSON files produced by this notebook are saved to "segments".
folder_globalmetavideos = os.path.join(folder_meta, "global_meta", "videos")
folder_globalmetasegments = os.path.join(folder_meta, "global_meta", "segments")
# exist_ok=True creates the folder (and any missing parents) only when needed,
# without a separate existence check that could race with another process.
os.makedirs(folder_globalmetasegments, exist_ok=True)


# Stimulus labels to process; each one is used as a filename prefix when
# looking up the per-video JSON files.
labels = ['NaturalImages','GaussianDot','Gabor','PinkNoise','RandomDots']

# Threshold on the dissimilarity measure: two segments whose dissimilarity is
# at or below this value are considered the same segment.
limit_dissimilarity = 20


In [3]:
# Segment IDs generated so far. The list is shared by all labels and handed to
# generate_new_id on every call (presumably so that an already used ID is never
# produced again -- confirm against generate_new_id).
all_used_ids = []

# Folder holding the per-video JSON metadata; it is the same for every label.
folder = Path(folder_globalmetavideos)

for lab in labels:

    print(f"\nFinding idenitcal segments for label {lab}")
    print("--------------------------------------------------")

    # Path.glob yields entries in arbitrary order. Sorting keeps the segment
    # indices (and therefore the exemplar choice and the order in which IDs
    # are assigned) the same on every run and on every machine.
    json_files = sorted(folder.glob(f"{lab}*.json"))
    print(f"- {len(json_files)} distint videos found")

    # load all segments of all videos with this label
    all_segments = []
    for file_videoID in json_files:
        # The second '-'-separated field of the file stem is passed on as the
        # video identifier (assumes stems look like "<label>-<id>" -- confirm).
        video = VideoID(folder_data, file_videoID.parent, file_videoID.stem.split('-')[1])
        for seg_idx in range(len(video.segments['frame_start'])):
            segment = VideoSegment(video, seg_idx)
            segment.label_from_parentvideo()
            all_segments.append(segment)
    print(f"- {len(all_segments)} segments were found and loaded")

    # compute the pairwise dissimilarity (mean squared error) between segments
    print('Computing dissimilarity between segments...')
    dissimilarity = compute_dissimilarity_video_list(all_segments, dissimilarity_measure='mse', check_edges_first=False)

    # Pairs at or below the threshold count as identical; group them into sets
    # of indices into all_segments.
    mask = dissimilarity<=limit_dissimilarity
    list_identical = find_equal_sets_scipy(mask)
    print(f"- {len(list_identical)} different segments were found")

    # loop over the sets of identical segments and save one metadata file per set
    print("Saving metadata...")
    for setiden in list_identical:

        # generate a new id and remember it for the following calls
        the_id = generate_new_id(all_used_ids, prefix='s')
        all_used_ids.append(the_id)

        # Use the first member of the set as the exemplar: copy it, give it
        # the new ID and register every member of the set as a duplicate.
        segment_i_id = all_segments[next(iter(setiden))].copy(deep=True)
        segment_i_id.ID = the_id
        for k in setiden:
            segment_i_id.add_duplicates( all_segments[k].parentvideo['ID'], all_segments[k].parentvideo['segment_index'])

        # save a json file with the segment metadata
        segment_i_id.save_metadata(folder_globalmetasegments)




Finding idenitcal segments for label NaturalImages
--------------------------------------------------
- 19 distint videos found
- 380 segments were found and loaded
Computing dissimilarity between segments...


100%|██████████| 380/380 [00:16<00:00, 22.99it/s]


- 181 different segments were found
Saving metadata...

Finding idenitcal segments for label GaussianDot
--------------------------------------------------
- 18 distint videos found
- 630 segments were found and loaded
Computing dissimilarity between segments...


100%|██████████| 630/630 [00:45<00:00, 13.94it/s]


- 210 different segments were found
Saving metadata...

Finding idenitcal segments for label Gabor
--------------------------------------------------
- 18 distint videos found
- 216 segments were found and loaded
Computing dissimilarity between segments...


100%|██████████| 216/216 [00:07<00:00, 28.58it/s]


- 72 different segments were found
Saving metadata...

Finding idenitcal segments for label PinkNoise
--------------------------------------------------
- 17 distint videos found
- 204 segments were found and loaded
Computing dissimilarity between segments...


100%|██████████| 204/204 [00:08<00:00, 25.14it/s]


- 204 different segments were found
Saving metadata...

Finding idenitcal segments for label RandomDots
--------------------------------------------------
- 24 distint videos found
- 96 segments were found and loaded
Computing dissimilarity between segments...


100%|██████████| 96/96 [00:03<00:00, 27.34it/s]


- 96 different segments were found
Saving metadata...
