In [183]:
try:
    import cv2
except:
    %pip install opencv-python-headless==4.9.0.80

import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import re
import json
import pandas as pd
import datetime
from multiprocessing import Pool, cpu_count
import shutil

print('import successfull')

import successfull


### Constants

In [2]:
# Root folder that is searched recursively for input files.
BASE_PATH = Path('/home/jovyan/work/videos/')
# Upper- and lower-case extensions are globbed separately and concatenated.
# NOTE(review): on a case-insensitive filesystem the two globs could return the same files twice - confirm.
VIDEO_PATHS = list(BASE_PATH.rglob('*.MP4')) + list(BASE_PATH.rglob('*.mp4'))
CSV_PATHS = list(BASE_PATH.rglob('*.csv'))

# Keep only videos whose full path string matches none of the excluded tokens
# (regex alternation, searched anywhere in the path, including folder names).
T1_PATHS_str = [str(file) for file in VIDEO_PATHS if not re.search('gelöschte|clipped|T2|cut|S18', str(file))]
T1_PATHS = [Path(file) for file in T1_PATHS_str]

#T2_PATHS_str = [str(file) for file in VIDEO_PATHS if not re.search('gelöschte|clipped|T1', str(file))]
#T2_PATHS = [Path(file) for file in T2_PATHS_str]


# Name fragments that process_files() splices into a video stem to build the
# expected name of the matching analysis .txt file.
ELAN = '_ELAN'
SYNC = '_sync'

# Numeric code -> category name, and the inverse mapping below.
idx_category = {
    0.0: 'neutral',
    1.1: 'negative',
    1.2: 'positive'
}
category_idx = {
    'neutral': 0.0,
    'negative': 1.1, 
    'positive': 1.2
}

# Number of videos that survived the filter above.
print(len(T1_PATHS))
#print(len(T2_PATHS))

179


In [3]:
# Keys of the per-video metadata dict (one dict per video stem).
# process_files() creates the entries with these same strings written as literals,
# so the values here must stay in sync with that function.
PATH = 'path'                 # location of the video file
TIME_FILE = 'start_end_file'  # CSV read by process_times()
SYNC_FILE = 'sync_file'       # tab-separated TXT read by process_analysis()
START = 'start_time'          # filled by process_times() from row 0 of the CSV
END = 'end_time'              # filled by process_times() from row 1 of the CSV
MU = 'Mu_df'                  # path of MU_DF.csv written by process_analysis()
KI = 'Ki_df'                  # path of KI_DF.csv written by process_analysis()

# FROM HERE ON: NONSENSE (scratch / experimental code)

In [4]:
def process_files(video_paths=T1_PATHS):
    """Collect the timing CSV and analysis TXT of every video into its output folder.

    For each video the function looks for

    * a ``.csv`` file in the video's grandparent folder whose path contains
      the video stem, and
    * a ``.txt`` file in ``<grandparent>/Videoanalyse`` whose path contains
      the stem with ``ELAN`` inserted after its first 13 characters and
      ``SYNC`` appended.

    Only when both files are found are they copied to
    ``/home/jovyan/work/output/<stem>/`` and a metadata entry kept for the
    stem. The resulting dict is also written to
    ``/home/jovyan/work/output/meta_file.json``.

    Args:
        video_paths: iterable of ``pathlib.Path`` objects pointing at videos.

    Returns:
        dict mapping video stem -> metadata dict with the keys ``'path'``,
        ``'start_end_file'``, ``'sync_file'``, ``'start_time'``,
        ``'end_time'``, ``'Mu_df'`` and ``'Ki_df'``.
    """
    output_root = Path('/home/jovyan/work/output/')
    meta_dict = {}

    for video_path in tqdm(video_paths):
        name = video_path.stem
        folder = video_path.parent.parent
        destination_folder = output_root / name

        # Timestamp CSV: first .csv next to the recording folder whose path
        # contains the stem. The stem is matched as literal text, not as a
        # regular expression.
        csvs = [str(x) for x in folder.iterdir() if x.is_file() and x.suffix == '.csv']
        match_csv = next((file for file in csvs if name in file), None)

        # Analysis TXT. Reset for every video: previously this variable was
        # only assigned inside the branch below, so a video without a
        # 'Videoanalyse' folder either raised NameError or silently reused
        # the TXT of the previous video.
        match_txt = None
        videoanalyse_folder = folder / 'Videoanalyse'
        if videoanalyse_folder.exists() and videoanalyse_folder.is_dir():
            txt_name = name[:13] + ELAN + name[13:] + SYNC
            txts = [str(x) for x in videoanalyse_folder.iterdir() if x.is_file() and x.suffix == '.txt']
            match_txt = next((file for file in txts if txt_name in file), None)

        if match_csv and match_txt:
            # Only create output if both files exist.
            destination_folder.mkdir(parents=True, exist_ok=True)
            shutil.copy(match_csv, str(destination_folder))
            shutil.copy(match_txt, str(destination_folder))

            meta_dict[name] = {
                'path': str(video_path),
                'start_end_file': str(destination_folder / Path(match_csv).name),
                'sync_file': str(destination_folder / Path(match_txt).name),
                'start_time': '00:00:00',
                'end_time': '00:00:00',
                'Mu_df': '',
                'Ki_df': ''
            }
        else:
            # Incomplete video: keep no entry for this stem. This also drops
            # an entry left by an earlier video with the same stem, which is
            # what the previous add-then-delete logic did.
            meta_dict.pop(name, None)

    # The output folder is otherwise only created when a video matched; make
    # sure it exists so that writing the JSON cannot fail on a fresh setup.
    output_root.mkdir(parents=True, exist_ok=True)
    json_file = '/home/jovyan/work/output/meta_file.json'
    with open(json_file, 'w') as f:
        json.dump(meta_dict, f, indent=4)

    return meta_dict

In [5]:
def process_times(file_dict):
    """Fill in 'start_time' and 'end_time' of every entry from its timing CSV.

    Row 0 of the CSV referenced by 'start_end_file' provides the start time
    and row 1 the end time; each is stored as the string
    ``"<hour>:<minute>:<milisecond>"`` built from the columns of those names.
    Entries with an empty 'start_end_file' are left untouched. The updated
    dict is written to ``/home/jovyan/work/output/meta_file.json``.

    Args:
        file_dict: metadata dict keyed by video stem; modified in place.

    Returns:
        The same ``file_dict`` object.
    """
    def _as_clock(row):
        # Column names (including the spelling 'milisecond') come from the CSV.
        return f"{row['hour']}:{row['minute']}:{row['milisecond']}"

    for entry in tqdm(file_dict.values()):
        csv_path = entry['start_end_file']
        if not csv_path:
            continue
        times = pd.read_csv(csv_path)
        entry['start_time'] = _as_clock(times.iloc[0])
        entry['end_time'] = _as_clock(times.iloc[1])

    # Persist the updated metadata.
    with open('/home/jovyan/work/output/meta_file.json', 'w') as handle:
        json.dump(file_dict, handle, indent=4)
    return file_dict

In [14]:
def process_analysis(file_dict):
    """Split every analysis TXT into one CSV per annotated person.

    Each file referenced by ``SYNC_FILE`` is read as a header-less,
    tab-separated table. Rows whose 'category' contains ``'SE'`` are kept and
    split into rows whose category contains ``'Mu'`` and rows whose category
    contains ``'Ki'``; the two tables are written to ``MU_DF.csv`` and
    ``KI_DF.csv`` in ``/home/jovyan/work/output/<stem>/`` and their paths are
    stored under ``MU`` / ``KI``. The updated dict is written to
    ``/home/jovyan/work/output/meta_file.json``.

    Args:
        file_dict: metadata dict keyed by video stem; modified in place.

    Returns:
        The same ``file_dict`` object (no copy is made).
    """
    columns = ['category', 'file', 'timestamp_start_long',
               'timestamp_start_short', 'timestamp_end_long', 'timestamp_end_short',
               'length_long', 'length_short', 'label']
    output_root = Path('/home/jovyan/work/output/')

    for file, file_info in tqdm(file_dict.items()):
        sync_file = file_info[SYNC_FILE]
        if not sync_file:
            continue

        # "\t" instead of a raw tab character, which editors may silently
        # convert to spaces.
        df = pd.read_csv(sync_file, sep="\t", header=None)
        if len(df.columns) >= 10:
            # Files with ten (or more) columns lose their last column.
            # NOTE(review): presumably an empty column caused by a trailing
            # tab - confirm against the raw files.
            df = df.drop(df.columns[-1], axis=1)
        df.columns = columns

        # na=False: rows with an empty category are treated as non-matching
        # instead of making the boolean mask invalid.
        filtered_df = df[df['category'].str.contains('SE', na=False)].reset_index(drop=True)
        mu_df = filtered_df[filtered_df['category'].str.contains('Mu', na=False)].reset_index(drop=True)
        ki_df = filtered_df[filtered_df['category'].str.contains('Ki', na=False)].reset_index(drop=True)

        output_dir = output_root / file
        output_dir.mkdir(parents=True, exist_ok=True)

        mu_path = str(output_dir / 'MU_DF.csv')
        ki_path = str(output_dir / 'KI_DF.csv')

        mu_df.to_csv(mu_path, index=False)
        ki_df.to_csv(ki_path, index=False)

        file_info[MU] = mu_path
        file_info[KI] = ki_path

    # Persist the updated metadata.
    json_file = '/home/jovyan/work/output/meta_file.json'
    with open(json_file, 'w') as f:
        json.dump(file_dict, f, indent=4)

    return file_dict

In [15]:
def move_videos(file_dict):
    """Copy every video into its output folder and point 'path' at the copy.

    Despite the name, the source videos are copied (``shutil.copy``), not
    moved. The destination is ``/home/jovyan/work/output/<stem>/``. The
    updated dict is written to ``/home/jovyan/work/output/meta_file.json``.

    Args:
        file_dict: metadata dict keyed by video stem; modified in place.

    Returns:
        The same ``file_dict`` object.
    """
    output_root = Path('/home/jovyan/work/output/')

    for file, file_info in tqdm(file_dict.items()):
        source = Path(file_info['path'])
        destination_folder = output_root / Path(file).name

        # Without an existing folder shutil.copy would create a *file* named
        # like the folder, and the path recorded below would not exist.
        destination_folder.mkdir(parents=True, exist_ok=True)
        target = destination_folder / source.name

        # Skip the copy when 'path' already points at the copy (second run on
        # the same dict); shutil.copy would raise SameFileError otherwise.
        if not (target.exists() and target.samefile(source)):
            shutil.copy(str(source), str(target))

        file_info['path'] = str(target)

    # Persist the updated metadata.
    json_file = '/home/jovyan/work/output/meta_file.json'
    with open(json_file, 'w') as f:
        json.dump(file_dict, f, indent=4)
    return file_dict

In [16]:
# Run the metadata pipeline in order. Each stage receives the dict returned by
# the previous one and rewrites /home/jovyan/work/output/meta_file.json.
file_dict = process_files()               # find and copy CSV/TXT per video, create entries
file_dict = process_times(file_dict)      # fill start/end time from the CSV
file_dict = process_analysis(file_dict)   # write MU_DF.csv / KI_DF.csv per video
file_dict = move_videos(file_dict)        # copy the videos next to their metadata

100%|██████████| 179/179 [00:16<00:00, 10.75it/s]
100%|██████████| 87/87 [00:00<00:00, 197.99it/s]
100%|██████████| 87/87 [00:03<00:00, 23.64it/s]
100%|██████████| 87/87 [01:29<00:00,  1.03s/it]


In [37]:
# file constants
time_path = '/home/jovyan/work/output/time_dict.json'
meta_path = '/home/jovyan/work/output/meta_file.json'

# misc
name = 'START_S001_T1_La1'

In [216]:
# NOTE(review): `timestamp` is not assigned in any cell visible in this export,
# so this cell presumably relies on leftover kernel state and would raise
# NameError after "Restart & Run All" - confirm and remove if it is scratch.
timestamp

27.92

In [219]:
def extract_frames(time_path, meta_path, file_dict):
    """Save one JPEG per annotated event for every video named in ``file_dict``.

    For each video stem the function

    1. computes the offset in seconds between the experiment start
       (``meta_file[stem][START]``) and the video start (``time_dict[stem]``);
       the offset is 0 when the experiment does not start after the video or
       when the video start is ``00:00:00``,
    2. pairs the rows of the KI and MU event tables in order (stopping at the
       shorter table), and
    3. for each pair seeks to ``offset * fps + timestamp_start_short * fps``
       (timestamp taken from the KI row) and writes that frame to
       ``/home/jovyan/work/output/frames/<stem>/``, with both labels encoded
       in the file name.

    Processing of a video stops at the first event whose frame index lies
    beyond the reported frame count.

    NOTE(review): the third field of the stored start time (CSV column
    'milisecond') is parsed as a fraction and then discarded, so the
    experiment start only has minute resolution - confirm this is intended.

    Args:
        time_path: path of a JSON file mapping video stem -> 'HH:MM:SS'.
        meta_path: path of the metadata JSON (stem -> metadata dict).
        file_dict: mapping whose keys are the video stems to process; the
            values are not used.

    Returns:
        None.
    """
    # Both JSON files are loop-invariant: read them once, not once per video.
    with open(time_path) as f:
        time_dict = json.load(f)
    with open(meta_path) as f:
        meta_file = json.load(f)

    midnight = datetime.time(0, 0, 0)

    for file in tqdm(file_dict):
        print(file)

        # set up output path for saving
        output_dir = Path(f'/home/jovyan/work/output/frames/{file}')
        output_dir.mkdir(parents=True, exist_ok=True)

        # Time difference between start of experiment and video start time.
        # Dropping the fraction yields hour and minute on 1900-01-01 with
        # second 0 - exactly what the previous multi-step conversion produced.
        start_time = datetime.datetime.strptime(meta_file[file][START], '%H:%M:%f').replace(microsecond=0)
        video_time = datetime.datetime.strptime(time_dict[file], '%H:%M:%S')

        if start_time > video_time and video_time.time() != midnight:
            diff = (start_time - video_time).seconds
        else:
            diff = 0

        cap = cv2.VideoCapture(str(meta_file[file][PATH]))
        try:
            # Keep fps as a float: truncating e.g. 29.97 to 29 shifts the
            # frame index by about one second for every 30 s of video.
            fps = cap.get(cv2.CAP_PROP_FPS)
            num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # frame index of the beginning of the experiment
            start_idx = int(fps * diff)

            # event tables, reduced to the columns that are needed
            ki_df_short = pd.read_csv(meta_file[file][KI])[['timestamp_start_short', 'label']]
            mu_df_short = pd.read_csv(meta_file[file][MU])[['timestamp_start_short', 'label']]

            paired_rows = zip(ki_df_short.iterrows(), mu_df_short.iterrows())
            for index, ((_, ki_data), (_, mu_data)) in enumerate(paired_rows):
                ki_timestamp, ki_label = ki_data['timestamp_start_short'], ki_data['label']
                mu_label = mu_data['label']

                frame_idx = start_idx + int(ki_timestamp * fps)
                if frame_idx >= num_frames:
                    break

                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ok, frame = cap.read()
                if not ok:
                    # Without a decoded frame, imwrite would raise; skip this
                    # event and carry on with the next one.
                    print(f'{file}: could not read frame {frame_idx}, skipping')
                    continue

                res_path = str(output_dir / f'{file}_frame_{index}_timestamp_{ki_timestamp}_MU_{mu_label}_KI_{ki_label}.jpg')
                cv2.imwrite(res_path, frame)
        finally:
            # Release the decoder even if reading the CSVs or a frame fails.
            cap.release()

In [220]:
extract_frames(time_path, meta_path, file_dict)

  0%|          | 0/87 [00:00<?, ?it/s]

START_S001_T1_La1


  1%|          | 1/87 [00:06<09:52,  6.89s/it]

START_S001_T1_La2


  2%|▏         | 2/87 [00:13<09:09,  6.46s/it]

START_S004_T1_La1


  3%|▎         | 3/87 [00:20<09:30,  6.80s/it]

START_S004_T1_La2


  5%|▍         | 4/87 [00:27<09:34,  6.92s/it]

START_S005_T1_La1


  6%|▌         | 5/87 [00:32<08:41,  6.36s/it]

START_S005_T1_La2


  7%|▋         | 6/87 [00:39<08:34,  6.36s/it]

START_S006_T1_La1


  8%|▊         | 7/87 [00:46<08:46,  6.59s/it]

START_S006_T1_La2


  9%|▉         | 8/87 [00:53<08:47,  6.68s/it]

START_S009_T1_La1


 10%|█         | 9/87 [00:59<08:43,  6.71s/it]

START_S009_T1_La2


 11%|█▏        | 10/87 [01:06<08:39,  6.75s/it]

START_S010_T1_La1


 13%|█▎        | 11/87 [01:13<08:32,  6.75s/it]

START_S010_T1_La2


 14%|█▍        | 12/87 [01:20<08:25,  6.74s/it]

START_S011_T1_La1


 15%|█▍        | 13/87 [01:26<08:22,  6.79s/it]

START_S011_T1_La2


 16%|█▌        | 14/87 [01:33<08:00,  6.58s/it]

START_S012_T1_La1


 17%|█▋        | 15/87 [01:39<07:52,  6.56s/it]

START_S012_T1_La2


 18%|█▊        | 16/87 [01:46<07:45,  6.56s/it]

START_S013_T1_La1


 20%|█▉        | 17/87 [01:52<07:40,  6.57s/it]

START_S013_T1_La2


 21%|██        | 18/87 [01:59<07:35,  6.60s/it]

START_S014_T1_La1


 22%|██▏       | 19/87 [02:03<06:34,  5.79s/it]

START_S014_T1_La2


 23%|██▎       | 20/87 [02:10<06:47,  6.08s/it]

START_S015_T1_La1


 24%|██▍       | 21/87 [02:16<06:56,  6.31s/it]

START_S015_T1_La2


 25%|██▌       | 22/87 [02:23<07:01,  6.49s/it]

START_S016_T1_La1


 26%|██▋       | 23/87 [02:30<07:04,  6.64s/it]

START_S016_T1_La2


 28%|██▊       | 24/87 [02:37<07:06,  6.76s/it]

START_S017_T1_La1


 29%|██▊       | 25/87 [02:44<06:54,  6.68s/it]

START_S017_T1_La2


 30%|██▉       | 26/87 [02:50<06:46,  6.66s/it]

START_S018_T1_La1


 31%|███       | 27/87 [02:57<06:32,  6.55s/it]

START_S018_T1_La2


 32%|███▏      | 28/87 [03:03<06:27,  6.56s/it]

START_S019_T1_La1


 33%|███▎      | 29/87 [03:10<06:21,  6.59s/it]

START_S019_T1_La2


 34%|███▍      | 30/87 [03:17<06:15,  6.59s/it]

START_S020_T1_La1


 36%|███▌      | 31/87 [03:24<06:14,  6.68s/it]

START_S020_T1_La2


 37%|███▋      | 32/87 [03:31<06:15,  6.83s/it]

START_S021_T1_La1


 38%|███▊      | 33/87 [03:38<06:11,  6.88s/it]

START_S021_T1_La2


 39%|███▉      | 34/87 [03:45<06:09,  6.96s/it]

START_S022_T1_La1


 40%|████      | 35/87 [03:52<06:00,  6.93s/it]

START_S022_T1_La2


 41%|████▏     | 36/87 [03:59<06:00,  7.08s/it]

START_S023_T1_La1


 43%|████▎     | 37/87 [04:06<05:52,  7.05s/it]

START_S023_T1_La2


 44%|████▎     | 38/87 [04:13<05:42,  6.98s/it]

START_S024_T1_La1


 45%|████▍     | 39/87 [04:20<05:32,  6.94s/it]

START_S024_T1_La2


 46%|████▌     | 40/87 [04:27<05:25,  6.92s/it]

START_S025_T1_La1


 47%|████▋     | 41/87 [04:33<05:14,  6.84s/it]

START_S025_T1_La2


 48%|████▊     | 42/87 [04:40<05:02,  6.72s/it]

START_S026_T1_La1


 49%|████▉     | 43/87 [04:46<04:55,  6.71s/it]

START_S026_T1_La2


 51%|█████     | 44/87 [04:53<04:46,  6.67s/it]

START_S027_T1_La1


 52%|█████▏    | 45/87 [05:00<04:39,  6.66s/it]

START_S027_T1_La2


 53%|█████▎    | 46/87 [05:06<04:32,  6.65s/it]

START_S028_T1_La1


 54%|█████▍    | 47/87 [05:13<04:27,  6.68s/it]

START_S028_T1_La2


 55%|█████▌    | 48/87 [05:20<04:19,  6.65s/it]

START_S029_T1_La1


 56%|█████▋    | 49/87 [05:26<04:10,  6.58s/it]

START_S029_T1_La2


 57%|█████▋    | 50/87 [05:33<04:02,  6.57s/it]

START_S030_T1_La1


 59%|█████▊    | 51/87 [05:40<04:02,  6.73s/it]

START_S030_T1_La2


 60%|█████▉    | 52/87 [05:47<03:59,  6.84s/it]

START_S031_T1_La1


 61%|██████    | 53/87 [05:54<03:53,  6.87s/it]

START_S031_T1_La2


 62%|██████▏   | 54/87 [06:00<03:46,  6.86s/it]

START_S032_T1_La1


 63%|██████▎   | 55/87 [06:07<03:37,  6.79s/it]

START_S032_T1_La2


 64%|██████▍   | 56/87 [06:14<03:28,  6.74s/it]

START_S033_T1_La1


 66%|██████▌   | 57/87 [06:21<03:22,  6.74s/it]

START_S033_T1_La2


 67%|██████▋   | 58/87 [06:27<03:15,  6.74s/it]

START_S035_T1_La1


 68%|██████▊   | 59/87 [06:34<03:10,  6.79s/it]

START_S035_T1_La2


 69%|██████▉   | 60/87 [06:41<03:05,  6.86s/it]

START_S036_T1_La1


 70%|███████   | 61/87 [06:48<03:01,  6.97s/it]

START_S036_T1_La2


 71%|███████▏  | 62/87 [06:55<02:54,  6.99s/it]

START_S037_T1_La1


 72%|███████▏  | 63/87 [07:02<02:45,  6.89s/it]

START_S037_T1_La2


 74%|███████▎  | 64/87 [07:09<02:38,  6.90s/it]

START_S038_T1_La1


 75%|███████▍  | 65/87 [07:16<02:32,  6.92s/it]

START_S038_T1_La2


 76%|███████▌  | 66/87 [07:23<02:24,  6.86s/it]

START_S039_T1_La1


 77%|███████▋  | 67/87 [07:29<02:15,  6.80s/it]

START_S039_T1_La2


 78%|███████▊  | 68/87 [07:36<02:08,  6.75s/it]

START_S040_T1_La1


 79%|███████▉  | 69/87 [07:43<02:01,  6.72s/it]

START_S040_T1_La2


 80%|████████  | 70/87 [07:49<01:54,  6.71s/it]

START_S041_T1_La1


 82%|████████▏ | 71/87 [07:56<01:46,  6.67s/it]

START_S041_T1_La2


 83%|████████▎ | 72/87 [08:02<01:39,  6.63s/it]

START_S042_T1_La1


 84%|████████▍ | 73/87 [08:09<01:32,  6.61s/it]

START_S042_T1_La2


 85%|████████▌ | 74/87 [08:16<01:26,  6.66s/it]

START_S043_T1_La2


 86%|████████▌ | 75/87 [08:22<01:20,  6.67s/it]

START_S044_T1_La1


 87%|████████▋ | 76/87 [08:29<01:13,  6.68s/it]

START_S044_T1_La2


 89%|████████▊ | 77/87 [08:36<01:07,  6.74s/it]

START_S045_T1_La1


 90%|████████▉ | 78/87 [08:43<01:01,  6.85s/it]

START_S045_T1_La2


 91%|█████████ | 79/87 [08:50<00:55,  6.91s/it]

START_S047_T1_La1


 92%|█████████▏| 80/87 [08:57<00:47,  6.84s/it]

START_S047_T1_La2


 93%|█████████▎| 81/87 [09:04<00:40,  6.82s/it]

START_S048_T1_La1


 94%|█████████▍| 82/87 [09:10<00:34,  6.82s/it]

START_S048_T1_La2


 95%|█████████▌| 83/87 [09:17<00:27,  6.85s/it]

START_S049_T1_La1


 97%|█████████▋| 84/87 [09:24<00:20,  6.87s/it]

START_S049_T1_La2


 98%|█████████▊| 85/87 [09:31<00:13,  6.85s/it]

START_S002_T1_La1


 99%|█████████▉| 86/87 [10:07<00:15, 15.70s/it]

START_S002_T1_La2


100%|██████████| 87/87 [10:46<00:00,  7.43s/it]
