In [None]:
try:
    import cv2
except:
    %pip install opencv-python-headless==4.9.0.80

import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import re
import json
import pandas as pd
import datetime
from multiprocessing import Pool, cpu_count
import shutil

# Visible confirmation that the import cell ran to the end without raising.
print('import successfull')

### Constants

In [None]:
# Root folder that is searched recursively for recordings and CSV files.
BASE_PATH = Path('/home/jovyan/work/videos/')
# Both spellings of the extension are globbed separately, upper-case first.
VIDEO_PATHS = [*BASE_PATH.rglob('*.MP4'), *BASE_PATH.rglob('*.mp4')]
CSV_PATHS = [*BASE_PATH.rglob('*.csv')]

# T1 selection: keep only videos whose full path contains none of the markers
# in the pattern below.
T1_PATHS_str = [
    video_str
    for video_str in map(str, VIDEO_PATHS)
    if re.search('gelöschte|clipped|T2|cut|S18', video_str) is None
]
T1_PATHS = list(map(Path, T1_PATHS_str))

# T2 selection (currently disabled):
#T2_PATHS_str = [str(file) for file in VIDEO_PATHS if not re.search('gelöschte|clipped|T1', str(file))]
#T2_PATHS = [Path(file) for file in T2_PATHS_str]


# Name fragments used when building the expected annotation file name.
ELAN = '_ELAN'
SYNC = '_sync'

# Numeric code -> label, plus the inverse mapping derived from it.
idx_category = {0.0: 'neutral', 1.1: 'negative', 1.2: 'positive'}
category_idx = {label: code for code, label in idx_category.items()}

print(len(T1_PATHS))
#print(len(T2_PATHS))

In [None]:
# Keys of the per-video metadata record stored in the metadata dict / JSON.
PATH, TIME_FILE, SYNC_FILE = 'path', 'start_end_file', 'sync_file'
START, END = 'start_time', 'end_time'
MU, KI = 'Mu_df', 'Ki_df'

# NOTE: everything from here on is rough scratch work (original German note: "ab hier Quatsch").

In [None]:
def process_files(video_paths=T1_PATHS):
    """Collect each video's companion files and write the metadata JSON.

    For every video, the grandparent directory of the video file is searched
    for a ``.csv`` whose path contains the video stem. Its ``Videoanalyse``
    sub-folder is searched for a ``.txt`` whose path contains the stem with
    ``ELAN`` inserted after the 13th character and ``SYNC`` appended. Only
    when both files are found are they copied to
    ``/home/jovyan/work/output/<stem>/`` and a record added to the result.

    Args:
        video_paths: Iterable of ``pathlib.Path`` objects pointing at videos.

    Returns:
        dict: ``{stem: record}``. Each record holds the original video path,
        the paths of the copied CSV and TXT, placeholder start/end times
        (``'00:00:00'``) and empty ``Mu_df``/``Ki_df`` entries. The same
        mapping is written to ``/home/jovyan/work/output/meta_file.json``.
    """
    def first_match(directory, suffix, needle):
        """Return the first `suffix` file in `directory` whose path contains `needle`, else None."""
        for entry in directory.iterdir():
            # Plain substring test: the stem is literal text, not a regex.
            if entry.is_file() and entry.suffix == suffix and needle in str(entry):
                return str(entry)
        return None

    output_root = Path('/home/jovyan/work/output/')
    meta_dict = {}

    for video_path in tqdm(video_paths):
        name = video_path.stem
        folder = video_path.parent.parent

        # Timestamp CSV that belongs to this video.
        match_csv = first_match(folder, '.csv', name)

        # Annotation TXT. Reset for every video: without this, a missing
        # 'Videoanalyse' folder either raised UnboundLocalError (first video)
        # or silently reused the previous video's match.
        match_txt = None
        videoanalyse_folder = folder / 'Videoanalyse'
        if videoanalyse_folder.is_dir():
            txt_name = name[:13] + ELAN + name[13:] + SYNC
            match_txt = first_match(videoanalyse_folder, '.txt', txt_name)

        # Only create output if both files exist.
        if not (match_csv and match_txt):
            # Keeps the previous semantics for duplicate stems: an incomplete
            # later video removes the record of an earlier one with that stem.
            meta_dict.pop(str(name), None)
            continue

        destination_folder = output_root / name
        destination_folder.mkdir(parents=True, exist_ok=True)
        shutil.copy(match_csv, str(destination_folder))
        shutil.copy(match_txt, str(destination_folder))

        meta_dict[str(name)] = {
            PATH: str(video_path),
            TIME_FILE: str(destination_folder / Path(match_csv).name),
            SYNC_FILE: str(destination_folder / Path(match_txt).name),
            START: '00:00:00',
            END: '00:00:00',
            MU: '',
            KI: '',
        }

    # Save json. The output root may not exist yet when no video matched.
    output_root.mkdir(parents=True, exist_ok=True)
    with open(output_root / 'meta_file.json', 'w') as f:
        json.dump(meta_dict, f, indent=4)

    return meta_dict

In [None]:
def process_times(file_dict):
    """Fill in each record's start and end time from its timestamp CSV.

    For every record with a non-empty ``'start_end_file'``, the CSV is read
    with pandas. Row 0 becomes ``'start_time'`` and row 1 becomes
    ``'end_time'``, each formatted as ``"<hour>:<minute>:<milisecond>"`` from
    the columns of those names. The records are updated in place.

    Args:
        file_dict: Mapping of video name -> metadata record.

    Returns:
        dict: The same ``file_dict`` object, which is also written to
        ``/home/jovyan/work/output/meta_file.json``.
    """
    def as_clock(row):
        """Join one CSV row's hour/minute/milisecond fields with colons."""
        return f"{row['hour']}:{row['minute']}:{row['milisecond']}"

    for record in tqdm(file_dict.values()):
        csv_path = record['start_end_file']
        if not csv_path:
            continue
        times = pd.read_csv(csv_path)
        record['start_time'] = as_clock(times.iloc[0])
        record['end_time'] = as_clock(times.iloc[1])

    # Persist the updated metadata.
    with open('/home/jovyan/work/output/meta_file.json', 'w') as handle:
        json.dump(file_dict, handle, indent=4)
    return file_dict

In [None]:
def process_analysis(file_dict):
    """Split each annotation export into mother and child event tables.

    For every record with a non-empty sync file, the tab-separated file is
    read without a header and its first nine columns are named. Rows whose
    ``category`` contains ``'SE'`` are kept and split into rows containing
    ``'Mu'`` and rows containing ``'Ki'``. Both tables are written to
    ``/home/jovyan/work/output/<name>/MU_DF.csv`` and ``KI_DF.csv``, and their
    paths are stored in the record. The records are updated in place.

    Args:
        file_dict: Mapping of video name -> metadata record.

    Returns:
        dict: The same ``file_dict`` object, which is also written to
        ``/home/jovyan/work/output/meta_file.json``.
    """
    columns = ['category', 'file', 'timestamp_start_long',
               'timestamp_start_short', 'timestamp_end_long', 'timestamp_end_short',
               'length_long', 'length_short', 'label']

    for file, file_info in tqdm(file_dict.items()):
        sync_file = file_info[SYNC_FILE]
        if not sync_file:
            continue

        df = pd.read_csv(sync_file, sep="\t", header=None)
        # Keep only the nine named columns. Dropping just the last column left
        # a length mismatch whenever a file had more than ten columns.
        if len(df.columns) > len(columns):
            df = df.iloc[:, :len(columns)]
        df.columns = columns

        # Create one table each for mother and child and save them to output.
        # na=False: a missing category counts as "no match" instead of putting
        # NaN into the boolean mask, which pandas refuses to index with.
        is_event = df['category'].str.contains('SE', na=False)
        filtered_df = df[is_event].reset_index(drop=True)
        mu_df = filtered_df[filtered_df['category'].str.contains('Mu')].reset_index(drop=True)
        ki_df = filtered_df[filtered_df['category'].str.contains('Ki')].reset_index(drop=True)

        output_dir = Path('/home/jovyan/work/output/' + file)
        output_dir.mkdir(parents=True, exist_ok=True)

        mu_path = str(output_dir) + '/MU_DF.csv'
        ki_path = str(output_dir) + '/KI_DF.csv'

        mu_df.to_csv(mu_path, index=False)
        ki_df.to_csv(ki_path, index=False)

        file_info[MU] = mu_path
        file_info[KI] = ki_path

    # Save json.
    json_file = '/home/jovyan/work/output/meta_file.json'
    with open(json_file, 'w') as f:
        json.dump(file_dict, f, indent=4)

    return file_dict

In [None]:
def move_videos(file_dict):
    """Copy every video into its output folder and point the record at the copy.

    Despite the name, the source video is left in place: it is copied with
    ``shutil.copy`` to ``/home/jovyan/work/output/<name>/`` and the record's
    ``'path'`` is replaced by the path of the copy. The records are updated
    in place.

    Args:
        file_dict: Mapping of video name -> metadata record with a ``'path'``.

    Returns:
        dict: The same ``file_dict`` object, which is also written to
        ``/home/jovyan/work/output/meta_file.json``.
    """
    for file, file_info in tqdm(file_dict.items()):
        video_path = file_info['path']
        destination_folder = Path('/home/jovyan/work/output/') / Path(file).name
        # If the folder were missing, shutil.copy would create a *file* with
        # the folder's name and the path recorded below would not exist.
        destination_folder.mkdir(parents=True, exist_ok=True)

        target = destination_folder / Path(video_path).name
        # A record that already points at the copy (second run on the same
        # dict) must not raise shutil.SameFileError.
        if Path(video_path).resolve() != target.resolve():
            shutil.copy(video_path, str(target))

        file_info['path'] = str(target)

    # Save json.
    json_file = '/home/jovyan/work/output/meta_file.json'
    with open(json_file, 'w') as f:
        json.dump(file_dict, f, indent=4)
    return file_dict

In [None]:
# Run the preparation stages in order. Each stage receives the metadata dict
# returned by the previous one and rewrites /home/jovyan/work/output/meta_file.json.
# Rebinding after every stage keeps `file_dict` at the last completed stage
# if a later one raises.
file_dict = process_files()
file_dict = process_times(file_dict)
file_dict = process_analysis(file_dict)
file_dict = move_videos(file_dict)

In [None]:
# JSON inputs of the frame-extraction step. Both live in the output folder.
# NOTE(review): no cell visible here writes time_dict.json; presumably it has
# to exist beforehand. Confirm.
_OUTPUT_ROOT = '/home/jovyan/work/output'
time_path = f'{_OUTPUT_ROOT}/time_dict.json'
meta_path = f'{_OUTPUT_ROOT}/meta_file.json'

# misc: name of a single recording
name = 'START_S001_T1_La1'

In [None]:
# NOTE(review): this cell used to evaluate a bare `timestamp`. No cell above
# defines that name, so it raised NameError on Restart & Run All. The leftover
# inspection is disabled here; restore it only after `timestamp` is defined.
# timestamp

In [None]:
def extract_frames(time_path, meta_path, file_dict):
    """Save one JPEG per annotated event for every video named in ``file_dict``.

    For each video the offset between the experiment start (from the metadata
    JSON) and the video start (from the time JSON) is converted to a frame
    index. For every pair of rows from the child (``Ki``) and mother (``Mu``)
    event tables, the frame at the child's ``timestamp_start_short`` is written
    to ``/home/jovyan/work/output/frames/<name>/``. The file name encodes the
    row index, that timestamp and both labels.

    Args:
        time_path: JSON file mapping video name -> video start time, parsed
            with ``'%H:%M:%S'``.
        meta_path: JSON file mapping video name -> metadata record.
        file_dict: Mapping whose keys select the videos to process. Its values
            are not used; records are read from ``meta_path``.

    Returns:
        None. The extracted frames are written to disk.
    """
    if not file_dict:
        return

    # Both JSON files are loop-invariant, so read them once.
    with open(time_path) as f:
        time_dict = json.load(f)
    with open(meta_path) as f:
        meta_file = json.load(f)

    midnight = datetime.time(0, 0, 0)

    for file in tqdm(file_dict.keys()):
        # Set up output path for saving.
        output_dir = Path(f'/home/jovyan/work/output/frames/{file}')
        output_dir.mkdir(parents=True, exist_ok=True)

        # Time difference between start of experiment and video start time.
        # NOTE(review): the third field of the stored start time is parsed and
        # then discarded, so the experiment start is truncated to whole
        # minutes. The earlier three-line version had exactly this effect.
        # Confirm whether that field should contribute seconds.
        start_time = datetime.datetime.strptime(
            meta_file[file][START], '%H:%M:%f'
        ).replace(microsecond=0)
        video_time = datetime.datetime.strptime(time_dict[file], '%H:%M:%S')

        # A video time of exactly 00:00:00 yields no offset (presumably a
        # placeholder for "unknown"; confirm).
        if start_time > video_time and video_time.time() != midnight:
            diff = (start_time - video_time).seconds
        else:
            diff = 0

        # Get the event tables.
        ki_df = pd.read_csv(meta_file[file][KI])
        mu_df = pd.read_csv(meta_file[file][MU])
        ki_df_short = ki_df[['timestamp_start_short', 'label']]
        mu_df_short = mu_df[['timestamp_start_short', 'label']]

        cap = cv2.VideoCapture(str(meta_file[file][PATH]))
        try:
            if not cap.isOpened():
                print(f'could not open video for {file}: {meta_file[file][PATH]}')
                continue

            # Keep the frame rate as a float: truncating e.g. 29.97 to 29
            # makes the computed frame index drift further with every second.
            fps = cap.get(cv2.CAP_PROP_FPS)
            num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Frame index of the beginning of the experiment.
            start_idx = int(fps * diff)

            # Rows are paired by position; only the child's timestamp selects
            # the frame. iterrows() is kept so the values that end up in the
            # file name are formatted exactly as before.
            for index, (ki_row, mu_row) in enumerate(zip(ki_df_short.iterrows(), mu_df_short.iterrows())):
                _, ki_data = ki_row
                _, mu_data = mu_row
                ki_timestamp, ki_label = ki_data['timestamp_start_short'], ki_data['label']
                mu_label = mu_data['label']

                # assumes timestamp_start_short is numeric seconds (TODO confirm)
                frame_idx = int(start_idx + int(ki_timestamp * fps))
                if frame_idx >= num_frames:
                    break

                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ok, frame = cap.read()
                if not ok or frame is None:
                    # imwrite raises on an empty frame; skip this event
                    # instead of aborting the whole run.
                    print(f'could not read frame {frame_idx} of {file}')
                    continue

                res_path = str(output_dir / f'{file}_frame_{index}_timestamp_{ki_timestamp}_MU_{mu_label}_KI_{ki_label}.jpg')
                cv2.imwrite(res_path, frame)
        finally:
            # Release the capture even when an event row raises.
            cap.release()

In [None]:
# Extract the event frames for every video in the metadata dict built above.
extract_frames(time_path, meta_path, file_dict)