In [1]:
%pip install opencv-python-headless==4.9.0.80
import cv2
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import re
import json
import pandas as pd
from datetime import datetime
print('install successfull')

Note: you may need to restart the kernel to use updated packages.
install successfull


In [3]:
# Root folder that is searched recursively for recordings and CSV files.
BASE_PATH = Path('/home/jovyan/work/videos/')

# Both spellings of the extension are globbed; upper-case hits come first,
# followed by the lower-case ones.
VIDEO_PATHS = [
    video
    for pattern in ('*.MP4', '*.mp4')
    for video in BASE_PATH.rglob(pattern)
]
CSV_PATHS = [*BASE_PATH.rglob('*.csv')]

# A video is dropped from a list when its full path matches any alternative
# of the regular expression ("gelöschte" is German for "deleted").
T1_PATHS_str = [
    str(video)
    for video in VIDEO_PATHS
    if re.search('gelöschte|clipped|T2', str(video)) is None
]
T2_PATHS_str = [
    str(video)
    for video in VIDEO_PATHS
    if re.search('gelöschte|clipped|T1', str(video)) is None
]

# The filtered T1 entries again, this time as Path objects.
T1_PATHS = list(map(Path, T1_PATHS_str))

# Fragments of the annotation file names that belong to a video.
ELAN = '_ELAN'
SYNC = '_sync'

In [None]:
class VideoPreprocessor():
    """Build and maintain a JSON index of videos and their companion files.

    The index (``files_dict``) maps a video stem to a dict with the keys
    ``start_end_file``, ``analysis_file``, ``path``, ``start_time`` and
    ``end_time``. Every public step (``find_files``, ``get_times``,
    ``get_analysis``) rewrites ``<output_dir>/files.json`` with the current
    state of the index and also stores it on ``self.files_dict``.
    """

    # File-name fragments used to derive the annotation (.txt) file name from
    # a video stem. They mirror the notebook-level ELAN / SYNC constants so the
    # class does not depend on notebook globals being defined.
    ELAN = '_ELAN'
    SYNC = '_sync'
    # Position inside the video stem at which ELAN is inserted.
    # NOTE(review): presumably the length of a fixed-width stem prefix — confirm.
    ELAN_INSERT_AT = 13
    # Used when no ``output_dir`` is passed to the constructor.
    DEFAULT_OUTPUT_DIR = Path('/home/jovyan/work/output')
    # Column names assigned to the header-less annotation file.
    ANALYSIS_COLUMNS = [
        'category', 'file', 'timestamp_start_long', 'timestamp_start_short',
        'timestamp_end_long', 'timestamp_end_short', 'length_long', 'length_short',
        'label', 'label_str',
    ]

    def __init__(self, base_path, video_paths, csv_paths, output_dir=None):
        """Store the inputs and pre-compute the T1 / T2 path lists.

        Args:
            base_path: Root folder of the recordings (stored, not read here).
            video_paths: List of video paths; ``find_files`` iterates over it.
            csv_paths: List of CSV paths (stored, not read here).
            output_dir: Folder for ``files.json`` and the per-video analysis
                folders. Defaults to ``DEFAULT_OUTPUT_DIR``.
        """
        self.base_path = base_path
        self.video_paths = video_paths
        self.csv_paths = csv_paths
        self.output_dir = Path(output_dir) if output_dir is not None else self.DEFAULT_OUTPUT_DIR
        # Filter the paths handed to this instance, not a notebook global.
        self.t1_paths = self._paths_without(video_paths, 'T2')
        self.t2_paths = self._paths_without(video_paths, 'T1')
        self.files_dict = {}

    @staticmethod
    def _paths_without(paths, other_recording):
        """Return ``paths`` as strings, dropping deleted, clipped and ``other_recording`` entries."""
        # "gelöschte" is German for "deleted".
        pattern = 'gelöschte|clipped|' + other_recording
        return [str(path) for path in paths if not re.search(pattern, str(path))]

    @staticmethod
    def _first_match(directory, suffix, needle):
        """Return the first file in ``directory`` with ``suffix`` whose path contains ``needle``.

        The needle is compared literally against the full path string, so
        regex metacharacters in a video stem cannot break the lookup.
        Candidates are sorted so the choice does not depend on directory
        listing order. Returns the path as ``str`` or ``None`` if nothing matches.
        """
        candidates = sorted(
            str(entry)
            for entry in directory.iterdir()
            if entry.is_file() and entry.suffix == suffix
        )
        return next((candidate for candidate in candidates if needle in candidate), None)

    @staticmethod
    def _format_time(row):
        """Join the ``hour``, ``minute`` and ``milisecond`` values of ``row`` with ':'.

        NOTE(review): the third component is read from the column spelled
        ``milisecond``; confirm that this is the intended field and unit.
        """
        return ':'.join(str(row[column]) for column in ('hour', 'minute', 'milisecond'))

    def _save_files_dict(self, files_dict):
        """Write ``files_dict`` to ``<output_dir>/files.json``, store it on ``self`` and return it."""
        self.output_dir.mkdir(parents=True, exist_ok=True)
        res_dir = str(self.output_dir / 'files.json')
        with open(res_dir, 'w', encoding='utf-8') as json_file:
            json.dump(files_dict, json_file, indent=4)
        print(f'saved file into: {res_dir}')

        self.files_dict = files_dict
        return files_dict

    def get_paths_as_str(self, path_list):
        """Return the entries of ``path_list`` converted to ``Path`` objects.

        Despite its name, this method produces ``Path`` objects, not strings.
        """
        return [Path(file) for file in path_list]

    def find_files(self):
        """Locate the start/end CSV and the annotation file of every video.

        For a video ``<folder>/<sub>/<stem>.<ext>`` the CSV is searched in
        ``<folder>`` and the annotation ``.txt`` in ``<folder>/Videoanalyse``.

        NOTE(review): a video whose ``<folder>`` has no ``Videoanalyse``
        directory gets no entry in the index at all. This is preserved from
        the previous behaviour; confirm that it is intended.

        Returns:
            dict: The new index, also saved to ``files.json``.
        """
        files_dict = {}

        for video in tqdm(self.video_paths):
            video = Path(video)
            name = video.stem
            folder = video.parent.parent

            videoanalyse_folder = folder / 'Videoanalyse'
            if not videoanalyse_folder.is_dir():
                continue

            cut = self.ELAN_INSERT_AT
            txt_name = name[:cut] + self.ELAN + name[cut:] + self.SYNC

            files_dict[str(name)] = {
                'start_end_file': self._first_match(folder, '.csv', name),
                'analysis_file': self._first_match(videoanalyse_folder, '.txt', txt_name),
                'path': str(video),
                'start_time': 0,
                'end_time': 0
            }

        return self._save_files_dict(files_dict)

    def get_times(self):
        """Fill ``start_time`` / ``end_time`` of every index entry.

        The first CSV row provides the start time and the second row the end
        time. Entries without a ``start_end_file`` get ``'00:00:00'`` for both.

        Returns:
            dict: The updated index, also saved to ``files.json``.

        Raises:
            IndexError: If a CSV file has fewer than two data rows.
            KeyError: If a CSV file lacks one of the expected columns.
        """
        files_dict = self.files_dict

        for _, file_info in tqdm(files_dict.items()):
            se_file = file_info.get('start_end_file')
            if se_file is None:
                file_info['start_time'] = '00:00:00'
                file_info['end_time'] = '00:00:00'
                continue

            df = pd.read_csv(se_file)
            file_info['start_time'] = self._format_time(df.iloc[0])
            file_info['end_time'] = self._format_time(df.iloc[1])

        return self._save_files_dict(files_dict)

    def get_analysis(self):
        """Extract the 'SE' rows of every annotation file into ``analysis.csv``.

        Each annotation file is read as tab-separated text without a header.
        Rows whose ``category`` contains ``'SE'`` are written to
        ``<output_dir>/<video stem>/analysis.csv`` and the entry's
        ``analysis_file`` is pointed at that new file. Files that cannot be
        parsed are reported and skipped; their entry stays unchanged.

        Returns:
            dict: The updated index, also saved to ``files.json``.
        """
        files_dict = self.files_dict

        for file, file_info in tqdm(files_dict.items()):
            sync_file = file_info['analysis_file']
            if sync_file is None:
                continue

            try:
                df = pd.read_csv(sync_file, sep='\t', header=None)
                df.columns = self.ANALYSIS_COLUMNS
                # na=False keeps rows with a missing category out of the mask
                # instead of failing on a NaN-containing boolean index.
                filtered_df = df[df['category'].str.contains('SE', na=False)]

                output_dir = self.output_dir / file
                output_dir.mkdir(parents=True, exist_ok=True)
                res_path = str(output_dir / 'analysis.csv')
                filtered_df.to_csv(res_path, index=False)

                file_info['analysis_file'] = res_path
            except (pd.errors.ParserError, ValueError) as e:
                # ValueError covers empty files (EmptyDataError) and files whose
                # column count differs from ANALYSIS_COLUMNS, so that one bad
                # file does not abort the whole run.
                print(f"Error parsing CSV file: {e} with file {sync_file}")
                continue

        return self._save_files_dict(files_dict)