In [20]:
import pandas as pd
import json, os

import cv2

def get_video_duration(video_path):
    """Return the duration of a video in seconds, or ``None`` on failure.

    The duration is the frame count divided by the frame rate, both as
    reported by OpenCV for the file.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.

    Returns:
        The duration in seconds as a float, or ``None`` when the video
        cannot be opened, reports a non-positive frame rate, or any error
        is raised while reading it. Failures are printed, never raised.
    """
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)

        # VideoCapture does not raise for a missing/unreadable file; it just
        # yields an unopened capture, so the state must be checked explicitly.
        if not cap.isOpened():
            print(f"Error occurred: could not open video '{video_path}'")
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        # A frame rate of 0 (or less) means the property is unavailable;
        # dividing by it would raise or produce a meaningless duration.
        if fps <= 0:
            print(f"Error occurred: invalid frame rate ({fps}) for '{video_path}'")
            return None

        return total_frames / fps
    except Exception as e:
        # Best-effort by design: callers expect None instead of an exception.
        print(f"Error occurred: {str(e)}")
        return None
    finally:
        # Release the capture on every path, including early returns/errors.
        if cap is not None:
            cap.release()

def generate_json(csv_file, video_files_path, output_file="test.json"):
    """Append a JSON array describing the clips that exist on disk.

    Reads the tab-separated ``csv_file``, renames ``SENTENCE`` to
    ``SENTENCE_DESCRIPTION``, drops the video-level and alignment columns,
    and keeps only the rows whose ``SENTENCE_NAME`` matches the stem of a
    file in ``video_files_path``. Each kept row gains ``SENTENCE_FILE_PATH``
    and ``SENTENCE_DURATION`` (seconds, or ``None`` when the duration could
    not be determined).

    Args:
        csv_file: Path to the tab-separated annotation file.
        video_files_path: Directory containing the ``<SENTENCE_NAME>.mp4``
            clips; also used to build each ``SENTENCE_FILE_PATH``.
        output_file: File the JSON array is appended to. Defaults to
            ``"test.json"``.

    Note:
        The output is opened in append mode, so every call adds one JSON
        array on its own line. Calling this repeatedly with the same
        ``output_file`` yields one array per line rather than a single
        JSON document.
    """
    # Stems of the files present on disk. splitext removes only the final
    # extension, so names that themselves contain dots are kept intact, and
    # a set makes the membership filter below cheap.
    available = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(video_files_path)
        if os.path.isfile(os.path.join(video_files_path, entry))
    }

    df = pd.read_csv(csv_file, sep='\t')
    df = df.rename(columns={'SENTENCE': 'SENTENCE_DESCRIPTION'})

    columns_to_remove = ['VIDEO_ID', 'VIDEO_NAME', 'START_REALIGNED', 'END_REALIGNED']
    df = df.drop(columns=columns_to_remove)

    # Filter in one vectorised step instead of testing every row in Python.
    matched = df[df['SENTENCE_NAME'].isin(available)]
    json_file_output = matched.to_dict(orient='records')

    for record in json_file_output:
        # Build the path from the directory that was actually scanned rather
        # than a hard-coded location, so the function works for any folder.
        sentence_file_path = os.path.join(
            video_files_path, f"{record['SENTENCE_NAME']}.mp4"
        )
        record['SENTENCE_FILE_PATH'] = sentence_file_path
        record['SENTENCE_DURATION'] = get_video_duration(sentence_file_path)

    with open(output_file, 'a', encoding='utf-8') as f:
        json.dump(json_file_output, f)
        f.write('\n')

# Inputs for this run: the tab-separated annotation CSV and the directory
# holding the video clips, both relative to the current working directory.
csv_file = "how2sign_realigned_test.csv"
video_files_path = "./test/videos/"
# Appends one JSON array of the matching rows to test.json.
generate_json(csv_file, video_files_path)