# Synchronize Videos

## Preparation

In [None]:
!pip install pydub
!pip install moviepy



In [None]:
import os
from pydub import AudioSegment
from datetime import datetime, timedelta
from moviepy.editor import VideoFileClip

## Function Definition

### Get Mp4 Start, End and Duration

In [None]:
def get_mp4_start_end_duration(file_path):
    """
    Get the start time, end time, and duration of an MP4 file.

    The end time is taken from the file's last-modified timestamp and the
    start time is derived by subtracting the clip duration from it.
    NOTE(review): this presumes the file was last written when the recording
    stopped and has not been modified (copied/edited) since - confirm.

    Args:
        file_path (str): The path to the MP4 file.

    Returns:
        tuple: (start_time, end_time, duration) where start_time and end_time
               are naive datetime objects in local time and duration is the
               clip length in seconds as reported by moviepy.

    Raises:
        OSError: If there is an error accessing or processing the MP4 file.

    """

    try:
        # The last-modified timestamp is used as the end of the recording
        last_modified_datetime = datetime.fromtimestamp(os.path.getmtime(file_path))

        # Only the duration is needed, so release the clip's reader as soon
        # as it has been read instead of leaving it open for every file.
        video_clip = VideoFileClip(file_path)
        try:
            duration = video_clip.duration
        finally:
            video_clip.close()

        # Calculate the start time based on the last modified timestamp and duration
        start_time = last_modified_datetime - timedelta(seconds=duration)

        return start_time, last_modified_datetime, duration

    except OSError as e:
        raise OSError("Error accessing or processing the MP4 file.") from e


### Get Wav Start, End and Duration

In [None]:
def get_wav_start_end_duration(file_path):
    """
    Return when a WAV recording started, when it ended, and how long it lasted.

    The start time is parsed from the timestamp embedded in the file name;
    the end time is that start plus the audio length.

    Args:
        file_path (str): The path to the input WAV file.

    Returns:
        tuple: (start_time, end_time, duration) with both times as datetime
               objects and the duration in seconds (float).

    Raises:
        FileNotFoundError: If the input file_path does not exist.
        Exception: If loading the audio or parsing the file name fails.

    """

    # Fail early with a precise error when the path is wrong
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Input file '{file_path}' does not exist.")

    try:
        # len() of the loaded segment is in milliseconds; convert to seconds
        duration = len(AudioSegment.from_wav(file_path)) / 1000

        # The recording start is encoded in the file name
        start_time = extract_timestamp_from_wav_filename(file_path)

        end_time = start_time + timedelta(seconds=duration)

    except Exception as e:
        raise Exception("Error occurred during extraction.") from e

    return start_time, end_time, duration


### Print Time-Range Dictionary

In [None]:
def print_dictionary(time_ranges):
    """
    Print every file's time-range entries in a readable form.

    For each file the base name is printed, followed by one line per entry:
    datetime values are shown as HH:MM:SS.mmm, any other value is shown
    followed by an 's' (seconds) suffix.

    Args:
        time_ranges (dict): Maps a file path to a dict of named values.

    """

    for path, entries in time_ranges.items():
        print('\n', os.path.basename(path))
        for label, item in entries.items():
            if not isinstance(item, datetime):
                # Non-datetime entries are durations expressed in seconds
                print(label, ': ', item, 's')
                continue
            # Drop the last three digits of the microseconds -> milliseconds
            print(label, ': ', item.strftime("%H:%M:%S.%f")[:-3])


### Extract Timestamp from WAV Filename

In [None]:
def extract_timestamp_from_wav_filename(filename):
    """
    Parse the recording timestamp embedded at the end of a WAV file name.

    The text after the second-to-last underscore, minus the final four
    characters (the ".wav" extension), must look like "YYYY-MM-DD_HH.MM.SS".

    Args:
        filename (str): The file name or full path of the WAV file.

    Returns:
        datetime.datetime: The timestamp encoded in the file name.

    Raises:
        ValueError: If the filename does not follow the expected format.

    """

    try:
        # Split off the last two underscore-separated fields: date and time
        pieces = filename.rsplit('_', 2)
        if len(pieces) < 3:
            raise IndexError("fewer than two underscores in filename")

        # Re-join date and time, then strip the 4-character extension
        stamp = f"{pieces[1]}_{pieces[2]}"[:-4]

        return datetime.strptime(stamp, "%Y-%m-%d_%H.%M.%S")

    except Exception as e:
        raise ValueError("Invalid filename format.") from e

In [None]:
# Example: parse the recording start time embedded in a WAV file path.
# The function only inspects the path string, so the file itself is not read.
# The path below is machine-specific; replace it with one of your own recordings.
example_path=r"C:\Users\alvar\Desktop\plants measurments\230429 SJ\230429_SJ1_M1_BYB_Recording_2023-04-29_14.43.34.wav"
extract_timestamp_from_wav_filename(example_path)

datetime.datetime(2023, 4, 29, 14, 43, 34)

### Get Time Difference

In [None]:
def get_time_difference(dt1, dt2):
    """
    Return the absolute gap between two datetimes, in seconds.

    The order of the arguments does not matter: the earlier value is always
    subtracted from the later one, so the result is never negative.

    Args:
        dt1 (datetime): The first datetime object.
        dt2 (datetime): The second datetime object.

    Returns:
        float: The time difference in seconds.

    Raises:
        TypeError: If either dt1 or dt2 is not a datetime object.

    Examples:
        >>> import datetime
        >>> dt1 = datetime.datetime(2023, 6, 30, 12, 0, 0)
        >>> dt2 = datetime.datetime(2023, 6, 30, 11, 0, 0)
        >>> get_time_difference(dt1, dt2)
        3600.0

    """

    if not (isinstance(dt1, datetime) and isinstance(dt2, datetime)):
        raise TypeError("Both dt1 and dt2 must be datetime objects.")

    # Order the pair so the subtraction always yields a non-negative span
    later, earlier = (dt1, dt2) if dt1 > dt2 else (dt2, dt1)

    return (later - earlier).total_seconds()

### Cut the wav file

In [None]:
from datetime import timedelta

def cut_wav_file(file_path, start, end):
    """
    Return the portion of a WAV file between two offsets.

    Args:
        file_path (str): The path to the input WAV file.
        start (float): Offset of the first kept sample, in seconds.
        end (float): Offset where the cut stops, in seconds.

    Returns:
        AudioSegment: The cut audio segment.

    Raises:
        FileNotFoundError: If the input file does not exist.
        ValueError: If the start or end time is invalid.
        Exception: If an error occurs during file cutting.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Input file '{file_path}' does not exist.")

    # Both offsets must be non-negative and describe a non-empty window
    window_is_invalid = start < 0 or end < 0 or start >= end
    if window_is_invalid:
        raise ValueError("Invalid start or end time.")

    try:
        # Audio segments are sliced by millisecond, so convert both offsets
        start_ms, end_ms = [
            int(timedelta(seconds=offset).total_seconds() * 1000)
            for offset in (start, end)
        ]

        return AudioSegment.from_file(file_path)[start_ms:end_ms]

    except Exception as e:
        raise Exception("Error occurred during file cutting.") from e



### Cut Mp4 File

In [None]:
def cut_mp4_file(file_path, start, end):
    """
    Return the portion of an MP4 file between two offsets.

    Args:
        file_path (str): The path to the input MP4 file.
        start (float): Offset where the cut begins, in seconds.
        end (float): Offset where the cut stops, in seconds.

    Returns:
        VideoFileClip: The cut video clip. The clip is returned open; the
            caller decides what to do with it (e.g. write it to disk).

    Raises:
        FileNotFoundError: If the input file does not exist.
        ValueError: If the start or end time is invalid.
        Exception: If an error occurs during file cutting.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Input file '{file_path}' does not exist.")

    # Both offsets must be non-negative and describe a non-empty window
    window_is_invalid = start < 0 or end < 0 or start >= end
    if window_is_invalid:
        raise ValueError("Invalid start or end time.")

    try:
        return VideoFileClip(file_path).subclip(start, end)

    except Exception as e:
        raise Exception("Error occurred during file cutting.") from e

### Synchronize Wav & Mp4 given the files and the output directory

In [None]:
def synchronize_wav_with_mp4(initial_files, output_directory):
    """
    Synchronizes WAV and MP4 files based on their start and end times.

    Every supported file is trimmed to the time window shared by all of them
    (from the latest start to the earliest end) and written to
    output_directory as "<original name>_reduced.<ext>".

    Args:
        initial_files (list): A list of file paths. Entries whose extension is
            neither ".mp4" nor ".wav" (case-insensitive) are ignored.
        output_directory (str): The output directory path.

    Returns:
        dict: Maps each processed file path to a dict with the keys
            'initial_start', 'initial_end', 'initial_duration',
            'final_start', 'final_end' and 'final_duration'. Empty when no
            supported file was given.

    Raises:
        FileNotFoundError: If any of the input files does not exist.
        ValueError: If the recordings do not share a common time window.
        Exception: If an error occurs during the synchronization process.
    """
    files_time_ranges = {}

    for file in initial_files:
        file_extension = os.path.splitext(file)[1].lower()

        if file_extension == ".mp4":
            start, end, duration = get_mp4_start_end_duration(file)
        elif file_extension == ".wav":
            start, end, duration = get_wav_start_end_duration(file)
        else:
            # Unsupported entry (other file type, folder, ...): skip it rather
            # than reuse the previous file's values or fail on unbound names.
            continue

        files_time_ranges[file] = {
            'initial_start': start,
            'initial_end': end,
            'initial_duration': duration
        }

    if not files_time_ranges:
        return files_time_ranges

    # Look for the range of the intersection
    latest_start = max(value['initial_start'] for value in files_time_ranges.values())
    earliest_end = min(value['initial_end'] for value in files_time_ranges.values())

    # Without a shared window every cut would be rejected further down with a
    # generic message; report the real cause before any file is written.
    if latest_start >= earliest_end:
        raise ValueError(
            "The recordings do not overlap in time: "
            f"latest start {latest_start} is not before earliest end {earliest_end}."
        )

    final_duration = (earliest_end - latest_start).total_seconds()

    # Update and process the final start, end, and duration values
    for key, value in files_time_ranges.items():
        value['final_start'] = latest_start
        value['final_end'] = earliest_end
        value['final_duration'] = final_duration

        # Offsets of the shared window, in seconds from this file's own start
        start_time = get_time_difference(value['initial_start'], value['final_start'])
        end_time = value['initial_duration'] - get_time_difference(value['initial_end'], value['final_end'])

        file_stem, file_extension = os.path.splitext(os.path.basename(key))
        file_extension = file_extension.lower()
        output_file_path = os.path.join(output_directory, file_stem + "_reduced" + file_extension)

        if file_extension == ".mp4":
            clip = cut_mp4_file(key, start_time, end_time)
            try:
                clip.write_videofile(output_file_path)
            finally:
                # Release the clip's readers once the file has been written
                clip.close()

        elif file_extension == ".wav":
            cut_audio = cut_wav_file(key, start_time, end_time)
            cut_audio.export(output_file_path, format="wav")

    return files_time_ranges


### Synchronize Wav & Mp4 given the folder

In [None]:
def synchronize_wav_mp4_in_folder(directory):
    """
    Synchronize WAV and MP4 files in a given directory.

    The trimmed files are written to a sibling directory named
    "<directory>_reduced", which is created if needed.

    Args:
        directory (str): The directory path containing the files.

    Returns:
        dict: A dictionary containing the time ranges of the processed files.

    Raises:
        FileNotFoundError: If the input directory does not exist.
        NotADirectoryError: If the given path exists but is not a directory.
        Exception: If an error occurs during processing.
    """
    # Validate the input first so a wrong path does not leave an empty
    # "<directory>_reduced" folder behind.
    if not os.path.exists(directory):
        raise FileNotFoundError(f"Input directory '{directory}' does not exist.")
    if not os.path.isdir(directory):
        raise NotADirectoryError(f"Input path '{directory}' is not a directory.")

    # Create a new directory for storing the reduced files
    output_directory = directory + '_reduced'
    os.makedirs(output_directory, exist_ok=True)

    # Keep only regular MP4/WAV files; subfolders and unrelated files cannot
    # be synchronized. Sorting makes the processing order reproducible.
    initial_files = [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if os.path.splitext(filename)[1].lower() in (".mp4", ".wav")
        and os.path.isfile(os.path.join(directory, filename))
    ]

    try:
        # Synchronize and process the WAV and MP4 files
        return synchronize_wav_with_mp4(initial_files, output_directory)

    except Exception as e:
        raise Exception("Error occurred during file processing.") from e


## Use of Function

### Individual Folder

In [None]:
"""
###############
EXAMPLE OF USE
###############

-Input-
Directory: r'C:\\Users\\alvar\\Desktop\\plants measurments\\230616 SJ'
    File: '230616_SJ2_M.mp4'
    File: '230616_SJ2_M_BYB_Recording_2023-06-16_11.13.52.wav'
    File: '230616_SJ2_M_BYB_Recording_2023-06-16_11.13.54.wav'
    File: '230616_SJ2_M_BYB_Recording_2023-06-16_11.13.56.wav'
    File: '230616_SK2_M_BYB_Recording_2023-06-16_11.10.31.wav'
    File: '230616_SK2_M_BYB_Recording_2023-06-16_11.10.34.wav'
    File: '230616_SK2_M_BYB_Recording_2023-06-16_11.10.36.wav'

-Output-
Directory: 'C:\\Users\\alvar\\Desktop\\plants measurments\\230616 SJ_reduced'
    File: '230616_SJ2_M_reduced.mp4'
    File: '230616_SJ2_M_BYB_Recording_2023-06-16_11.13.52_reduced.wav'
    File: '230616_SJ2_M_BYB_Recording_2023-06-16_11.13.54_reduced.wav'
    File: '230616_SJ2_M_BYB_Recording_2023-06-16_11.13.56_reduced.wav'
    File: '230616_SK2_M_BYB_Recording_2023-06-16_11.10.31_reduced.wav'
    File: '230616_SK2_M_BYB_Recording_2023-06-16_11.10.34_reduced.wav'
    File: '230616_SK2_M_BYB_Recording_2023-06-16_11.10.36_reduced.wav'
"""

# Folder whose MP4 and WAV recordings should be trimmed to their shared time
# window (machine-specific path; replace it with your own folder).
directory = r"C:\Users\alvar\Desktop\plants measurments\1. ordered\230606 SJ"

# Trim the files; the results are written to the sibling folder "<directory>_reduced"
time_ranges = synchronize_wav_mp4_in_folder(directory)

# Show the initial and final start/end/duration of every processed file
print_dictionary(time_ranges)

Moviepy - Building video C:\Users\alvar\Desktop\plants measurments\1. ordered\230606 SJ_reduced\230606_SJ1_M_reduced.mp4.
MoviePy - Writing audio in 230606_SJ1_M_reducedTEMP_MPY_wvf_snd.mp3


                                                                     

MoviePy - Done.
Moviepy - Writing video C:\Users\alvar\Desktop\plants measurments\1. ordered\230606 SJ_reduced\230606_SJ1_M_reduced.mp4



                                                                

Moviepy - Done !
Moviepy - video ready C:\Users\alvar\Desktop\plants measurments\1. ordered\230606 SJ_reduced\230606_SJ1_M_reduced.mp4

 230606_SJ1_M.mp4
initial_start :  11:07:32.360
initial_end :  11:11:06.000
initial_duration :  213.64 s
final_start :  11:07:32.360
final_end :  11:11:01.776
final_duration :  209.416 s

 230606_SJ1_M_BYB_Recording_2023-06-06_11.07.17.wav
initial_start :  11:07:17.000
initial_end :  11:11:01.776
initial_duration :  224.776 s
final_start :  11:07:32.360
final_end :  11:11:01.776
final_duration :  209.416 s

 230606_SJ1_M_BYB_Recording_2023-06-06_11.07.19.wav
initial_start :  11:07:19.000
initial_end :  11:11:03.406
initial_duration :  224.406 s
final_start :  11:07:32.360
final_end :  11:11:01.776
final_duration :  209.416 s

 230606_SJ1_M_BYB_Recording_2023-06-06_11.07.21.wav
initial_start :  11:07:21.000
initial_end :  11:11:05.309
initial_duration :  224.309 s
final_start :  11:07:32.360
final_end :  11:11:01.776
final_duration :  209.416 s

 230606

### Folder with Subdirectories

In [None]:
def process_subdirectories(root_directory):
    """
    Synchronize the recordings of every subdirectory of a root folder.

    Each immediate subdirectory is processed independently and its time
    ranges are printed. A failure in one subdirectory is reported and the
    remaining subdirectories are still processed.

    Args:
        root_directory (str): The folder whose subdirectories are processed.

    Raises:
        OSError: If root_directory itself cannot be listed.
    """
    # Take a snapshot of the subdirectories before processing starts: the
    # "<name>_reduced" output folders are created inside this same root, and
    # must not be picked up as inputs (neither now nor on a later run).
    with os.scandir(root_directory) as entries:
        subdirectories = sorted(
            entry.path
            for entry in entries
            if entry.is_dir() and not entry.name.endswith('_reduced')
        )

    for subdirectory in subdirectories:
        try:
            print('\n### Processing',subdirectory)
            time_ranges = synchronize_wav_mp4_in_folder(subdirectory)
            print_dictionary(time_ranges)
        except Exception as e:
            # Processing errors are re-raised by the folder function as a
            # plain Exception, so catching only OSError would abort the batch.
            print(f"Error processing files in directory: {subdirectory}")
            print(f"Error message: {str(e)}")

# Root folder containing one subdirectory per recording session
# (machine-specific path; replace it with your own folder).
root_directory = r"C:\Users\alvar\Desktop\plants measurments\data"

# Synchronize every subdirectory in turn and print its time ranges
process_subdirectories(root_directory)


### Processing C:\Users\alvar\Desktop\plants measurments\data\230519 TE
Moviepy - Building video C:\Users\alvar\Desktop\plants measurments\data\230519 TE_reduced\230519_TE1_M_reduced.mp4.
MoviePy - Writing audio in 230519_TE1_M_reducedTEMP_MPY_wvf_snd.mp3


                                                                     

MoviePy - Done.
Moviepy - Writing video C:\Users\alvar\Desktop\plants measurments\data\230519 TE_reduced\230519_TE1_M_reduced.mp4



                                                                

Moviepy - Done !
Moviepy - video ready C:\Users\alvar\Desktop\plants measurments\data\230519 TE_reduced\230519_TE1_M_reduced.mp4

 230519_TE1_M.mp4
initial_start :  09:18:50.560
initial_end :  09:22:46.000
initial_duration :  235.44 s
final_start :  09:18:50.560
final_end :  09:22:44.771
final_duration :  234.211 s

 230519_TE1_M_BYB_Recording_2023-05-19_09.18.38.wav
initial_start :  09:18:38.000
initial_end :  09:22:44.771
initial_duration :  246.771 s
final_start :  09:18:50.560
final_end :  09:22:44.771
final_duration :  234.211 s

 230519_TE1_M_BYB_Recording_2023-05-19_09.18.40.wav
initial_start :  09:18:40.000
initial_end :  09:22:46.541
initial_duration :  246.541 s
final_start :  09:18:50.560
final_end :  09:22:44.771
final_duration :  234.211 s

 230519_TE1_M_BYB_Recording_2023-05-19_09.18.43.wav
initial_start :  09:18:43.000
initial_end :  09:22:49.143
initial_duration :  246.143 s
final_start :  09:18:50.560
final_end :  09:22:44.771
final_duration :  234.211 s

 230519_TK1_M

                                                                     

MoviePy - Done.
Moviepy - Writing video C:\Users\alvar\Desktop\plants measurments\data\230606 SE_reduced\230606_SE1_M_reduced.mp4



                                                                

Moviepy - Done !
Moviepy - video ready C:\Users\alvar\Desktop\plants measurments\data\230606 SE_reduced\230606_SE1_M_reduced.mp4

 230606_SE1_M.mp4
initial_start :  11:17:20.860
initial_end :  11:20:26.000
initial_duration :  185.14 s
final_start :  11:17:20.860
final_end :  11:20:22.638
final_duration :  181.778 s

 230606_SE1_M_BYB_Recording_2023-06-06_11.17.07.wav
initial_start :  11:17:07.000
initial_end :  11:20:22.638
initial_duration :  195.638 s
final_start :  11:17:20.860
final_end :  11:20:22.638
final_duration :  181.778 s

 230606_SE1_M_BYB_Recording_2023-06-06_11.17.09.wav
initial_start :  11:17:09.000
initial_end :  11:20:24.367
initial_duration :  195.367 s
final_start :  11:17:20.860
final_end :  11:20:22.638
final_duration :  181.778 s

 230606_SE1_M_BYB_Recording_2023-06-06_11.17.11.wav
initial_start :  11:17:11.000
initial_end :  11:20:26.862
initial_duration :  195.862 s
final_start :  11:17:20.860
final_end :  11:20:22.638
final_duration :  181.778 s

 230606_SK1_M

                                                                     

MoviePy - Done.
Moviepy - Writing video C:\Users\alvar\Desktop\plants measurments\data\230612 SE_reduced\230612_SE1_M_reduced.mp4



                                                                

Moviepy - Done !
Moviepy - video ready C:\Users\alvar\Desktop\plants measurments\data\230612 SE_reduced\230612_SE1_M_reduced.mp4

 230612_SE1_M.mp4
initial_start :  14:12:58.540
initial_end :  14:16:36.000
initial_duration :  217.46 s
final_start :  14:12:58.540
final_end :  14:16:34.062
final_duration :  215.522 s

 230612_SE1_M_BYB_Recording_2023-06-12_14.12.45.wav
initial_start :  14:12:45.000
initial_end :  14:16:34.062
initial_duration :  229.062 s
final_start :  14:12:58.540
final_end :  14:16:34.062
final_duration :  215.522 s

 230612_SE1_M_BYB_Recording_2023-06-12_14.12.47.wav
initial_start :  14:12:47.000
initial_end :  14:16:35.942
initial_duration :  228.942 s
final_start :  14:12:58.540
final_end :  14:16:34.062
final_duration :  215.522 s

 230612_SE1_M_BYB_Recording_2023-06-12_14.12.49.wav
initial_start :  14:12:49.000
initial_end :  14:16:37.336
initial_duration :  228.336 s
final_start :  14:12:58.540
final_end :  14:16:34.062
final_duration :  215.522 s

 230612_SK1_M

                                                                     

MoviePy - Done.
Moviepy - Writing video C:\Users\alvar\Desktop\plants measurments\data\230612 SJ_reduced\230612_SJ1_M_reduced.mp4



                                                                

Moviepy - Done !
Moviepy - video ready C:\Users\alvar\Desktop\plants measurments\data\230612 SJ_reduced\230612_SJ1_M_reduced.mp4

 230612_SJ1_M.mp4
initial_start :  14:03:30.330
initial_end :  14:07:02.000
initial_duration :  211.67000000000002 s
final_start :  14:03:30.330
final_end :  14:06:59.383
final_duration :  209.053 s

 230612_SJ1_M_BYB_Recording_2023-06-12_14.03.17.wav
initial_start :  14:03:17.000
initial_end :  14:06:59.383
initial_duration :  222.383 s
final_start :  14:03:30.330
final_end :  14:06:59.383
final_duration :  209.053 s

 230612_SJ1_M_BYB_Recording_2023-06-12_14.03.19.wav
initial_start :  14:03:19.000
initial_end :  14:07:01.131
initial_duration :  222.131 s
final_start :  14:03:30.330
final_end :  14:06:59.383
final_duration :  209.053 s

 230612_SJ1_M_BYB_Recording_2023-06-12_14.03.21.wav
initial_start :  14:03:21.000
initial_end :  14:07:03.256
initial_duration :  222.256 s
final_start :  14:03:30.330
final_end :  14:06:59.383
final_duration :  209.053 s

 

                                                                     

MoviePy - Done.
Moviepy - Writing video C:\Users\alvar\Desktop\plants measurments\data\230612 TE_reduced\230612_TE1_M_reduced.mp4



                                                                

Moviepy - Done !
Moviepy - video ready C:\Users\alvar\Desktop\plants measurments\data\230612 TE_reduced\230612_TE1_M_reduced.mp4

 230612_TE1_M.mp4
initial_start :  14:45:03.900
initial_end :  14:49:40.000
initial_duration :  276.1 s
final_start :  14:45:03.900
final_end :  14:49:39.042
final_duration :  275.142 s

 230612_TE1_M_BYB_Recording_2023-06-12_14.44.51.wav
initial_start :  14:44:51.000
initial_end :  14:49:39.042
initial_duration :  288.042 s
final_start :  14:45:03.900
final_end :  14:49:39.042
final_duration :  275.142 s

 230612_TE1_M_BYB_Recording_2023-06-12_14.44.53.wav
initial_start :  14:44:53.000
initial_end :  14:49:41.128
initial_duration :  288.128 s
final_start :  14:45:03.900
final_end :  14:49:39.042
final_duration :  275.142 s

 230612_TE1_M_BYB_Recording_2023-06-12_14.44.55.wav
initial_start :  14:44:55.000
initial_end :  14:49:42.965
initial_duration :  287.965 s
final_start :  14:45:03.900
final_end :  14:49:39.042
final_duration :  275.142 s

 230612_TK1_M_

                                                                     

MoviePy - Done.
Moviepy - Writing video C:\Users\alvar\Desktop\plants measurments\data\230612 TJ_reduced\230612_TJ1_M_reduced.mp4



                                                                

Moviepy - Done !
Moviepy - video ready C:\Users\alvar\Desktop\plants measurments\data\230612 TJ_reduced\230612_TJ1_M_reduced.mp4

 230612_TJ1_M.mp4
initial_start :  14:31:23.830
initial_end :  14:35:56.000
initial_duration :  272.17 s
final_start :  14:31:23.830
final_end :  14:35:55.167
final_duration :  271.337 s

 230612_TJ1_M_BYB_Recording_2023-06-12_14.31.13.wav
initial_start :  14:31:13.000
initial_end :  14:35:55.167
initial_duration :  282.167 s
final_start :  14:31:23.830
final_end :  14:35:55.167
final_duration :  271.337 s

 230612_TJ1_M_BYB_Recording_2023-06-12_14.31.14.wav
initial_start :  14:31:14.000
initial_end :  14:35:56.298
initial_duration :  282.298 s
final_start :  14:31:23.830
final_end :  14:35:55.167
final_duration :  271.337 s

 230612_TJ1_M_BYB_Recording_2023-06-12_14.31.16.wav
initial_start :  14:31:16.000
initial_end :  14:35:57.996
initial_duration :  281.996 s
final_start :  14:31:23.830
final_end :  14:35:55.167
final_duration :  271.337 s

 230612_TK1_M