In [7]:
# Import necessary libraries
import pandas as pd
import re

In [32]:

#-------------------------------------------------------------------------------------------------------------------------------------------------#

#------------------------------------------------------------ Data Cleaning global  functions------------------------------------------------------------------------#
# Function to strip unwanted characters from the video names stored in a CSV file
def remove_unwanted_char(csv_file):
    """
    Clean the 'video_name' column of a CSV file and rewrite the file in place.

    Apostrophes (straight and curly), double quotes, hyphens, commas and
    whitespace are removed from every non-missing video name. The cleaned
    names are stored in both 'video_name' and 'video_correct_naming', the
    non-missing names are printed, and the whole table is written back to
    ``csv_file``.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file with a 'video_name' column. It is read and written with the
        ISO-8859-1 encoding.

    Returns
    -------
    None
    """
    df = pd.read_csv(csv_file, encoding='ISO-8859-1')

    # Inside a raw string the whitespace class is written `\s`. The previous
    # `\\s` matched a literal backslash and the letter "s", so lowercase "s"
    # was stripped from names while spaces were kept.
    # NOTE(review): text decoded as ISO-8859-1 only holds code points up to
    # U+00FF, so the curly apostrophe (U+2019) entry cannot match here; a
    # Windows-1252 apostrophe would arrive as '\x92' -- confirm whether that
    # should be stripped as well.
    unwanted_chars = re.compile(r'[’\'"\-,\s]')

    # Rows without a name are skipped by dropna() and stay NaN after the
    # index-aligned assignment.
    df['video_correct_naming'] = df['video_name'].dropna().apply(
        lambda name: unwanted_chars.sub('', name)
    )
    df['video_name'] = df['video_correct_naming']

    print('The video names are now in proper naming convention!\n')
    video_names = df['video_name']
    print(video_names[video_names.notna()], "\n\n")

    # Save the modified DataFrame back to the CSV file with the same encoding
    df.to_csv(csv_file, index=False, encoding='ISO-8859-1')


# Function to convert time string to seconds
def time_to_seconds(time_str):
    """
    Convert a time string in the format "HH:MM:SS" or "HH:MM" to seconds.

    Parameters
    ----------
    time_str : str
        Colon-separated time. With two fields the value is read as hours and
        minutes, and the seconds default to 0.

    Returns
    -------
    int
        ``hours * 3600 + minutes * 60 + seconds``.

    Raises
    ------
    ValueError
        If the string does not have exactly two or three colon-separated
        fields, or if a field is not an integer.
    """
    parts = time_str.split(':')

    # Reject anything that is not HH:MM or HH:MM:SS up front. Extra fields
    # used to be ignored silently (dropping the seconds), and a single field
    # failed with an unhelpful unpacking error.
    if len(parts) not in (2, 3):
        raise ValueError(
            f'Expected a time in "HH:MM" or "HH:MM:SS" format, got {time_str!r}'
        )

    h, m = map(int, parts[:2])
    s = int(parts[2]) if len(parts) == 3 else 0

    return h * 3600 + m * 60 + s


In [26]:
#-------------------------------------------------------------------------------------------------------------------------------------------------#

#---------------------------------------------------- Main Script:: Time stamp validation of the csv file  ------------------------------------------------------------------------#


# Function to Validate time stamp
def time_stamp_CSV_File_validation(excel_file,start,stop):
    """
    Clean the video naming convention and check, row by row, that the stop
    time stamp is not earlier than the start time stamp used for cropping.

    The file is first passed to ``remove_unwanted_char``, which rewrites it in
    place, and is then re-read for the timestamp check. One line is printed
    per row that has a start time.

    Parameters
    ----------
    excel_file : str or path-like
        Path to the CSV file (despite the parameter name, it is read with
        ``pd.read_csv``). Must contain a 'video_name' column.
    start : str
        Name of the column holding the start time stamps.
    stop : str
        Name of the column holding the stop time stamps.

    Returns
    -------
    str
        The constant message 'Check completed!'.

    Raises
    ------
    ValueError
        If the file cannot be decoded with any of the candidate encodings.
    """
    # Try multiple encodings
    csv_encodings = ['utf-8', 'latin1', 'ISO-8859-1']  # Add more encodings if needed

    # Clean and update video naming convention
    print("---> Videos naming convention cleaning and Validation <---\n")
    print("------------------------------------------------\n")

    remove_unwanted_char(excel_file)
    print("Unwanted characters removed from 'video_name' column and saved to the file.")
    print("------------------------------------------------\n")
    print("------------------------------------------------\n\n")
    print("--->             Timestamp Validation                  <---\n\n")

    # Read the cleaned file with the first encoding that decodes it
    video_data = None
    decode_error = None
    for csv_encoding in csv_encodings:
        try:
            video_data = pd.read_csv(excel_file, encoding=csv_encoding)
            break  # If successful, exit the loop
        except UnicodeDecodeError as err:
            decode_error = err  # Try the next encoding if decoding fails

    if video_data is None:
        # Without this guard the loop below failed on an unbound variable.
        raise ValueError(
            f'Could not decode {excel_file!r} with any of {csv_encodings}'
        ) from decode_error

    # Validation of timestamps.
    # A video with several time stamps only carries its name on the first of
    # its rows, so the last seen name is kept for the rows that follow. The
    # placeholder covers rows that appear before any name has been seen.
    path_name = '<unknown video>'
    for _, row in video_data.iterrows():
        if not pd.isna(row['video_name']):
            path_name = row['video_name']

        if pd.isna(row[start]):
            continue

        start_time = row[start]
        stop_time = row[stop]

        if pd.isna(stop_time):
            # A start without a stop used to crash inside time_to_seconds.
            print(f'missing stop time for: {path_name} ')
            continue

        try:
            start_seconds = time_to_seconds(start_time)
            stop_seconds = time_to_seconds(stop_time)
        except (ValueError, AttributeError) as err:
            # ValueError: malformed time string. AttributeError: the cell is
            # not a string, so it has no .split(). Report the row and carry on
            # so one bad cell does not abort the whole check.
            print(f'invalid time stamp for: {path_name} ({err})')
            continue

        if start_seconds > stop_seconds:
            print(f'incorect time stamp for: {path_name} ')
        else:
            print(f'stop time: {stop_time} is OK!')
    return 'Check completed!'

In [33]:
# excel_file = "testing_file.csv"  # Replace with the path to your CSV file

# start = 'start_time'
# stop = 'stop_time'
# time_stamp_CSV_File_validation(excel_file,start,stop)

---> Videos naming convention cleaning and Validation <---

------------------------------------------------

The video names are now in proper naming convention!

0     NSMQ_2019_ONEEIGHTH_POPE_JOHN_SHS_VS_NOTRE_DAM...
4     NSMQ_2019_ONEEIGHTH_PRESEC_LEGON_VS_ASSIN_STAT...
8     NSMQ_2019_ONEEIGHTH_ST._JOHNS_SCHOOL_VS_TAMALE...
12    NSMQ_2019_ONEEIGHTH_KUMASI_ACADEMY__VS_ANLO_SH...
16    NSMQ_2019_ONEEIGHTH_GHANA_SHS_TAMALE_VS_ARMED_...
20    NSMQ_2019_ONEEIGHTH_ADISADEL_COLLEGE_VS_NSUTAM...
24    NSMQ_2019_ONEEIGHTH_ST._IGNATIUS_OF_LOYOLA_SHS...
32    NSMQ_2019_ONEEIGHTH_OPOKU_WARE_SCHOOL_VS_ANGLI...
40    NSMQ_2019_ONEEIGHTH_TEPA_SHS_VS_ISLAMIC_SHS_KU...
Name: video_name, dtype: object 


------------------------------------------------

------------------------------------------------


--->             Timestamp Validation                  <---


stop time: 01:48:57 is OK!
stop time: 01:49:36 is OK!
stop time: 01:50:23 is OK!
stop time: 01:59:41 is OK!
stop time: 02:45 is OK!
st

'Check completed!'