In [9]:
import csv
import pandas as pd
from datetime import datetime, timedelta

# Path of the evaluation sheet (CSV) that is validated when this cell runs.
csv_path = "Internals-Cricket GT Evaluation  - Sujeet.csv"
# Longest allowed gap between an event's start and end, in seconds;
# check_df_timestamp flags any row whose duration exceeds it.
MAX_CLIP_LENGTH = 60

def find_header_line(path):
    """Return the zero-based index of the CSV row that holds the column headers.

    The sheet may start with rows that precede the real header, so the file
    is scanned row by row until one contains every expected header cell. The
    returned index equals the number of rows that come before the header.

    Args:
        path: Path of the CSV file to scan.

    Returns:
        The number of rows that precede the header row.

    Raises:
        ValueError: If no row contains all of the expected header cells.
    """
    # match header row by looking at items in header list
    header_list = ['Time Stamp']
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires for files passed to csv.reader.
    with open(path, 'r', newline='') as f:
        for index, row in enumerate(csv.reader(f)):
            if all(item in row for item in header_list):
                return index
    # Returning the total row count here would make the caller skip the whole
    # file and fail later with an unrelated error, so fail loudly instead.
    raise ValueError(
        f"No header row containing {header_list} found in {path!r}"
    )

def string_to_time(str_value):
    """Parse an ``MM:SS`` or ``HH:MM:SS`` string into a ``datetime``.

    The format is chosen by the number of colons in the value: one colon is
    parsed as minutes:seconds, two colons as hours:minutes:seconds.
    Surrounding whitespace is ignored when parsing.

    Args:
        str_value: The time text to parse. Falsy values (``None``, ``""``)
            are returned unchanged.

    Returns:
        The parsed ``datetime``, or ``str_value`` itself when it is falsy.

    Raises:
        Exception: If the value is not a string, has an unsupported number of
            colons, or cannot be parsed with the selected format. When the
            parse itself fails, the original ``ValueError`` is chained as the
            cause.
    """
    if not str_value:
        return str_value

    error_message = f'Incorrect time format :: {str_value}. Please check..'

    # Truthy non-string input (e.g. a float NaN) can never be a valid time.
    if not isinstance(str_value, str):
        raise Exception(error_message)

    colon_count = str_value.count(':')
    if colon_count == 1:
        time_format = '%M:%S'
    elif colon_count == 2:
        time_format = '%H:%M:%S'
    else:
        raise Exception(error_message)

    # Only the parse can fail here; catching ValueError alone avoids trapping
    # KeyboardInterrupt/SystemExit the way a bare except would.
    try:
        return datetime.strptime(str_value.strip(), time_format)
    except ValueError as err:
        raise Exception(error_message) from err

def check_order(df_series):
    """Verify that consecutive values of a series never go backwards.

    Each value is compared with the one that follows it; equal neighbours
    are accepted.

    Args:
        df_series: Series of time values whose differences can be compared
            with a ``timedelta``.

    Raises:
        Exception: On the first neighbouring pair whose later value is
            smaller than the earlier one; the message names both positions
            and both values.
    """
    earlier_values = df_series.iloc[:-1]
    later_values = df_series.iloc[1:]
    for position, (previous, current) in enumerate(zip(earlier_values, later_values)):
        went_backwards = (current - previous) < timedelta(0)
        if went_backwards:
            raise Exception(f'Index {position} and {position + 1} not in increasing order.\nTIME STAMPS :: {previous}\t{current}')

def get_dataframe(path, time_column):
    """Load the evaluation sheet and split its time-range column into parsed times.

    The rows that precede the header are skipped, all-null columns are
    dropped, and each ``"start-end"`` value in ``time_column`` is split into
    ``Start_Time`` and ``End_Time`` columns parsed by ``string_to_time``.
    The start times are then checked to be in non-decreasing order.

    Args:
        path: Path of the CSV file to read.
        time_column: Name of the column holding the ``"start-end"`` strings.

    Returns:
        The loaded DataFrame with ``Start_Time`` and ``End_Time`` added.

    Raises:
        Exception: If a time value cannot be parsed or the start times are
            not in increasing order.
    """
    # find rows to skip and read csv from there
    skip_count = find_header_line(path)
    df = pd.read_csv(path, skiprows=skip_count)

    # drop columns with all null values (if exists)
    df = df.dropna(axis=1, how='all')

    # create different columns for start and end time.
    # n=1 caps the split at two pieces per row, so a value with a stray extra
    # hyphen is reported as a bad time format by string_to_time instead of
    # breaking this two-column assignment with a column-count mismatch.
    df[['Start_Time', 'End_Time']] = df[time_column].str.split("-", n=1, expand=True)

    # replace nan with None
    df = df.where(df.notnull(), None)

    # convert dtype from string to datetime
    df['Start_Time'] = df['Start_Time'].apply(string_to_time)
    df['End_Time'] = df['End_Time'].apply(string_to_time)

    # check that start times are in increasing order (end times are not
    # order-checked here)
    check_order(df['Start_Time'])
    print("Start time already in ascending order.")
    df.reset_index(drop=True, inplace=True)
    return df

def check_df_timestamp(df):
    """Collect the raw time-stamp labels of rows whose start/end pair is invalid.

    A row is reported when its start is not strictly before its end, or when
    the gap between them is longer than ``MAX_CLIP_LENGTH`` seconds.

    Args:
        df: DataFrame with ``Time Stamp``, ``Start_Time`` and ``End_Time``
            columns.

    Returns:
        A list of the ``Time Stamp`` values of the offending rows, in row
        order; empty when every row passes.
    """
    def _is_invalid(start, end):
        # Start must come strictly before end.
        if start >= end:
            return True
        # Otherwise the only remaining failure is an over-long clip.
        return (end - start).seconds > MAX_CLIP_LENGTH

    rows = zip(df['Time Stamp'], df['Start_Time'], df['End_Time'])
    return [label for label, start, end in rows if _is_invalid(start, end)]

def read_and_verify_dataframes(path):
    """Load the evaluation sheet at ``path`` and validate its time stamps.

    Prints a confirmation when every row passes. Otherwise prints the
    offending ``Time Stamp`` values and raises.

    Args:
        path: Path of the CSV file to validate.

    Raises:
        Exception: If any row fails the start/end checks, or if loading and
            parsing the sheet fails.
    """
    # Load the sheet with parsed start/end columns.
    frame = get_dataframe(path, "Time Stamp")

    # Gather every row whose start/end pair is invalid.
    bad_rows = check_df_timestamp(frame)
    if not bad_rows:
        print("No issues found in csv file.")
        return

    print(f"WRONG TIMESTAMPS :: {bad_rows}")
    raise Exception("Wrong timestamps in evaluation sheet.")


# Validate the configured sheet; raises if any timestamp check fails.
read_and_verify_dataframes(csv_path)

Start time already in ascending order.
No issues found in csv file.
