In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the tab-separated EDL export.
# Alternate input kept for reference: './data/creatures_EDL.csv'
file_path = './data/angels_EDL.csv'
df = pd.read_csv(file_path, delimiter='\t')

# Replace whatever header names the file carried with a fixed set of labels.
df.columns = [
    'CHANNEL',
    'EVENT',
    'CLIP NAME',
    'START TIME',
    'END TIME',
    'DURATION',
    'STATE',
]

In [4]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Hoto2,00:08:31:28,00:09:10:29,00:00:39:01,Unmuted
1,1,2,Morgan Vocal Pads 2,00:09:17:29,00:09:35:07,00:00:17:07,Unmuted


In [5]:
# Show (row count, column count) of the loaded frame.
df.shape

(2, 7)

In [6]:
# Trim leading/trailing whitespace from every timecode column.
for timecode_col in ('DURATION', 'START TIME', 'END TIME'):
    df[timecode_col] = df[timecode_col].str.strip()

In [7]:
# Clean data: drop events whose duration is exactly zero, then renumber the
# rows from 0.
#
# The filter and the re-index are chained (no inplace=True) so that
# `df_cleaned` is a brand-new DataFrame rather than a filtered slice of `df`
# mutated in place. Later cells add columns to `df_cleaned`; doing that on a
# slice triggers pandas' SettingWithCopyWarning.
df_cleaned = df.loc[df['DURATION'] != "00:00:00:00"].reset_index(drop=True)

# Display the cleaned DataFrame
print(df_cleaned)

   CHANNEL  EVENT            CLIP NAME   START TIME     END TIME     DURATION  \
0        1      1                Hoto2  00:08:31:28  00:09:10:29  00:00:39:01   
1        1      2  Morgan Vocal Pads 2  00:09:17:29  00:09:35:07  00:00:17:07   

     STATE  
0  Unmuted  
1  Unmuted  


In [8]:
# Inspect the column dtypes of the cleaned frame.
df_cleaned.dtypes

CHANNEL        int64
EVENT          int64
CLIP NAME     object
START TIME    object
END TIME      object
DURATION      object
STATE         object
dtype: object

In [9]:
# Preview the first rows of the cleaned frame.
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Hoto2,00:08:31:28,00:09:10:29,00:00:39:01,Unmuted
1,1,2,Morgan Vocal Pads 2,00:09:17:29,00:09:35:07,00:00:17:07,Unmuted


In [10]:
# Cast the start timecodes to str so each value can be split on ':' when parsed.
df_cleaned['START TIME'] = df_cleaned['START TIME'].astype(str)

def timecode_to_frames(timecode, fps=30):
    """Convert an ``HH:MM:SS:FF`` timecode string to an absolute frame count.

    Parameters
    ----------
    timecode : str
        Colon-separated hours, minutes, seconds and frames,
        e.g. ``"00:08:31:28"``.
    fps : int, optional
        Number of frames per second used to scale the hours/minutes/seconds
        part. Defaults to 30, the rate previously hard-coded here.

    Returns
    -------
    int or None
        ``(hours * 3600 + minutes * 60 + seconds) * fps + frames``, or
        ``None`` when ``timecode`` is not a string or does not consist of
        exactly four integer fields.
    """
    if not isinstance(timecode, str):
        # Non-string values (e.g. NaN/None from a missing cell) have no
        # .split(); treat them as invalid instead of raising AttributeError.
        return None
    try:
        # Raises ValueError for non-numeric fields and for a field count
        # other than four (unpacking mismatch).
        hours, minutes, seconds, frames = map(int, timecode.split(':'))
    except ValueError:
        return None  # Return None for invalid timecodes
    return ((hours * 3600) + (minutes * 60) + seconds) * fps + frames

# Derive absolute frame counts from the start time and the duration timecodes.
for frame_col, timecode_col in (('Frames', 'START TIME'), ('Runtime', 'DURATION')):
    df_cleaned[frame_col] = df_cleaned[timecode_col].apply(timecode_to_frames)

# Parity of the event number: 1 for odd events, 0 for even ones.
df_cleaned['Beats'] = df_cleaned['EVENT'].mod(2)
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime,Beats
0,1,1,Hoto2,00:08:31:28,00:09:10:29,00:00:39:01,Unmuted,15358,1171,1
1,1,2,Morgan Vocal Pads 2,00:09:17:29,00:09:35:07,00:00:17:07,Unmuted,16739,517,0


In [11]:
# Unparseable start times (None) become 0, then the column is cast to int.
df_cleaned['Frames'] = df_cleaned['Frames'].fillna(0).astype(int)

# Order rows by ascending start frame and renumber the index from 0.
df_cleaned = df_cleaned.sort_values(by='Frames', ignore_index=True)
df_cleaned.tail()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime,Beats
0,1,1,Hoto2,00:08:31:28,00:09:10:29,00:00:39:01,Unmuted,15358,1171,1
1,1,2,Morgan Vocal Pads 2,00:09:17:29,00:09:35:07,00:00:17:07,Unmuted,16739,517,0


In [12]:
# Write the processed frame to disk as CSV, leaving out the row index.
output_path = './data/angels_01.csv'
df_cleaned.to_csv(path_or_buf=output_path, index=False)