In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the CSV file
file_path = './data/creatures_EDL.csv'
df = pd.read_csv(file_path, sep='\t')
df.columns = ['CHANNEL', 'EVENT', 'CLIP NAME', 'START TIME', 'END TIME', 'DURATION', 'STATE']

In [11]:
df.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,P3 Alien Fish Chatter Ab-Mldy-01.L,00:03:12:14,00:03:13:20,00:00:01:05,Unmuted
1,1,2,P3 Alien Fish Chatter Ab-Mldy-02.L,00:03:17:12,00:03:19:05,00:00:01:23,Unmuted
2,1,3,P3 Alien Fish Chatter Ab-Mldy-03.L,00:03:26:12,00:03:28:06,00:00:01:24,Unmuted
3,1,4,P3 Alien Fish Chatter Ab-Mldy-04.L,00:03:28:06,00:03:31:11,00:00:03:05,Unmuted
4,1,5,P3 Alien Fish Chatter Ab-Mldy-05.L,00:03:37:27,00:03:38:16,00:00:00:19,Unmuted


In [12]:
df.shape

(59, 7)

In [13]:
df['DURATION'] = df['DURATION'].str.strip()
df['START TIME'] = df['START TIME'].str.strip()
df['END TIME'] = df['END TIME'].str.strip()

In [14]:
#clean data
df_cleaned = df[df['DURATION'] != "00:00:00:00"]

# Reindex the DataFrame
df_cleaned.reset_index(drop=True, inplace=True)

# Display the cleaned DataFrame
print(df_cleaned)

     CHANNEL     EVENT                              CLIP NAME   START TIME  \
0   1         1            P3 Alien Fish Chatter Ab-Mldy-01.L  00:03:12:14   
1   1         2            P3 Alien Fish Chatter Ab-Mldy-02.L  00:03:17:12   
2   1         3            P3 Alien Fish Chatter Ab-Mldy-03.L  00:03:26:12   
3   1         4            P3 Alien Fish Chatter Ab-Mldy-04.L  00:03:28:06   
4   1         5            P3 Alien Fish Chatter Ab-Mldy-05.L  00:03:37:27   
5   1         6            P3 Alien Fish Chatter Ab-Mldy-06.L  00:03:48:29   
6   1         7            P3 Alien Fish Chatter Ab-Mldy-07.L  00:03:59:14   
7   1         8                SFX LFO Sparkles Ab-Mldy-02.L   00:06:32:04   
8   1         9                SFX Water Comb-Mldy-05.L        00:06:52:23   
9   1         10               SFX Water Comb-Mldy-06.L        00:07:04:00   
10  1         11               SFX Water Comb-Mldy-07.L        00:07:36:17   
11  CHANNEL   EVENT            CLIP NAME                        

In [15]:
df_cleaned.shape

(59, 7)

In [16]:
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,P3 Alien Fish Chatter Ab-Mldy-01.L,00:03:12:14,00:03:13:20,00:00:01:05,Unmuted
1,1,2,P3 Alien Fish Chatter Ab-Mldy-02.L,00:03:17:12,00:03:19:05,00:00:01:23,Unmuted
2,1,3,P3 Alien Fish Chatter Ab-Mldy-03.L,00:03:26:12,00:03:28:06,00:00:01:24,Unmuted
3,1,4,P3 Alien Fish Chatter Ab-Mldy-04.L,00:03:28:06,00:03:31:11,00:00:03:05,Unmuted
4,1,5,P3 Alien Fish Chatter Ab-Mldy-05.L,00:03:37:27,00:03:38:16,00:00:00:19,Unmuted


In [18]:
def timecode_to_frames(timecode):
    try:
        hours, minutes, seconds, frames = map(int, timecode.split(':'))
        total_frames = ((hours * 3600) + (minutes * 60) + seconds) * 30 + frames
        return total_frames
    except ValueError:
        return None  # Return None for invalid timecodes

# Apply the function to the 'START TIME' column
df_cleaned['Frames'] = df_cleaned['START TIME'].apply(timecode_to_frames)
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames
0,1,1,P3 Alien Fish Chatter Ab-Mldy-01.L,00:03:12:14,00:03:13:20,00:00:01:05,Unmuted,5774.0
1,1,2,P3 Alien Fish Chatter Ab-Mldy-02.L,00:03:17:12,00:03:19:05,00:00:01:23,Unmuted,5922.0
2,1,3,P3 Alien Fish Chatter Ab-Mldy-03.L,00:03:26:12,00:03:28:06,00:00:01:24,Unmuted,6192.0
3,1,4,P3 Alien Fish Chatter Ab-Mldy-04.L,00:03:28:06,00:03:31:11,00:00:03:05,Unmuted,6246.0
4,1,5,P3 Alien Fish Chatter Ab-Mldy-05.L,00:03:37:27,00:03:38:16,00:00:00:19,Unmuted,6537.0


In [21]:
# Convert 'Frames' to integer
df_cleaned['Frames'] = df_cleaned['Frames'].fillna(0)
df_cleaned['Frames'] = df_cleaned['Frames'].astype(int)

# Sort by 'Frames' column in ascending order and reset the index
df_cleaned = df_cleaned.sort_values(by='Frames').reset_index(drop=True)
df_cleaned.tail()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames
54,1,13,Clang Inst.02-cm_01-26.L,00:10:31:14,00:10:35:09,00:00:03:25,Unmuted,18944
55,1,14,Clang Inst.02-cm_01-28.L,00:10:54:00,00:10:57:20,00:00:03:20,Unmuted,19620
56,1,15,Clang Inst.02-cm_01-30.L,00:11:26:23,00:11:30:12,00:00:03:19,Unmuted,20603
57,1,16,Clang Inst.02-cm_01-32.L,00:11:37:00,00:11:40:18,00:00:03:18,Unmuted,20910
58,1,17,Clang Inst.02-cm_01-33.L,00:11:50:15,00:11:54:05,00:00:03:20,Unmuted,21315


In [23]:
# Save the new DataFrame as a CSV file
output_path = './data/creatures_01.csv'
df_cleaned.to_csv(output_path, index=False)