In [79]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Pick which channel's EDL export to read; the file name is derived from it
# (e.g. "climate" -> ./data/climate_EDL.csv, "creatures" -> ./data/creatures_EDL.csv).
channel = "climate"
file_path = f'./data/{channel}_EDL.csv'

# The export is tab-separated. After parsing, overwrite the column labels
# with a fixed set of seven names used by the rest of the notebook.
df = pd.read_csv(file_path, sep='\t')
df.columns = [
    'CHANNEL',
    'EVENT',
    'CLIP NAME',
    'START TIME',
    'END TIME',
    'DURATION',
    'STATE',
]

In [80]:
# Preview the freshly loaded frame (at most the first 58 rows).
df.head(58)

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Rising Tone 3mins Phaz4_1.L.wav-02,00:11:45:27,00:11:58:26,00:00:12:28,Unmuted


In [81]:
# (rows, columns) of the loaded frame.
df.shape

(1, 7)

In [82]:
# Trim surrounding whitespace from every timecode column so later string
# comparisons and ':'-splitting see clean values.
for timecode_column in ('DURATION', 'START TIME', 'END TIME'):
    df[timecode_column] = df[timecode_column].str.strip()

In [83]:
# Clean data: drop events whose duration is exactly zero.
# Chaining reset_index() (instead of calling it with inplace=True on the
# filtered slice) yields an independent frame with a fresh 0..n-1 index, so the
# column assignments made on df_cleaned in later cells do not raise
# SettingWithCopyWarning.
has_duration = df['DURATION'] != "00:00:00:00"
df_cleaned = df[has_duration].reset_index(drop=True)

# Display the cleaned DataFrame
print(df_cleaned)

   CHANNEL  EVENT                           CLIP NAME   START TIME  \
0        1      1  Rising Tone 3mins Phaz4_1.L.wav-02  00:11:45:27   

      END TIME     DURATION    STATE  
0  00:11:58:26  00:00:12:28  Unmuted  


In [84]:
# Inspect the column dtypes of the cleaned frame.
df_cleaned.dtypes

CHANNEL        int64
EVENT          int64
CLIP NAME     object
START TIME    object
END TIME      object
DURATION      object
STATE         object
dtype: object

In [85]:
# Peek at the first rows of the cleaned frame.
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Rising Tone 3mins Phaz4_1.L.wav-02,00:11:45:27,00:11:58:26,00:00:12:28,Unmuted


In [86]:
# Make sure every START TIME value is a string before it is split on ':'
# by timecode_to_frames below.
df_cleaned['START TIME'] = df_cleaned['START TIME'].astype(str)

def timecode_to_frames(timecode, fps=30):
    """Convert an ``HH:MM:SS:FF`` timecode string to a total frame count.

    Parameters
    ----------
    timecode : str
        Four colon-separated integer fields: hours, minutes, seconds, frames
        (for example ``"00:11:45:27"``).
    fps : int, default 30
        Frames per second used to convert whole seconds into frames.

    Returns
    -------
    int or None
        ``((hours * 3600) + (minutes * 60) + seconds) * fps + frames``, or
        ``None`` when ``timecode`` is not a string or does not consist of
        exactly four integer fields.
    """
    if not isinstance(timecode, str):
        # Missing cells arrive as float NaN / None, which have no .split();
        # treat them like any other invalid timecode instead of raising.
        return None
    try:
        # A wrong field count and a non-integer field both raise ValueError.
        hours, minutes, seconds, frames = map(int, timecode.split(':'))
    except ValueError:
        return None  # Return None for invalid timecodes
    return ((hours * 3600) + (minutes * 60) + seconds) * fps + frames

# Derive frame-count columns from their timecode sources:
# 'Frames' from the start position, 'Runtime' from the clip duration.
frame_column_sources = {'Frames': 'START TIME', 'Runtime': 'DURATION'}
for derived_column, timecode_column in frame_column_sources.items():
    df_cleaned[derived_column] = df_cleaned[timecode_column].apply(timecode_to_frames)

# 'Beats' is the parity of the event number (1 for odd events, 0 for even).
df_cleaned['Beats'] = df_cleaned['EVENT'].mod(2)
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime,Beats
0,1,1,Rising Tone 3mins Phaz4_1.L.wav-02,00:11:45:27,00:11:58:26,00:00:12:28,Unmuted,21177,388,1


In [87]:
# Rows whose start timecode could not be parsed get frame 0, after which the
# column can be stored as plain integers.
df_cleaned['Frames'] = df_cleaned['Frames'].fillna(0).astype(int)

# Order events by start frame (ascending) and renumber the index from 0.
df_cleaned = df_cleaned.sort_values(by='Frames', ignore_index=True)
df_cleaned.tail()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime,Beats
0,1,1,Rising Tone 3mins Phaz4_1.L.wav-02,00:11:45:27,00:11:58:26,00:00:12:28,Unmuted,21177,388,1


In [88]:
# Write the processed frame next to the input as ./data/<channel>_01.csv,
# leaving the row index out of the file.
output_path = f'./data/{channel}_01.csv'
df_cleaned.to_csv(output_path, index=False)