In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the tab-separated EDL export. To process the creatures EDL instead,
# point file_path at './data/creatures_EDL.csv'.
file_path = './data/heartbeat_EDL.csv'
edl_column_names = [
    'CHANNEL', 'EVENT', 'CLIP NAME', 'START TIME',
    'END TIME', 'DURATION', 'STATE',
]
df = pd.read_csv(file_path, sep='\t')
# Overwrite whatever header labels the file carried with fixed, known names.
df.columns = edl_column_names

In [17]:
# Preview the first 20 rows to confirm the columns were relabelled as intended.
df.head(20)

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Heartbeat Tempo Mapped-11.L,00:00:03:11,00:00:03:29,00:00:00:17,Unmuted
1,1,2,Heartbeat Tempo Mapped-91.L,00:00:03:29,00:00:04:07,00:00:00:08,Unmuted
2,1,3,Heartbeat Tempo Mapped-92.L,00:00:04:07,00:00:04:25,00:00:00:17,Unmuted
3,1,4,Heartbeat Tempo Mapped-93.L,00:00:04:25,00:00:05:03,00:00:00:08,Unmuted
4,1,5,Heartbeat Tempo Mapped-95.L,00:00:05:04,00:00:05:21,00:00:00:17,Unmuted
5,1,6,Heartbeat Tempo Mapped-96.L,00:00:05:21,00:00:05:29,00:00:00:08,Unmuted
6,1,7,Heartbeat Tempo Mapped-97.L,00:00:06:00,00:00:06:17,00:00:00:17,Unmuted
7,1,8,Heartbeat Tempo Mapped-84.L,00:00:06:17,00:00:06:25,00:00:00:07,Unmuted
8,1,9,Heartbeat Tempo Mapped-85.L,00:00:06:25,00:00:06:25,00:00:00:00,Unmuted
9,1,10,Heartbeat Tempo Mapped-98.L,00:00:06:26,00:00:07:13,00:00:00:17,Unmuted


In [18]:
# (rows, columns) of the raw table, before any rows are filtered out.
df.shape

(1299, 7)

In [19]:
# Trim surrounding whitespace from every timecode column so that exact string
# comparisons and timecode parsing operate on the bare "HH:MM:SS:FF" text.
for timecode_col in ('DURATION', 'START TIME', 'END TIME'):
    df[timecode_col] = df[timecode_col].str.strip()

In [20]:
# Clean data: discard events whose DURATION is exactly "00:00:00:00".
has_duration = df['DURATION'] != "00:00:00:00"
# Take an explicit copy so df_cleaned is an independent frame. A bare boolean
# selection is tracked by pandas as a possible view of df, and the column
# assignments made on df_cleaned in later cells would otherwise raise
# SettingWithCopyWarning.
df_cleaned = df[has_duration].copy()
# Renumber rows 0..n-1 so the index has no gaps where events were dropped.
df_cleaned.index = pd.RangeIndex(start=0, stop=len(df_cleaned), step=1)

# Display the cleaned DataFrame
print(df_cleaned)

      CHANNEL  EVENT                       CLIP NAME   START TIME  \
0           1      1  Heartbeat Tempo Mapped-11.L     00:00:03:11   
1           1      2  Heartbeat Tempo Mapped-91.L     00:00:03:29   
2           1      3  Heartbeat Tempo Mapped-92.L     00:00:04:07   
3           1      4  Heartbeat Tempo Mapped-93.L     00:00:04:25   
4           1      5  Heartbeat Tempo Mapped-95.L     00:00:05:04   
...       ...    ...                             ...          ...   
1160        1   1295  Nu Heartbeat.01_01-210.L        00:11:57:09   
1161        1   1296  Nu Heartbeat.01_01-211.L        00:11:57:17   
1162        1   1297  Nu Heartbeat.01_01-22.L         00:11:57:27   
1163        1   1298  Nu Heartbeat.01_01-212.L        00:11:58:06   
1164        1   1299  Nu Heartbeat.01_01-174.L        00:11:58:16   

         END TIME     DURATION    STATE  
0     00:00:03:29  00:00:00:17  Unmuted  
1     00:00:04:07  00:00:00:08  Unmuted  
2     00:00:04:25  00:00:00:17  Unmuted  
3  

In [21]:
# Rich preview of the first rows after zero-duration events were removed.
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Heartbeat Tempo Mapped-11.L,00:00:03:11,00:00:03:29,00:00:00:17,Unmuted
1,1,2,Heartbeat Tempo Mapped-91.L,00:00:03:29,00:00:04:07,00:00:00:08,Unmuted
2,1,3,Heartbeat Tempo Mapped-92.L,00:00:04:07,00:00:04:25,00:00:00:17,Unmuted
3,1,4,Heartbeat Tempo Mapped-93.L,00:00:04:25,00:00:05:03,00:00:00:08,Unmuted
4,1,5,Heartbeat Tempo Mapped-95.L,00:00:05:04,00:00:05:21,00:00:00:17,Unmuted


In [26]:
# Make sure START TIME holds plain strings before it is parsed. The result is
# built as a new frame (assign) instead of writing a column into df_cleaned,
# which would raise SettingWithCopyWarning when df_cleaned is a filtered
# selection of df. The column name contains a space, so it is passed through
# dict unpacking rather than as a keyword.
df_cleaned = df_cleaned.assign(**{'START TIME': df_cleaned['START TIME'].astype(str)})

def timecode_to_frames(timecode, fps=30):
    """Convert an "HH:MM:SS:FF" timecode into an absolute frame count.

    Parameters
    ----------
    timecode : str or any
        Timecode made of four colon-separated integer fields (hours, minutes,
        seconds, frames). Non-string values are converted with ``str`` first,
        so missing values (``None``, NaN) are treated as invalid instead of
        raising ``AttributeError``.
    fps : int, default 30
        Number of frames per second used to scale the hours/minutes/seconds
        part. The default keeps the previous hard-coded behaviour.

    Returns
    -------
    int or None
        ``(hours * 3600 + minutes * 60 + seconds) * fps + frames``, or
        ``None`` when the value does not consist of exactly four integer
        fields.
    """
    try:
        # Unpacking raises ValueError both for a non-integer field and for a
        # wrong number of fields, so one handler covers every malformed input.
        hours, minutes, seconds, frames = map(int, str(timecode).strip().split(':'))
    except ValueError:
        return None  # Return None for invalid timecodes
    whole_seconds = (hours * 3600) + (minutes * 60) + seconds
    return whole_seconds * fps + frames

# Derive the numeric columns in one step. assign() returns a new frame, so no
# column is written into a filtered selection of df (which would raise
# SettingWithCopyWarning).
#   Frames  - START TIME converted to an absolute frame count
#   Runtime - DURATION converted to a frame count
#   Beats   - 0/1 alternating with the current row position
# NOTE(review): Beats is taken from the row index as it stands here; the later
# sort by Frames only preserves the alternation if the rows are already in
# start-time order - confirm.
df_cleaned = df_cleaned.assign(
    Frames=df_cleaned['START TIME'].apply(timecode_to_frames),
    Runtime=df_cleaned['DURATION'].apply(timecode_to_frames),
    Beats=df_cleaned.index % 2,
)
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime,Beats
0,1,1,Heartbeat Tempo Mapped-11.L,00:00:03:11,00:00:03:29,00:00:00:17,Unmuted,101,17,0
1,1,2,Heartbeat Tempo Mapped-91.L,00:00:03:29,00:00:04:07,00:00:00:08,Unmuted,119,8,1
2,1,3,Heartbeat Tempo Mapped-92.L,00:00:04:07,00:00:04:25,00:00:00:17,Unmuted,127,17,0
3,1,4,Heartbeat Tempo Mapped-93.L,00:00:04:25,00:00:05:03,00:00:00:08,Unmuted,145,8,1
4,1,5,Heartbeat Tempo Mapped-95.L,00:00:05:04,00:00:05:21,00:00:00:17,Unmuted,154,17,0


In [27]:
# Convert 'Frames' to integer. Timecodes that could not be parsed come back as
# missing values; they are replaced by 0 here, which places those rows first
# after the ascending sort below.
df_cleaned = df_cleaned.assign(Frames=df_cleaned['Frames'].fillna(0).astype(int))

# Sort by 'Frames' in ascending order and reset the index. mergesort is a
# stable algorithm, so rows that share the same start frame keep their
# existing relative order instead of being shuffled by the default quicksort.
df_cleaned = df_cleaned.sort_values(by='Frames', kind='mergesort').reset_index(drop=True)
df_cleaned.tail()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime,Beats
1160,1,1295,Nu Heartbeat.01_01-210.L,00:11:57:09,00:11:57:16,00:00:00:06,Unmuted,21519,6,0
1161,1,1296,Nu Heartbeat.01_01-211.L,00:11:57:17,00:11:57:26,00:00:00:09,Unmuted,21527,9,1
1162,1,1297,Nu Heartbeat.01_01-22.L,00:11:57:27,00:11:58:05,00:00:00:08,Unmuted,21537,8,0
1163,1,1298,Nu Heartbeat.01_01-212.L,00:11:58:06,00:11:58:16,00:00:00:09,Unmuted,21546,9,1
1164,1,1299,Nu Heartbeat.01_01-174.L,00:11:58:16,00:11:58:25,00:00:00:08,Unmuted,21556,8,0


In [28]:
# Write the cleaned, frame-annotated table to disk; index=False leaves the
# row index out of the file.
output_path = './data/heartbeat_01.csv'
df_cleaned.to_csv(path_or_buf=output_path, index=False)