In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the CSV file
#file_path = './data/creatures_EDL.csv'
file_path = './data/breath_EDL.csv'
df = pd.read_csv(file_path, sep='\t')
df.columns = ['CHANNEL', 'EVENT', 'CLIP NAME', 'START TIME', 'END TIME', 'DURATION', 'STATE']

In [26]:
df.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1.0,Breath.dup2 - From Brett.02_01-02,00:00:03:07,00:00:07:16,00:00:04:09,Unmuted
1,1,2.0,Breath.dup2 - From Brett.02_01-04,00:00:07:16,00:00:11:22,00:00:04:06,Unmuted
2,1,3.0,Breath.dup2 - From Brett.02_01-06,00:00:11:22,00:00:15:04,00:00:03:11,Unmuted
3,1,4.0,Breath.dup2 - From Brett.02_01-08,00:00:15:04,00:00:19:16,00:00:04:11,Unmuted
4,1,5.0,Breath.dup2 - From Brett.02_01-10,00:00:19:16,00:00:23:14,00:00:03:28,Unmuted


In [27]:
df.shape

(103, 7)

In [28]:
df['DURATION'] = df['DURATION'].str.strip()
df['START TIME'] = df['START TIME'].str.strip()
df['END TIME'] = df['END TIME'].str.strip()

In [29]:
#clean data
df_cleaned = df[df['DURATION'] != "00:00:00:00"]

# Reindex the DataFrame
df_cleaned.reset_index(drop=True, inplace=True)

# Display the cleaned DataFrame
print(df_cleaned)

                      CHANNEL  EVENT                           CLIP NAME  \
0                    1           1.0   Breath.dup2 - From Brett.02_01-02   
1                    1           2.0   Breath.dup2 - From Brett.02_01-04   
2                    1           3.0   Breath.dup2 - From Brett.02_01-06   
3                    1           4.0   Breath.dup2 - From Brett.02_01-08   
4                    1           5.0   Breath.dup2 - From Brett.02_01-10   
..                        ...    ...                                 ...   
98                   1          99.0  Breath.dup2 - From Brett.02_01-198   
99                   1         100.0  Breath.dup2 - From Brett.02_01-200   
100                  1         101.0  Breath.dup2 - From Brett.02_01-202   
101                  1         102.0  Breath.dup2 - From Brett.02_01-203   
102                              NaN                                 NaN   

      START TIME     END TIME     DURATION    STATE  
0    00:00:03:07  00:00:07:16  00

In [33]:
df_cleaned.dtypes

CHANNEL        object
EVENT         float64
CLIP NAME      object
START TIME     object
END TIME       object
DURATION       object
STATE          object
dtype: object

In [31]:
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1.0,Breath.dup2 - From Brett.02_01-02,00:00:03:07,00:00:07:16,00:00:04:09,Unmuted
1,1,2.0,Breath.dup2 - From Brett.02_01-04,00:00:07:16,00:00:11:22,00:00:04:06,Unmuted
2,1,3.0,Breath.dup2 - From Brett.02_01-06,00:00:11:22,00:00:15:04,00:00:03:11,Unmuted
3,1,4.0,Breath.dup2 - From Brett.02_01-08,00:00:15:04,00:00:19:16,00:00:04:11,Unmuted
4,1,5.0,Breath.dup2 - From Brett.02_01-10,00:00:19:16,00:00:23:14,00:00:03:28,Unmuted


In [34]:
df_cleaned['START TIME'] = df_cleaned['START TIME'].astype(str)

def timecode_to_frames(timecode):
    try:
        hours, minutes, seconds, frames = map(int, timecode.split(':'))
        total_frames = ((hours * 3600) + (minutes * 60) + seconds) * 30 + frames
        return total_frames
    except ValueError:
        return None  # Return None for invalid timecodes

# Apply the function to the 'START TIME' column
df_cleaned['Frames'] = df_cleaned['START TIME'].apply(timecode_to_frames)
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames
0,1,1.0,Breath.dup2 - From Brett.02_01-02,00:00:03:07,00:00:07:16,00:00:04:09,Unmuted,97.0
1,1,2.0,Breath.dup2 - From Brett.02_01-04,00:00:07:16,00:00:11:22,00:00:04:06,Unmuted,226.0
2,1,3.0,Breath.dup2 - From Brett.02_01-06,00:00:11:22,00:00:15:04,00:00:03:11,Unmuted,352.0
3,1,4.0,Breath.dup2 - From Brett.02_01-08,00:00:15:04,00:00:19:16,00:00:04:11,Unmuted,454.0
4,1,5.0,Breath.dup2 - From Brett.02_01-10,00:00:19:16,00:00:23:14,00:00:03:28,Unmuted,586.0


In [35]:
# Convert 'Frames' to integer
df_cleaned['Frames'] = df_cleaned['Frames'].fillna(0)
df_cleaned['Frames'] = df_cleaned['Frames'].astype(int)

# Sort by 'Frames' column in ascending order and reset the index
df_cleaned = df_cleaned.sort_values(by='Frames').reset_index(drop=True)
df_cleaned.tail()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames
98,1,98.0,Breath.dup2 - From Brett.02_01-196,00:11:14:05,00:11:23:28,00:00:09:23,Unmuted,20225
99,1,99.0,Breath.dup2 - From Brett.02_01-198,00:11:28:16,00:11:32:27,00:00:04:10,Unmuted,20656
100,1,100.0,Breath.dup2 - From Brett.02_01-200,00:11:33:04,00:11:36:23,00:00:03:18,Unmuted,20794
101,1,101.0,Breath.dup2 - From Brett.02_01-202,00:11:39:28,00:11:44:03,00:00:04:05,Unmuted,20998
102,1,102.0,Breath.dup2 - From Brett.02_01-203,00:11:44:03,00:11:47:26,00:00:03:22,Unmuted,21123


In [36]:
# Save the new DataFrame as a CSV file
output_path = './data/breaths_01.csv'
df_cleaned.to_csv(output_path, index=False)