In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the tab-separated EDL export and replace its header with fixed,
# whitespace-free column names.
# Alternative input: './data/creatures_EDL.csv'
file_path = './data/breath_EDL.csv'
df = pd.read_csv(file_path, sep='\t')
df.columns = [
    'CHANNEL',
    'EVENT',
    'CLIP NAME',
    'START TIME',
    'END TIME',
    'DURATION',
    'STATE',
]

In [48]:
# Preview the first rows of the freshly loaded frame to check the column mapping.
df.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Inhale,00:00:03:07,00:00:07:16,00:00:04:09,Unmuted
1,1,2,Inhale,00:00:11:22,00:00:15:04,00:00:03:11,Unmuted
2,1,3,Inhale,00:00:19:16,00:00:23:14,00:00:03:28,Unmuted
3,1,4,Inhale,00:00:27:23,00:00:31:10,00:00:03:16,Unmuted
4,1,5,Inhale,00:00:35:19,00:00:38:29,00:00:03:10,Unmuted


In [49]:
# (rows, columns) of the raw frame.
df.shape

(103, 7)

In [50]:
# Trim surrounding whitespace from the text fields of the export.
# The timecode columns must be clean so the exact comparison against
# "00:00:00:00" and the later HH:MM:SS:FF parsing work.
# 'CLIP NAME' is included as well: its values are space-padded in the raw
# file (see the printed frame), and the padding would otherwise be written
# to the saved CSV and break exact matches such as == 'Inhale'.
for column in ('CLIP NAME', 'START TIME', 'END TIME', 'DURATION'):
    df[column] = df[column].str.strip()

In [51]:
# Clean data: drop clips whose duration is exactly zero.
# .copy() makes df_cleaned an independent frame rather than a selection of
# df, so the column assignments in later cells do not raise
# SettingWithCopyWarning or depend on view-vs-copy semantics.
keep_rows = df['DURATION'] != "00:00:00:00"
df_cleaned = df.loc[keep_rows].copy()

# Renumber rows 0..n-1 (returns a new frame; no in-place mutation, so the
# cell gives the same result every time it is re-run).
df_cleaned = df_cleaned.reset_index(drop=True)

# Display the cleaned DataFrame
print(df_cleaned)

     CHANNEL  EVENT                       CLIP NAME   START TIME     END TIME  \
0          1      1  Inhale                          00:00:03:07  00:00:07:16   
1          1      2  Inhale                          00:00:11:22  00:00:15:04   
2          1      3  Inhale                          00:00:19:16  00:00:23:14   
3          1      4  Inhale                          00:00:27:23  00:00:31:10   
4          1      5  Inhale                          00:00:35:19  00:00:38:29   
..       ...    ...                             ...          ...          ...   
98         1     47  Exhale                          00:10:36:06  00:10:43:18   
99         1     48  Exhale                          00:10:57:19  00:11:02:26   
100        1     49  Exhale                          00:11:14:16  00:11:19:07   
101        1     50  Exhale                          00:11:28:16  00:11:32:27   
102        1     51  Exhale                          00:11:39:28  00:11:44:03   

        DURATION    STATE  

In [52]:
# Column dtypes after cleaning; the timecode columns are still plain objects (strings) here.
df_cleaned.dtypes

CHANNEL        int64
EVENT          int64
CLIP NAME     object
START TIME    object
END TIME      object
DURATION      object
STATE         object
dtype: object

In [53]:
# Preview the first rows of the cleaned frame.
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE
0,1,1,Inhale,00:00:03:07,00:00:07:16,00:00:04:09,Unmuted
1,1,2,Inhale,00:00:11:22,00:00:15:04,00:00:03:11,Unmuted
2,1,3,Inhale,00:00:19:16,00:00:23:14,00:00:03:28,Unmuted
3,1,4,Inhale,00:00:27:23,00:00:31:10,00:00:03:16,Unmuted
4,1,5,Inhale,00:00:35:19,00:00:38:29,00:00:03:10,Unmuted


In [54]:
# Force every start timecode to a Python str before parsing
# (presumably so timecode_to_frames can always call .split on it).
df_cleaned['START TIME'] = df_cleaned['START TIME'].astype(str)

def timecode_to_frames(timecode, fps=30):
    """Convert an ``HH:MM:SS:FF`` timecode string to a total frame count.

    Parameters
    ----------
    timecode : str
        Colon-separated timecode with exactly four integer fields:
        hours, minutes, seconds and frames.
    fps : int, optional
        Frames per second used to convert the hours/minutes/seconds part
        into frames. Defaults to 30.

    Returns
    -------
    int or None
        ``((hours * 3600) + (minutes * 60) + seconds) * fps + frames``,
        or ``None`` when the value is not a parseable timecode: wrong
        number of fields, a non-integer field, or a non-string value
        such as NaN or None.
    """
    try:
        hours, minutes, seconds, frames = map(int, timecode.split(':'))
    except (AttributeError, ValueError):
        # AttributeError: the cell is not a string (e.g. NaN / None) and has
        #                 no .split; this happens for columns that were not
        #                 cast to str beforehand.
        # ValueError:     wrong number of fields or a non-integer field.
        return None  # Return None for invalid timecodes
    return ((hours * 3600) + (minutes * 60) + seconds) * fps + frames

# Derive the frame-count columns from their timecode columns:
# 'Frames' is the clip's start position, 'Runtime' its length.
for target, source in (('Frames', 'START TIME'), ('Runtime', 'DURATION')):
    df_cleaned[target] = df_cleaned[source].apply(timecode_to_frames)
df_cleaned.head()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime
0,1,1,Inhale,00:00:03:07,00:00:07:16,00:00:04:09,Unmuted,97,129
1,1,2,Inhale,00:00:11:22,00:00:15:04,00:00:03:11,Unmuted,352,101
2,1,3,Inhale,00:00:19:16,00:00:23:14,00:00:03:28,Unmuted,586,118
3,1,4,Inhale,00:00:27:23,00:00:31:10,00:00:03:16,Unmuted,833,106
4,1,5,Inhale,00:00:35:19,00:00:38:29,00:00:03:10,Unmuted,1069,100


In [55]:
# Store 'Frames' as plain integers; missing values (unparseable start
# timecodes) are replaced by 0 first.
df_cleaned['Frames'] = df_cleaned['Frames'].fillna(0).astype(int)

# Order the clips by ascending start frame and renumber the rows 0..n-1.
df_cleaned = df_cleaned.sort_values(by='Frames', ignore_index=True)
df_cleaned.tail()

Unnamed: 0,CHANNEL,EVENT,CLIP NAME,START TIME,END TIME,DURATION,STATE,Frames,Runtime
98,1,50,Inhale,00:11:19:25,00:11:23:28,00:00:04:02,Unmuted,20395,122
99,1,50,Exhale,00:11:28:16,00:11:32:27,00:00:04:10,Unmuted,20656,130
100,1,51,Inhale,00:11:33:04,00:11:36:23,00:00:03:18,Unmuted,20794,108
101,1,51,Exhale,00:11:39:28,00:11:44:03,00:00:04:05,Unmuted,20998,125
102,1,52,Inhale,00:11:44:03,00:11:47:26,00:00:03:22,Unmuted,21123,112


In [56]:
# Write the sorted frame to disk without the positional index column.
output_path = './data/breaths_01.csv'
df_cleaned.to_csv(path_or_buf=output_path, index=False)