In [2]:
import os
import torchaudio
from scipy.io import wavfile
import pandas as pd
import cv2
from moviepy.editor import VideoFileClip

In [9]:
# Load the per-case console "on time" annotations, keep only the identifying
# columns plus the second offset, floor negative offsets at zero, and add a
# human-readable HH:MM:SS rendering of each offset.
df = (
    pd.read_csv('../../annotations/console_times/combined_console_times_secs.csv', index_col=0)
    .pipe(lambda raw: raw.drop(columns=[
        name for name in raw.columns
        if name not in ['Mentor ID', 'Trainee ID', 'case_id', 'On time (secs)']
    ]))
    # Negative offsets are treated as "from the very start" of the recording.
    .assign(**{'On time (secs)': lambda frame: frame['On time (secs)'].clip(lower=0)})
    .assign(**{'On time (hms)': lambda frame: frame['On time (secs)'].apply(
        lambda x: f"{int(x // 3600):02d}:{int((x % 3600) // 60):02d}:{int(x % 60):02d}"
    )})
)

# Spot-check a single case.
df.loc[df['case_id'].eq(9)]

Unnamed: 0,Mentor ID,Trainee ID,case_id,On time (secs),On time (hms)
119,A3,A7,9,74,00:01:14
120,A3,A7,9,314,00:05:14
121,A3,A7,9,704,00:11:44
122,A3,A7,9,1154,00:19:14
123,A3,A7,9,1574,00:26:14
124,A3,A7,9,1714,00:28:34
125,A3,A7,9,2294,00:38:14
126,A3,A7,9,2789,00:46:29
127,A3,A7,9,3794,01:03:14
128,A3,A7,9,4439,01:13:59


In [8]:
# Audit the distinct trainee labels; in the run recorded below the raw values
# are not normalised (e.g. 'A3' vs 'A3 ', 'A1' vs 'a1', plus NaN and free text).
df.loc[:, 'Trainee ID'].unique()

array(['A3', 'A4', 'A2', 'A1', 'a1', 'A5', 'A6', 'no resident on console',
       nan, 'A7', 'A8', 'A9', 'A3 (kian)', 'A3 (kian) ', 'A3 ', 'A5 ',
       'A6 ', 'A10', 'A12', 'A5 -> A9'], dtype=object)

In [4]:
import torchaudio
import torchaudio.transforms as T
import os
import torchaudio
from scipy.io import wavfile
import pandas as pd
import itables

# pd.set_option('display.max_colwidth', None)

def extract_anchor(full_audio_path, save_path, start_hms, duration, target_sr=16000):
    """Cut a fixed-length clip out of a recording and save it as a resampled WAV.

    Args:
        full_audio_path: Path of the source audio file, read with ``torchaudio.load``.
        save_path: Destination WAV path. A missing parent directory is created.
        start_hms: Clip start as an ``'HH:MM:SS'`` string.
        duration: Clip length in seconds.
        target_sr: Sample rate of the written file in Hz (default 16000).

    Returns:
        None. The clip is written to ``save_path`` as a side effect.
    """
    print(f"Extracting clip from {full_audio_path} to {save_path}")

    # A bare filename has an empty dirname and os.makedirs('') raises
    # FileNotFoundError, so only create the parent directory when there is one.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # 'HH:MM:SS' -> whole seconds from the start of the recording.
    secs = sum(x * int(t) for x, t in zip([3600, 60, 1], start_hms.split(':')))
    wav, sr = torchaudio.load(full_audio_path)
    # Slice along the last axis using sample indices derived from the source rate.
    clip = wav[:, int(sr*secs):int(sr*(secs+duration))]
    clip = T.Resample(sr, target_sr)(clip)
    # Transposed before writing so the sample axis comes first for wavfile.write.
    wavfile.write(save_path, target_sr, clip.numpy().T)
    
def df_after_hms(path, hms):
    """Load a transcription CSV and keep the rows that start after a given time.

    The CSV's ``start`` and ``end`` columns are converted from second offsets
    to ``datetime.time`` values (any fractional part is truncated). Only rows
    whose ``start_hms`` is strictly later than ``hms`` are returned.

    Args:
        path: CSV path; it must provide ``start``, ``end`` and ``transcription``.
        hms: Cut-off time string such as ``'00:02:00'``, parsed with ``pd.Timestamp``.

    Returns:
        DataFrame with a fresh 0-based index and the columns ``start_hms``,
        ``end_hms``, ``transcription`` followed by whichever of ``se_speaker``,
        ``trainer_dist`` and ``trainee_dist`` are present in the CSV.
    """
    def _clock_text(total_secs):
        # Zero-padded 'HH:MM:SS' rendering of a second offset.
        hours = int(total_secs // 3600)
        minutes = int((total_secs % 3600) // 60)
        seconds = int(total_secs % 60)
        return ':'.join(str(field).zfill(2) for field in (hours, minutes, seconds))

    table = pd.read_csv(path)

    # First render both offsets as text ...
    offset_cols = {'start_hms': 'start', 'end_hms': 'end'}
    for hms_col, secs_col in offset_cols.items():
        table[hms_col] = table[secs_col].apply(_clock_text)
    # ... then parse the text into datetime.time objects so rows can be compared.
    for hms_col in offset_cols:
        table[hms_col] = pd.to_datetime(table[hms_col], format='%H:%M:%S').dt.time

    # Mandatory columns first, then the optional ones the CSV actually has.
    optional_cols = ('se_speaker', 'trainer_dist', 'trainee_dist')
    remain_cols = ['start_hms', 'end_hms', 'transcription']
    remain_cols.extend(name for name in optional_cols if name in table.columns)

    cutoff = pd.Timestamp(hms).time()
    later_rows = table[table['start_hms'] > cutoff].reset_index(drop=True)
    return later_rows[remain_cols]
    
# Preview at most the first 200 utterances that start after the two-minute mark.
df_after_hms(
    'results/extract_dialogue/transcriptions/LFB18_full.csv',
    '00:02:00',
).iloc[:200]

Unnamed: 0,start_hms,end_hms,transcription
0,00:02:05,00:02:05,Yeah.
1,00:02:07,00:02:08,and go down more.
2,00:02:15,00:02:18,And if you need to adjust your tracks and do it.
3,00:02:24,00:02:24,So I would extend.
4,00:02:24,00:02:25,
...,...,...,...
195,00:24:47,00:24:49,"It's still banned, stuck to it."
196,00:24:50,00:24:50,Yep.
197,00:25:48,00:25:49,"Down to the left, please."
198,00:25:51,00:25:52,"Yep, right here. Perfect."


In [5]:
# Anchor selection: the speaker role and ID the clip stands for, and where the
# clip sits inside the case recording.
trainer_trainee, id_, case = 'trainer', 'A6', 18
start_hms, duration = '00:07:24', 6

full_audio_path = f'../../full_audios/LFB{case}_full.wav'
# Clip start in whole seconds from the beginning of the recording.
secs = sum(unit * int(field) for unit, field in zip((3600, 60, 1), start_hms.split(':')))
save_path = f'results/extract_dialogue/anchors/{trainer_trainee}/{id_}/LFB{case}_full_{start_hms}_{duration}.wav'

extract_anchor(full_audio_path, save_path, start_hms, duration)

Extracting clip from ../../full_audios/LFB18_full.wav to results/extract_dialogue/anchors/trainer/A6/LFB18_full_00:07:24_6.wav
