# Slicing File

In [17]:
# get file
import csv
DEBATES_FILE = './list_of_debates.csv'

# Open with newline='' as the csv module asks for, so that line breaks
# embedded inside quoted fields are parsed correctly.
with open(DEBATES_FILE, 'r', newline='') as f:
    reader = csv.DictReader(f)
    # Index every row by its 'name' column; a later row with the same name
    # replaces an earlier one.
    list_of_debates = {row['name']: row for row in reader}

# The debate this notebook works on; raises KeyError if the name is not in the CSV.
single_debate = list_of_debates["Dartmouth RR 2024 - Round 4 - Michigan PD vs Dartmouth BC"]

# Source recording and the directory that receives the per-speech clips.
audio_file = f"./debates/{single_debate['name']}/audio.wav"
audio_dir = f"./debates/{single_debate['name']}/speeches/audio_files/"
# Speech labels; each one is also used as a key into single_debate (its start
# time) when the audio is sliced.
speeches = ["1ac", "1nc", "2ac", "2nc", "1nr", "1ar", "2nr", "2ar"]

In [18]:
# slicing file
import sox

# slice based off speech
def slice_audio(audio_file, start_time, speech):
    """Cut one speech out of the full recording and write it as a WAV clip.

    The clip is written to ``<audio_dir>/<speech>.wav``, where ``audio_dir``
    is a notebook-level global. The directory is created if it is missing.

    Args:
        audio_file: Path of the recording to cut from.
        start_time: Start of the speech as colon-separated integers, e.g.
            "hh:mm:ss" (shorter forms such as "mm:ss" also work).
        speech: Speech label used as the output file stem, e.g. "1ac".

    Raises:
        ValueError: If a field of ``start_time`` is not an integer.
    """
    import os  # local import: `os` is only imported by a later cell

    # Fold the colon-separated fields into seconds, most significant first.
    start_seconds = 0
    for field in start_time.split(":"):
        start_seconds = start_seconds * 60 + int(field)

    # Labels ending in 'c' get a 9-minute window, all others 6 minutes
    # (presumably constructives vs. rebuttals - confirm the speech times).
    minutes = 9 if speech.endswith("c") else 6
    end_seconds = start_seconds + minutes * 60

    # Make sure the destination exists so the build does not fail on a
    # debate folder that has not been sliced before.
    os.makedirs(audio_dir, exist_ok=True)
    output_file = os.path.join(audio_dir, f"{speech}.wav")

    tfm = sox.Transformer()
    tfm.trim(start_seconds, end_seconds)
    tfm.build(audio_file, output_file)
    
# Cut one clip per label in `speeches`; single_debate[speech] is the value
# stored under that label in the selected CSV row and is passed as the
# speech's start time.
for speech in speeches:
    slice_audio(audio_file, single_debate[speech], speech)

output_file: ./debates/Dartmouth RR 2024 - Round 4 - Michigan PD vs Dartmouth BC/speeches/audio_files/1ac.wav already exists and will be overwritten on build
output_file: ./debates/Dartmouth RR 2024 - Round 4 - Michigan PD vs Dartmouth BC/speeches/audio_files/1nc.wav already exists and will be overwritten on build
output_file: ./debates/Dartmouth RR 2024 - Round 4 - Michigan PD vs Dartmouth BC/speeches/audio_files/2ac.wav already exists and will be overwritten on build
output_file: ./debates/Dartmouth RR 2024 - Round 4 - Michigan PD vs Dartmouth BC/speeches/audio_files/2nc.wav already exists and will be overwritten on build
output_file: ./debates/Dartmouth RR 2024 - Round 4 - Michigan PD vs Dartmouth BC/speeches/audio_files/1nr.wav already exists and will be overwritten on build
output_file: ./debates/Dartmouth RR 2024 - Round 4 - Michigan PD vs Dartmouth BC/speeches/audio_files/1ar.wav already exists and will be overwritten on build
output_file: ./debates/Dartmouth RR 2024 - Round 4 -

# Transcription

In [19]:
# Load environment variables via python-dotenv (JAX settings for macOS, per
# the original note).
# NOTE(review): presumably this has to run before `import jax` below so the
# variables are already set when JAX initialises - confirm before reordering.
from dotenv import load_dotenv

load_dotenv()

# Smoke test: import JAX and print a small array to confirm it works.
import jax
print(jax.numpy.arange(10))

[0 1 2 3 4 5 6 7 8 9]


In [15]:
# import pipeline
# NOTE(review): `FlaxWhisperPipline` (sic) appears to be the spelling that
# whisper_jax exports - confirm before "correcting" the name.
from whisper_jax import FlaxWhisperPipline

# instantiate pipeline with the "openai/whisper-large-v2" checkpoint;
# the resulting `pipeline` object is reused by the transcription cells below
pipeline = FlaxWhisperPipline("openai/whisper-large-v2")



In [4]:
# JIT compile the forward call - slow, but we only do once.
# The path is derived from single_debate['name'], like audio_file and
# audio_dir, so selecting a different debate does not leave this cell
# pointing at the previous recording.
text = pipeline(f"./debates/{single_debate['name']}/audio_trimmed.wav")

In [20]:
# use the cached function thereafter - super fast!!
from concurrent.futures import ThreadPoolExecutor
import os

# Transcripts go in a "transcriptions" folder under the same per-debate
# "speeches" directory that holds the sliced audio.
transcription_dir = f"./debates/{single_debate['name']}/speeches/transcriptions/"
os.makedirs(transcription_dir, exist_ok=True)
# NOTE(review): this rebinds the notebook-level `speeches` (eight labels when
# first defined) to just these four, so re-running the slicing loop after this
# cell would only slice these - confirm that is intended.
speeches = ["1nr", "1ar", "2nr", "2ar"]

def transcribe_and_save(speech, pipeline):
    """Transcribe one sliced speech and write the transcript to a text file.

    Reads ``<audio_dir>/<speech>.wav`` and writes
    ``<transcription_dir>/<speech>.txt``; both directories are notebook-level
    globals.

    Args:
        speech: Speech label used as the file stem, e.g. "1ar".
        pipeline: Callable invoked as ``pipeline(path, task="transcribe")``
            that returns a mapping with a "text" entry.
    """
    # os.path.join avoids the doubled "/" the f-strings produced, since both
    # directory globals already end with a separator.
    audio_path = os.path.join(audio_dir, f"{speech}.wav")
    transcript_path = os.path.join(transcription_dir, f"{speech}.txt")

    result = pipeline(audio_path, task="transcribe")
    # save the text to a file; explicit UTF-8 so non-ASCII characters in a
    # transcript do not depend on the platform's default encoding
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(result["text"])
    print(f"Transcribed {speech}")

# Use ThreadPoolExecutor to transcribe each speech in parallel
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(transcribe_and_save, speech, pipeline) for speech in speeches]
    # An exception raised inside a worker is stored on its Future and is only
    # re-raised by result(); without this, a failed transcription would pass
    # silently and simply leave no output file.
    for future in futures:
        future.result()

In [21]:
# NOTE(review): `text` here is the notebook-level result assigned by the
# warm-up pipeline call; transcribe_and_save keeps its results in a local
# variable and only writes them to disk.
print(text["text"])

 Photoaffirmatives to embrace an Arctic-ed view of planet politics is the best method for solving the red flags. The planet prevents extinction through miscalculated accidental war in the Arctic versus threats of extinguished accidents, co-nuclear and close Russia to first strike the United States, due to fears which the planet certainly solves. Though, red flag, I would highlight, is not a negative part. It indicates that we should abandon first strike postures in the Arctic, and articulates that the vagueness of American first strike policy is a key reason. It does not say that we should get rid of all of our noose, which means that it does not go negative. It proves that the permutation is sufficient to solve the terms failure in the Arctic for the plan, as well as everything else that would be adopted of the alternative to the permutation. It was not perceived perfectly as new, and the two in orange strike it as though it were a buoyancy car. It was that presenting the threat of a 