In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# later cells can read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Refresh the apt package index, then install ffmpeg without prompting (-y).
# -qq keeps the apt output to a minimum.
# NOTE(review): ffmpeg is presumably required for audio decoding by
# whisperx.load_audio — confirm.
!sudo apt-get update -qq
!sudo apt-get install -qq -y ffmpeg

In [None]:
# Install whisperx directly from its GitHub repository (no tag or commit is
# specified) and faster-whisper from the default package index; -q reduces
# pip output.
# NOTE(review): neither install is version-pinned, so reruns may resolve to
# different releases.
!pip install -q git+https://github.com/m-bain/whisperx.git
!pip install -q faster-whisper

In [None]:
import whisperx
import torch
import gc
import traceback
from google.colab import files, drive
import os
import shutil


In [None]:
# Run configuration: transcript language plus the device / numeric precision
# pair, chosen together based on whether a CUDA GPU is visible to torch.
language_code = "en"
if torch.cuda.is_available():
    device = "cuda"
    compute_type = "float16"
else:
    device = "cpu"
    compute_type = "int8"

In [None]:
# Folder on the mounted Drive that holds the stimulus recordings.
audio_dir = "/content/drive/MyDrive/fMRI/stimuli"

# Collect the names (not full paths) of every entry ending in ".wav".
audio_files = []
for entry_name in os.listdir(audio_dir):
    if entry_name.endswith(".wav"):
        audio_files.append(entry_name)

In [None]:
# Load the alignment model and its metadata once, up front; the per-file
# alignment loop passes both to whisperx.align.
model_a, metadata = whisperx.load_align_model(
    language_code=language_code,
    device=device,
)

In [None]:
# Force-align each recording against its pre-written transcript and save the
# word-level timestamps as a TSV next to the audio file.
#
# For "<stem>.wav" this reads "<stem>_transcript.txt" and writes
# "<stem>_alignment_output.tsv", all inside audio_dir. A failure on one file
# is reported with its traceback and the loop continues with the next file.
for file in audio_files:
    # Derive sibling paths from the file stem. os.path.splitext only removes
    # the final extension, whereas str.replace(".wav", ...) would rewrite
    # every ".wav" occurrence in the name (e.g. "take.wav.v2.wav").
    file_stem = os.path.splitext(file)[0]
    audio_file = os.path.join(audio_dir, file)
    transcript_file = os.path.join(audio_dir, f"{file_stem}_transcript.txt")
    output_tsv_file = os.path.join(audio_dir, f"{file_stem}_alignment_output.tsv")

    print(f"Loading audio from: {audio_file}")
    print(f"Loading transcript from: {transcript_file}")

    try:
        audio = whisperx.load_audio(audio_file)

        # Duration in seconds.
        # NOTE(review): assumes load_audio returns samples at 16 kHz — confirm
        # against the installed whisperx version.
        audio_duration = audio.shape[0] / 16000.0

        with open(transcript_file, 'r', encoding='utf-8') as f:
            transcript_text = f.read()

        # Present the whole transcript as a single segment spanning the full
        # recording, so the aligner places every word within that window.
        result_transcribe = {
            "segments": [{
                "text": transcript_text,
                "start": 0,
                "end": audio_duration
            }]
        }

        # Alignment
        print("Aligning transcription...")
        result_aligned = whisperx.align(
            result_transcribe["segments"],
            model_a,
            metadata,
            audio,
            device,
            return_char_alignments=False
        )

        # Get word segments and confirm alignment
        word_segments = result_aligned.get("word_segments", [])
        if not word_segments:
            print("Alignment failed: No word segments found.")
            print("Full aligned result:", result_aligned)
        else:
            # Write/format and save to .tsv
            print(f"\n--- Alignment Complete ---")
            print(f"Saving word-level timestamps to {output_tsv_file}...")

            with open(output_tsv_file, 'w', encoding='utf-8') as f:
                f.write("word\tstart\tend\n")

                for segment in word_segments:
                    word = segment.get('word', 'N/A').strip()
                    start_time = segment.get('start', 'N/A')
                    end_time = segment.get('end', 'N/A')

                    if isinstance(start_time, float) and isinstance(end_time, float):
                        f.write(f"{word}\t{start_time:.3f}\t{end_time:.3f}\n")
                    else:
                        # Handle cases where timestamps might be missing:
                        # write whatever is present (or the 'N/A' placeholder).
                        f.write(f"{word}\t{start_time}\t{end_time}\n")

            print("Successfully saved the output.")

    except Exception as e:
        # Best-effort batch: report the failure and move on to the next file.
        print(f"\nAn error occurred: {e}")
        print("\n--- Traceback ---")
        traceback.print_exc()
        print("-----------------")

    finally:
        # Runs after every file, whether it succeeded or failed.
        print("\nCleaning up memory...")
        gc.collect()
        torch.cuda.empty_cache()


In [None]:
# Sort the flat stimuli folder into one subfolder per file type.
audio_dir = "/content/drive/MyDrive/fMRI/stimuli"

audio_recording_dir = os.path.join(audio_dir, "original_audio_recordings")
transcript_dir = os.path.join(audio_dir, "transcripts")
tsv_dir = os.path.join(audio_dir, "force_aligned_perword_timestamp")

# File extension -> folder that should receive files with that extension.
destination_by_extension = {
    ".wav": audio_recording_dir,
    ".txt": transcript_dir,
    ".tsv": tsv_dir,
}

for destination_dir in destination_by_extension.values():
    os.makedirs(destination_dir, exist_ok=True)

for file in os.listdir(audio_dir):
    file_path = os.path.join(audio_dir, file)
    # Only regular files are moved; the subfolders created above are skipped.
    if not os.path.isfile(file_path):
        continue
    for extension, destination_dir in destination_by_extension.items():
        if file.endswith(extension):
            shutil.move(file_path, os.path.join(destination_dir, file))
            break

In [None]:
def count_files_in_folder(folder_path):
    """Return how many regular files sit directly inside ``folder_path``.

    Subdirectories are neither counted nor descended into.
    """
    file_total = 0
    for entry_name in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, entry_name)):
            file_total += 1
    return file_total

# Count the files in each of the three sorted subfolders.
# The audio count must come from audio_recording_dir: counting audio_dir (the
# parent folder) would not match the 'original_audio_recordings' label printed
# below, because the .wav files have been moved out of the parent.
num_audio_files = count_files_in_folder(audio_recording_dir)
num_transcript_files = count_files_in_folder(transcript_dir)
num_tsv_files = count_files_in_folder(tsv_dir)


print(f"Number of .wav files in 'original_audio_recordings': {num_audio_files}")
print(f"Number of .txt files in 'transcripts': {num_transcript_files}")
print(f"Number of .tsv files in 'force_aligned_perword_timestamp': {num_tsv_files}")


In [None]:
def _lowercase_stems(folder, extension):
    """Return the set of lower-cased, extension-less names in ``folder``.

    Only entries whose name ends with ``extension`` are included; surrounding
    whitespace is stripped from the name before the extension is removed.
    """
    stems = set()
    for entry_name in os.listdir(folder):
        if entry_name.endswith(extension):
            stems.add(os.path.splitext(entry_name.strip())[0].lower())
    return stems


transcript_files = _lowercase_stems(transcript_dir, ".txt")
tsv_files = _lowercase_stems(tsv_dir, ".tsv")


print(f"Transcript files (count {len(transcript_files)}): {sorted(transcript_files)}")
print(f"TSV files (count {len(tsv_files)}): {sorted(tsv_files)}")



In [None]:
def _strip_suffix(name, suffix):
    """Return ``name`` without a trailing ``suffix``; unchanged if it is absent."""
    if suffix and name.endswith(suffix):
        return name[:-len(suffix)]
    return name


# Reduce both name sets to the bare stimulus stem so they can be compared.
# The TSV list is built from its own loop variable; previously it reused the
# stale variable left over from the transcript loop, so every entry was a
# truncated copy of the last transcript name.
tlist = [_strip_suffix(name, "_transcript") for name in transcript_files]
tslist = [_strip_suffix(name, "_alignment_output") for name in tsv_files]