In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import whisper

# Speech-to-Text

In [4]:
# Load the Whisper "base" checkpoint once; the transcription cells below reuse `model`.
model = whisper.load_model("base")
# Plain string literal: the message has no placeholders, so no f-string is needed.
print("Model loaded successfully")

Model loaded successfully


In [10]:
# Transcribe every .wav file in the ReadText/HC folder with the loaded Whisper
# model and write one UTF-8 text file per recording into `output_folder`.
input_audio = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/ReadText/HC"
output_folder = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_transcriptions"

os.makedirs(output_folder, exist_ok=True)

# NOTE(review): this filler-word list is defined but never applied to the
# transcripts in this cell - confirm whether filtering was intended.
unwanted_words = ["uh", "um", "you know", "so", "start now"]

# Sorted so the processing order is reproducible (os.listdir order is arbitrary).
for filename in sorted(os.listdir(input_audio)):
    stem, ext = os.path.splitext(filename)
    # Compare the real extension case-insensitively so "X.WAV" is not skipped.
    if ext.lower() != ".wav":
        continue

    audio_path = os.path.join(input_audio, filename)
    result = model.transcribe(audio_path, language="en")

    # Build the output name from the stem; str.replace would also rewrite a
    # ".wav" that happens to occur in the middle of the file name.
    output_path = os.path.join(output_folder, stem + ".txt")
    # Explicit UTF-8 so the transcript does not depend on the platform default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result["text"])


In [11]:
# Transcribe every .wav file in the ReadText/PD folder with the loaded Whisper
# model and write one UTF-8 text file per recording into `output_folder`.
input_audio = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/ReadText/PD"
output_folder = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_transcriptions"

os.makedirs(output_folder, exist_ok=True)

# NOTE(review): this filler-word list is defined but never applied to the
# transcripts in this cell - confirm whether filtering was intended.
unwanted_words = ["uh", "um", "you know", "so", "start now"]

# Sorted so the processing order is reproducible (os.listdir order is arbitrary).
for filename in sorted(os.listdir(input_audio)):
    stem, ext = os.path.splitext(filename)
    # Compare the real extension case-insensitively so "X.WAV" is not skipped.
    if ext.lower() != ".wav":
        continue

    audio_path = os.path.join(input_audio, filename)
    result = model.transcribe(audio_path, language="en")

    # Build the output name from the stem; str.replace would also rewrite a
    # ".wav" that happens to occur in the middle of the file name.
    output_path = os.path.join(output_folder, stem + ".txt")
    # Explicit UTF-8 so the transcript does not depend on the platform default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result["text"])

In [13]:
# Transcribe every .wav file in the SpontaneousDialogue/HC folder with the loaded
# Whisper model and write one UTF-8 text file per recording into `output_folder`.
input_audio = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/SpontaneousDialogue/HC"
output_folder = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_transcriptions"

os.makedirs(output_folder, exist_ok=True)

# NOTE(review): this filler-word list is defined but never applied to the
# transcripts in this cell - confirm whether filtering was intended.
unwanted_words = ["uh", "um", "you know", "so", "start now"]

# Sorted so the processing order is reproducible (os.listdir order is arbitrary).
for filename in sorted(os.listdir(input_audio)):
    stem, ext = os.path.splitext(filename)
    # Compare the real extension case-insensitively so "X.WAV" is not skipped.
    if ext.lower() != ".wav":
        continue

    audio_path = os.path.join(input_audio, filename)
    result = model.transcribe(audio_path, language="en")

    # Build the output name from the stem; str.replace would also rewrite a
    # ".wav" that happens to occur in the middle of the file name.
    output_path = os.path.join(output_folder, stem + ".txt")
    # Explicit UTF-8 so the transcript does not depend on the platform default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result["text"])

In [14]:
# Transcribe every .wav file in the SpontaneousDialogue/PD folder with the loaded
# Whisper model and write one UTF-8 text file per recording into `output_folder`.
input_audio = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/SpontaneousDialogue/PD"
output_folder = "/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_transcriptions"

os.makedirs(output_folder, exist_ok=True)

# NOTE(review): this filler-word list is defined but never applied to the
# transcripts in this cell - confirm whether filtering was intended.
unwanted_words = ["uh", "um", "you know", "so", "start now"]

# Sorted so the processing order is reproducible (os.listdir order is arbitrary).
for filename in sorted(os.listdir(input_audio)):
    stem, ext = os.path.splitext(filename)
    # Compare the real extension case-insensitively so "X.WAV" is not skipped.
    if ext.lower() != ".wav":
        continue

    audio_path = os.path.join(input_audio, filename)
    result = model.transcribe(audio_path, language="en")

    # Build the output name from the stem; str.replace would also rewrite a
    # ".wav" that happens to occur in the middle of the file name.
    output_path = os.path.join(output_folder, stem + ".txt")
    # Explicit UTF-8 so the transcript does not depend on the platform default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result["text"])

# Melspectrogram Extraction

In [12]:
import librosa
from pathlib import Path

In [15]:
# ---- Input / output locations for this extraction run ----
audio_path = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/ReadText/HC")
out_DIR    = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_Spectrogram")

# ---- Audio loading ----
SR = 16000            # sample rate handed to librosa.load (samples per second)
MONO = True           # mono flag handed to librosa.load
EXTS = [".wav"]       # file suffixes accepted by find_audio_files

# ---- Short-time analysis framing ----
WINDOW_SEC = 0.032    # analysis window length in seconds
OVERLAP = 0.50        # fraction of the window shared by consecutive frames
FFT_SIZE = 1024       # n_fft passed to the mel spectrogram
WINDOW = "hann"
CENTER = False

# ---- Mel filterbank ----
N_MELS = 80
FMIN = 0
FMAX = SR // 2        # half the sample rate
POWER = 2.0           # exponent passed as `power` to the mel spectrogram

# ---- Derived frame sizes (in samples / seconds) ----
WIN_SAMPLES = int(round(SR * WINDOW_SEC))
HOP_SAMPLES = int(round((1 - OVERLAP) * WIN_SAMPLES))
FRAME_SEC = HOP_SAMPLES / SR

print(f"win_length ={WIN_SAMPLES} hop_length={HOP_SAMPLES} n_fft={FFT_SIZE} frame_sec={FRAME_SEC:.3f}s")

win_length =512 hop_length=256 n_fft=1024 frame_sec=0.016s


In [16]:
def find_audio_files(root: Path, exts=None):
    """Recursively collect audio files under ``root``.

    Args:
        root: Directory to search; all sub-directories are searched as well.
        exts: File extensions to accept, each including the leading dot
            (for example ``[".wav"]``). A single string is also accepted.
            Matching is case-insensitive. When omitted, the notebook-level
            ``EXTS`` list is used.

    Returns:
        A sorted list of ``Path`` objects for regular files whose suffix is
        one of the accepted extensions.
    """
    if exts is None:
        # Resolved at call time so the configured list can change between calls.
        exts = EXTS
    if isinstance(exts, str):
        # A bare string would otherwise be iterated character by character.
        exts = [exts]
    # Lower-case both sides: the file suffix is lower-cased below, so an
    # upper-case entry in the configured list could never match otherwise.
    wanted = {ext.lower() for ext in exts}
    return sorted(
        p for p in root.rglob("*")
        if p.suffix.lower() in wanted and p.is_file()
    )

def ensure_dir(path: Path):
    """Create the directory that will contain ``path`` if it is missing.

    Only the parent of ``path`` is created (together with any missing
    ancestors); ``path`` itself is not touched. An already existing
    directory is not an error.
    """
    containing_dir = path.parent
    containing_dir.mkdir(exist_ok=True, parents=True)

def extract_logmel(y: np.ndarray, sr: int) -> np.ndarray:
    """Compute a log-scaled (dB) mel spectrogram for one waveform.

    The framing and filterbank settings come from the notebook-level
    constants (FFT_SIZE, HOP_SAMPLES, WIN_SAMPLES, N_MELS, FMIN, FMAX,
    POWER, CENTER, WINDOW).

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.

    Returns:
        The dB-scaled mel spectrogram as a float32 array.
    """
    mel_settings = dict(
        n_fft=FFT_SIZE,
        hop_length=HOP_SAMPLES,
        win_length=WIN_SAMPLES,
        n_mels=N_MELS,
        fmin=FMIN,
        fmax=FMAX,
        power=POWER,
        center=CENTER,
        window=WINDOW,
    )
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, **mel_settings)
    # ref=np.max expresses every value in dB relative to this spectrogram's own peak.
    log_mel = librosa.power_to_db(mel_power, ref=np.max)
    return log_mel.astype(np.float32)

def main(src_root=None, dst_root=None):
    """Extract and save a log-mel spectrogram for every audio file in a tree.

    Each audio file found under the source directory is loaded with librosa,
    converted with ``extract_logmel`` and stored as a ``.npy`` file at the
    same relative location under the destination directory. Files whose
    output already exists are skipped. A failure on one file is reported and
    does not stop the remaining files.

    Args:
        src_root: Directory containing the audio files. Defaults to the
            notebook-level ``audio_path``.
        dst_root: Directory that receives the ``.npy`` files. Defaults to the
            notebook-level ``out_DIR``.

    Returns:
        None. Progress, errors and a failure summary are printed.
    """
    # Fall back to the notebook globals so the existing bare `main()` call keeps working.
    src_root = Path(audio_path if src_root is None else src_root)
    dst_root = Path(out_DIR if dst_root is None else dst_root)

    dst_root.mkdir(parents=True, exist_ok=True)
    audio_files = sorted(find_audio_files(src_root))
    total = len(audio_files)
    print(f"Found {total} audio files under {src_root}")

    n_failed = 0
    for i, src in enumerate(audio_files, 1):
        rel = src.relative_to(src_root)
        dst = (dst_root / rel).with_suffix(".npy")
        ensure_dir(dst)

        if dst.exists():
            print(f"[{i}/{total}] Skip (exists): {dst}")
            continue

        # Write to a temporary sibling first and rename afterwards: an interrupted
        # run must not leave a truncated .npy that a later run would skip as "exists".
        tmp = dst.with_name(dst.name + ".partial")
        try:
            y, sr = librosa.load(src, sr=SR, mono=MONO)
            mel = extract_logmel(y, sr)
            # Saving through a file object stops np.save from appending ".npy" to the temp name.
            with open(tmp, "wb") as fh:
                np.save(fh, mel)
            tmp.replace(dst)
            print(f"[{i}/{total}] Saved: {dst}  shape={mel.shape}")
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            n_failed += 1
            if tmp.exists():
                tmp.unlink()
            print(f"[{i}/{total}] ERROR: {src} -> {e}")

    if n_failed:
        # Individual errors are easy to miss in a long log, so repeat the count at the end.
        print(f"Finished with {n_failed} of {total} files failed")

if __name__ == "__main__":
    main()
# Saved arrays have shape [n_mels, n_frames]


Found 21 audio files under /mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/ReadText/HC
[1/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_Spectrogram/ID00_hc_0_0_0.npy  shape=(80, 9441)
[2/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_Spectrogram/ID01_hc_0_0_0.npy  shape=(80, 10253)
[3/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_Spectrogram/ID03_hc_0_0_0.npy  shape=(80, 8719)
[4/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_Spectrogram/ID05_hc_0_0_0.npy  shape=(80, 6929)
[5/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_Spectrogram/ID08_hc_0_0_0.npy  shape=(80, 9128)
[6/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_Spectrogram/ID09_hc_0_0_0.npy  shape=(80, 7839)
[7/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_ReadText_S

In [17]:
# Run the same log-mel extraction on the ReadText/PD recordings.
# find_audio_files, ensure_dir, extract_logmel and main are already defined in
# the earlier extraction cell; re-defining identical copies here only shadows
# them. main() looks up audio_path and out_DIR when it is called, so
# re-pointing the two paths is all this cell needs to do.
audio_path = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/ReadText/PD")
out_DIR    = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_Spectrogram")

if __name__ == "__main__":
    main()
# [n_mels, n_frames]


Found 16 audio files under /mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/ReadText/PD
[1/16] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_Spectrogram/ID02_pd_2_0_0.npy  shape=(80, 9844)
[2/16] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_Spectrogram/ID04_pd_2_0_1.npy  shape=(80, 7652)
[3/16] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_Spectrogram/ID06_pd_3_1_1.npy  shape=(80, 11179)
[4/16] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_Spectrogram/ID07_pd_2_0_0.npy  shape=(80, 9226)
[5/16] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_Spectrogram/ID13_pd_3_2_2.npy  shape=(80, 5832)
[6/16] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_Spectrogram/ID16_pd_2_0_0.npy  shape=(80, 10450)
[7/16] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_ReadText_

In [18]:
# Run the same log-mel extraction on the SpontaneousDialogue/HC recordings.
# find_audio_files, ensure_dir, extract_logmel and main are already defined in
# the earlier extraction cell; re-defining identical copies here only shadows
# them. main() looks up audio_path and out_DIR when it is called, so
# re-pointing the two paths is all this cell needs to do.
audio_path = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/SpontaneousDialogue/HC")
out_DIR    = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_Spectrogram")

if __name__ == "__main__":
    main()
# [n_mels, n_frames]


Found 21 audio files under /mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/SpontaneousDialogue/HC
[1/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_Spectrogram/ID00_hc_0_0_0.npy  shape=(80, 7486)
[2/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_Spectrogram/ID01_hc_0_0_0.npy  shape=(80, 7314)
[3/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_Spectrogram/ID03_hc_0_0_0.npy  shape=(80, 9629)
[4/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_Spectrogram/ID05_hc_0_0_0.npy  shape=(80, 11527)
[5/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_Spectrogram/ID08_hc_0_0_0.npy  shape=(80, 8021)
[6/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/HC_Spontaneous_Spectrogram/ID09_hc_0_0_0.npy  shape=(80, 8827)
[7/21] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkin

In [19]:
# Run the same log-mel extraction on the SpontaneousDialogue/PD recordings.
# find_audio_files, ensure_dir, extract_logmel and main are already defined in
# the earlier extraction cell; re-defining identical copies here only shadows
# them. main() looks up audio_path and out_DIR when it is called, so
# re-pointing the two paths is all this cell needs to do.
audio_path = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/SpontaneousDialogue/PD")
out_DIR    = Path("/mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_Spectrogram")

if __name__ == "__main__":
    main()
# [n_mels, n_frames]


Found 15 audio files under /mnt/d/Roshidat_Msc_Project/Audio_parkinson/MDVR-KCL_Dataset/26_29_09_2017_KCL/26-29_09_2017_KCL/SpontaneousDialogue/PD
[1/15] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_Spectrogram/ID02_pd_2_0_0.npy  shape=(80, 11833)
[2/15] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_Spectrogram/ID04_pd_2_0_1.npy  shape=(80, 9177)
[3/15] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_Spectrogram/ID06_pd_3_1_1.npy  shape=(80, 8017)
[4/15] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_Spectrogram/ID07_pd_2_0_0.npy  shape=(80, 13076)
[5/15] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_Spectrogram/ID13_pd_3_2_2.npy  shape=(80, 12462)
[6/15] Saved: /mnt/d/Roshidat_Msc_Project/Audio_parkinson/pd&Hc_multi/PD_Spontaneous_Spectrogram/ID16_pd_2_0_0.npy  shape=(80, 9784)
[7/15] Saved: /mnt/d/Roshidat_Msc_Project/Audio_park