In [7]:
import os
import re
import librosa
import torch
import pandas as pd
from torch.nn.utils.rnn import pad_sequence
from datasets import Dataset
from transformers import (
    WhisperProcessor,
    WhisperForConditionalGeneration,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)
import evaluate
import Levenshtein
import warnings


In [8]:
# Path to the proverbs text file -- replace with the location on your machine.
# Each non-empty line is expected to look like "<number>. <proverb text>".
txt_path = r"C:\PATH\Paroimies.txt"

# Load the text data
def load_target_text(txt_path):
    """Read the numbered proverbs file and return the proverb texts.

    Every non-empty line must look like ``<number>. <text>``. The numeric
    prefix is dropped and the remaining text is stripped of surrounding
    whitespace. Lines that do not match are reported on stdout and skipped.

    Args:
        txt_path: Path to a UTF-8 encoded text file (with or without a BOM).

    Returns:
        list[str]: The proverb texts, in file order.
    """
    numbered_line = re.compile(r"^\d+\.\s*(.*)")
    target_texts = []
    # "utf-8-sig" drops a leading byte-order mark if one is present (some
    # Windows editors write it). With plain "utf-8" the BOM stays attached to
    # the first line, the pattern does not match, and the first proverb is
    # skipped as badly formatted.
    with open(txt_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue  # blank lines carry no proverb
            match = numbered_line.match(line)
            if match:
                target_texts.append(match.group(1).strip())
            else:
                print(f"Skipping line due to formatting issues: {line}")
    return target_texts

# Parse the proverbs file once; the resulting list is the candidate pool
# passed to the matching code in the cells below.
target_texts = load_target_text(txt_path)

In [9]:
# Path to the saved model directory -- replace with the folder on your machine.
# The same directory is used for both the model and the processor below.
model_path = r"C:\PATH\whisper-finetuned2"

# Load the model and its processor from that directory; both are used as
# globals by transcribe_audio further down.
model = WhisperForConditionalGeneration.from_pretrained(model_path)
processor = WhisperProcessor.from_pretrained(model_path)


In [10]:
# Uncomment if the Levenshtein package is missing (run it before the imports cell):
# %pip install Levenshtein

In [11]:
# Test on unseen audio
def transcribe_audio(file_path):
    """Transcribe a single audio file with the globally loaded model.

    The file is loaded through librosa at 16 kHz, converted to input features
    by the global ``processor``, passed to the global ``model``'s ``generate``
    and decoded with special tokens skipped.

    Args:
        file_path: Path to the audio file to transcribe.

    Returns:
        The first decoded sequence.
    """
    target_rate = 16000
    # librosa also returns the sampling rate; it is not needed here.
    waveform, _ = librosa.load(file_path, sr=target_rate)
    encoded = processor(waveform, sampling_rate=target_rate, return_tensors="pt")
    token_ids = model.generate(encoded.input_features)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]

def find_nearest_proverb(transcription, target_texts):
    """Find the closest match for the transcription from the list of proverbs.

    Closeness is the Levenshtein distance between the lower-cased
    transcription and each lower-cased proverb. When several proverbs share
    the smallest distance, the one that appears first wins.

    Args:
        transcription: The text to match.
        target_texts: Iterable of candidate proverb strings.

    Returns:
        tuple: ``(nearest_proverb, distance)`` with the proverb in its
        original casing and its distance to the transcription.

    Raises:
        ValueError: If ``target_texts`` contains no candidates.
    """
    # Lower-case the transcription once instead of once per candidate.
    needle = transcription.lower()

    nearest_proverb = None
    smallest = None
    for proverb in target_texts:
        current = Levenshtein.distance(needle, proverb.lower())
        # Strict "<" keeps the earliest candidate on ties.
        if smallest is None or current < smallest:
            nearest_proverb, smallest = proverb, current

    if smallest is None:
        raise ValueError("target_texts is empty: no proverbs to compare against")
    return nearest_proverb, smallest

def transcribe_and_suggest(file_path, target_texts):
    """Transcribe an audio file and suggest the proverb closest to it.

    Args:
        file_path: Path to the audio file, forwarded to ``transcribe_audio``.
        target_texts: Candidate proverbs, forwarded to ``find_nearest_proverb``.

    Returns:
        tuple: ``(transcription, nearest_proverb)``. The distance reported by
        ``find_nearest_proverb`` is not part of the result.
    """
    spoken_text = transcribe_audio(file_path)
    # Only the proverb is needed; the distance is discarded.
    suggestion, _ = find_nearest_proverb(spoken_text, target_texts)
    return spoken_text, suggestion

# Silence library warnings for the inference run. The filter has to be
# installed before the call below; set afterwards it cannot hide warnings
# emitted while transcribing.
warnings.filterwarnings("ignore")

# Path to the audio file to test -- replace with a file on your machine.
test_audio_path = r"C:\PATH\audio.wav"
transcription, closest_proverb = transcribe_and_suggest(test_audio_path, target_texts)
print("Transcription:", transcription)
print("Do you mean:", closest_proverb)

Transcription: Apou polla logia o kosmos en dikos tou.
Do you mean: Apou’n antrepetai, o kosmos en dikos tou.
