In [6]:
import os
import wave
import json
import nltk
import pandas as pd
from pathlib import Path
from vosk import Model, KaldiRecognizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
# Folder holding the unpacked Vosk acoustic model (relative to the notebook).
model_path: str = "vosk-model-en-us-0.22"
# Folder that is scanned for the call recordings to analyse.
audio_folder: str = "recordings"

In [4]:
# Fail fast before the (slow) model load. The model must be a directory, so a
# regular file that merely shares the name is rejected as well.
if not os.path.isdir(model_path):
    raise FileNotFoundError("Vosk model folder not found!")

In [5]:
# The recordings location has to be a directory: globbing a regular file
# would silently yield zero recordings instead of reporting the problem.
if not os.path.isdir(audio_folder):
    raise FileNotFoundError("rec folder not found!")

In [6]:
# The VADER lexicon must be present *before* the analyzer is constructed:
# SentimentIntensityAnalyzer() loads it in its constructor and raises
# LookupError on a fresh environment if it has not been downloaded yet.
nltk.download('vader_lexicon', quiet=True)

# Speech-to-text model, loaded once and shared by every recognizer.
vosk_model = Model(model_path)
# Rule-based sentiment scorer shared by every call analysis.
sia = SentimentIntensityAnalyzer()

In [5]:
def analyze_call(file_path):
    """Transcribe a single WAV recording and score the sentiment of its text.

    Args:
        file_path: Location of the WAV file (``str`` or path-like).

    Returns:
        A ``(transcript, score, label)`` tuple:

        * success: the recognised text, the VADER compound score, and
          ``"Positive"`` (score >= 0.05), ``"Negative"`` (score <= -0.05)
          or ``"Neutral"``;
        * unsupported audio format: an ``"Error: ..."`` message, ``0`` and
          ``"N/A"``;
        * any other failure: ``"Error: <exception text>"``, ``0`` and
          ``"Error"``.

    The function never raises; failures are reported through the returned
    tuple so that one bad file does not abort a batch run.
    """
    try:
        # The context manager guarantees the file handle is released on every
        # exit path (early return, exception, normal completion).
        with wave.open(str(file_path), "rb") as wf:
            # Vosk validation: the recognizer is fed raw frames and expects
            # 16-bit mono PCM.
            if wf.getnchannels() != 1:
                return "Error: Not Mono", 0, "N/A"
            if wf.getsampwidth() != 2:
                return "Error: Not 16-bit PCM", 0, "N/A"

            rec = KaldiRecognizer(vosk_model, wf.getframerate())
            segments = []

            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                # AcceptWaveform returns a truthy value once an utterance is
                # complete; only then is a finished segment available.
                if rec.AcceptWaveform(data):
                    segments.append(json.loads(rec.Result()).get('text', ''))

            # Flush whatever audio is still buffered in the recognizer.
            segments.append(json.loads(rec.FinalResult()).get('text', ''))

        transcript = " ".join(segments).strip()

        # Sentiment Analysis
        score = sia.polarity_scores(transcript)['compound']
        if score >= 0.05:
            label = "Positive"
        elif score <= -0.05:
            label = "Negative"
        else:
            label = "Neutral"

        return transcript, score, label
    except Exception as e:
        # Deliberate best-effort: report the failure in the result row.
        return f"Error: {str(e)}", 0, "Error"

In [6]:
# One dict per processed recording is collected here.
results_list = []

# Path.glob yields entries in filesystem order, which differs between
# machines; sorting keeps the processing order (and the result rows) stable.
audio_files = sorted(Path(audio_folder).glob("*.wav"))
print(f"Found {len(audio_files)} recordings. Starting processing...")

Found 20 recordings. Starting processing...


In [7]:
# Start from an empty list so that re-running this cell rebuilds the results
# instead of appending a duplicate set of rows.
results_list = []

for i, file_path in enumerate(audio_files, start=1):
    print(f"[{i}/{len(audio_files)}] Processing {file_path.name}...")

    txt, score, label = analyze_call(file_path)

    # One record per recording; the keys become the DataFrame columns.
    results_list.append({
        "File_Name": file_path.name,
        "Transcript": txt,
        "Sentiment_Score": score,
        "Sentiment_Label": label
    })

[1/20] Processing call_recording_01.wav...
[2/20] Processing call_recording_02.wav...
[3/20] Processing call_recording_03.wav...
[4/20] Processing call_recording_04.wav...
[5/20] Processing call_recording_05.wav...
[6/20] Processing call_recording_06.wav...
[7/20] Processing call_recording_07.wav...
[8/20] Processing call_recording_08.wav...
[9/20] Processing call_recording_09.wav...
[10/20] Processing call_recording_10.wav...
[11/20] Processing call_recording_11.wav...
[12/20] Processing call_recording_12.wav...
[13/20] Processing call_recording_13.wav...
[14/20] Processing call_recording_14.wav...
[15/20] Processing call_recording_15.wav...
[16/20] Processing call_recording_16.wav...
[17/20] Processing call_recording_17.wav...
[18/20] Processing call_recording_18.wav...
[19/20] Processing call_recording_19.wav...
[20/20] Processing call_recording_20.wav...


In [10]:
# Build the results table in one go from the collected per-file records.
df_results = pd.DataFrame(data=results_list)
# Preview the first five rows.
df_results.head(n=5)

Unnamed: 0,File_Name,Transcript,Sentiment_Score,Sentiment_Label
0,call_recording_01.wav,hello i'm sarah miller i'm calling to inquire ...,0.0,Neutral
1,call_recording_02.wav,i am extremely dissatisfied with my recent ord...,-0.6174,Negative
2,call_recording_03.wav,hi this is maria rodriguez i'm having trouble ...,0.9215,Positive
3,call_recording_04.wav,i just wanted to call and say how pleased i am...,0.9611,Positive
4,call_recording_05.wav,hello my name is jessica brown i'd like to pla...,0.7579,Positive
