## Call Analysis: Speech Processing Pipeline

This notebook implements a comprehensive speech processing pipeline with:

- Speech Recognition
- Speaker Diarization
- Emotion Detection
- Interactive Streamlit Interface

## 1. Installation and Setup

#### Install required dependencies and libraries

In [1]:
# Install necessary libraries
!pip install transformers torchaudio pyannote.audio speechbrain
!pip install torch
!pip install tqdm

Collecting pyannote.audio
  Downloading pyannote.audio-3.3.2-py2.py3-none-any.whl.metadata (11 kB)
Collecting speechbrain
  Downloading speechbrain-1.0.3-py3-none-any.whl.metadata (24 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/pip/_internal/cli/base_command.py", line 179, in exc_logging_wrapper
    status = run_func(*args)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pip/_internal/cli/req_command.py", line 67, in wrapper
    return func(self, options, args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pip/_internal/commands/install.py", line 362, in run
^C


In [1]:
# Install the ASR / diarization / emotion-recognition stack in a single command
# (the same packages as the first install cell, plus requests).
!pip install requests transformers torch torchaudio pyannote.audio speechbrain tqdm



In [2]:
# Google Generative AI SDK; app.py imports it as `google.generativeai` for the Gemini calls.
!pip install google-generativeai



In [3]:
# Streamlit serves the dashboard that is written to app.py below.
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.44.1-py3-none-any.whl (9.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m71.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [4]:
!pip install npx

Collecting npx
  Downloading npx-0.1.6-py3-none-any.whl.metadata (7.0 kB)
Downloading npx-0.1.6-py3-none-any.whl (10 kB)
Installing collected packages: npx
Successfully installed npx-0.1.6


In [5]:
# Install localtunnel with npm; the launch cell runs it through `npx localtunnel`.
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K
added 22 packages in 4s
[1G[0K⠧[1G[0K
[1G[0K⠧[1G[0K3 packages are looking for funding
[1G[0K⠧[1G[0K  run `npm fund` for details
[1G[0K⠧[1G[0K

In [6]:
# gTTS (Google Text-to-Speech); app.py uses it to turn the call summary into audio.
!pip install gtts

Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.4


## 2. Import Required Libraries

In [7]:
# Import required modules
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
from pyannote.audio import Pipeline
from speechbrain.pretrained import EncoderClassifier
from tqdm.notebook import tqdm

DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _speechbrain_save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _speechbrain_load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _recover
  from speechbrain.pretrained import EncoderClassifier


## 3. Initialize Speaker Diarization Pipeline

In [8]:
import getpass
import os

from pyannote.audio import Pipeline

# Never embed the Hugging Face access token in the notebook: anything saved here
# is visible to every reader. Read it from the environment, prompting once if it
# is missing, and store it in os.environ for the rest of this kernel session.
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass.getpass("Hugging Face access token: ")

# NOTE: this rebinds the name `pipeline`, shadowing the transformers `pipeline`
# factory imported in the imports cell above.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ["HF_TOKEN"],
)

config.yaml:   0%|          | 0.00/469 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/5.91M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/399 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/221 [00:00<?, ?B/s]

## 4. Streamlit Application Configuration

### Creating an interactive web interface for the speech processing pipeline

In [None]:
%%writefile app.py
import base64
import json
import os
import re
from collections import Counter
from io import BytesIO

import google.generativeai as genai
import pandas as pd
import plotly.express as px
import streamlit as st
import torch
import torchaudio
from gtts import gTTS
from pyannote.audio import Pipeline
from speechbrain.inference import EncoderClassifier
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline

# Set page configuration - MUST be the first Streamlit command
st.set_page_config(
    page_title="Call Analysis Dashboard",
    layout="wide",
    page_icon="🎙️",
    initial_sidebar_state="expanded"
)

# Configure Gemini API.
# The key is read from the GEMINI_API_KEY environment variable so that it never
# has to be pasted into (and committed with) this source file.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
if API_KEY:
    genai.configure(api_key=API_KEY)
else:
    # Keep the app usable for the non-LLM parts, but make the cause of the
    # upcoming LLM errors obvious instead of failing with an opaque auth error.
    st.warning(
        "GEMINI_API_KEY is not set; speaker identification, query tracking, "
        "competitor detection and summarization will fail."
    )

# Custom CSS for enhanced styling.
# The rules are injected as a raw <style> element, which is why
# unsafe_allow_html=True is required. Classes defined here are referenced by the
# HTML emitted later in this script: .main-title / .main-quote (page header),
# .section-header (section dividers), .indicator plus .positive / .negative /
# .neutral (sentiment badges, picked from the lower-cased sentiment label), and
# .audio-player (summary audio element).
st.markdown("""
    <style>
    /* Main container styling */
    .main {
        padding: 2rem;
        background-color: #f5f5f5;
    }

    /* Header styling */
    .main-title {
        font-size: 3rem;
        background: linear-gradient(45deg, #2196F3, #4CAF50);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        text-align: center;
        padding: 1.5rem 0;
        font-weight: 800;
        margin-bottom: 1rem;
    }
    .main-quote {
        font-size: 1.5rem;
        color: #555;
        text-align: center;
        padding: 1rem 0;
        font-style: italic;
        margin-bottom: 2rem;
    }

    /* Section headers */
    .section-header {
        font-size: 1.8rem;
        color: #1565C0;
        margin: 1.5rem 0;
        padding: 0.5rem;
        border-bottom: 3px solid #1565C0;
    }

    /* Cards styling */
    .stCard {
        border-radius: 1rem;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
        padding: 1.5rem;
        margin: 1rem 0;
        background: white;
    }

    /* Button styling */
    .stButton>button {
        background: linear-gradient(45deg, #2196F3, #4CAF50);
        color: white;
        border: none;
        border-radius: 0.5rem;
        padding: 0.75rem 1.5rem;
        font-weight: 600;
        transition: all 0.3s ease;
    }

    .stButton>button:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    }

    /* File uploader styling */
    .uploadedFile {
        border: 2px dashed #4CAF50;
        border-radius: 1rem;
        padding: 1rem;
        text-align: center;
        background: #E8F5E9;
    }

    /* Audio player styling */
    .audio-player {
        width: 100%;
        margin: 1rem 0;
        border-radius: 0.5rem;
        background: #f5f5f5;
    }

    /* Indicator styling */
    .indicator {
        padding: 0.5rem 1rem;
        border-radius: 0.25rem;
        font-weight: 600;
        text-align: center;
        margin: 0.5rem 0;
    }
    .positive { background: #E8F5E9; color: #2E7D32; }
    .negative { background: #FFEBEE; color: #C62828; }
    .neutral { background: #E3F2FD; color: #1565C0; }
    </style>
""", unsafe_allow_html=True)

# Initialize models
@st.cache_resource
def load_models():
    """Load every model the analysis pipeline needs.

    Decorated with ``st.cache_resource`` so the (large) models are created once
    and reused across Streamlit reruns instead of being reloaded per upload.

    The Hugging Face access token for the pyannote diarization pipeline is read
    from the ``HF_TOKEN`` environment variable; it is deliberately not embedded
    in the source.

    Returns:
        Tuple ``(processor, asr_model, diarization_pipeline, emotion_recognizer,
        sentiment_analyzer)``.

    Raises:
        RuntimeError: if the diarization pipeline could not be loaded.
    """
    processor = Wav2Vec2Processor.from_pretrained('facebook/wav2vec2-large-960h-lv60')
    asr_model = Wav2Vec2ForCTC.from_pretrained('facebook/wav2vec2-large-960h-lv60')

    # None falls back to whatever credentials the Hugging Face client already has.
    hf_token = os.environ.get("HF_TOKEN") or None
    diarization_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=hf_token
    )
    # NOTE(review): pyannote.audio 3.x appears to return None (after printing a
    # hint) when the download is refused; fail loudly here rather than with an
    # opaque "'NoneType' object is not callable" during processing.
    if diarization_pipeline is None:
        raise RuntimeError(
            "Could not load pyannote/speaker-diarization-3.1. Set HF_TOKEN to a "
            "Hugging Face token that has accepted the model's user conditions."
        )

    emotion_recognizer = EncoderClassifier.from_hparams(
        source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
        savedir="pretrained_models/emotion_recognition"
    )
    sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
    return processor, asr_model, diarization_pipeline, emotion_recognizer, sentiment_analyzer

# LLM-based functions
def identify_speakers_llm(dialogue_text):
    """Ask Gemini which diarized speaker label is the agent and which is the customer.

    Args:
        dialogue_text: Transcript with one line per segment, each line carrying
            its speaker label.

    Returns:
        A dict with exactly the keys ``'agent'`` and ``'customer'``. Each value
        is the speaker label chosen by the model, or ``"Unknown"`` when the
        model did not supply a non-empty string for that role. Failures are
        reported through ``print`` / ``st.error`` and yield ``"Unknown"`` for
        both roles; the function does not raise.
    """
    try:
        if not dialogue_text or not dialogue_text.strip():
            print("identify_speakers_llm: Dialogue text is empty")
            st.error("Speaker identification error: Dialogue text is empty")
            return {"agent": "Unknown", "customer": "Unknown"}

        model = genai.GenerativeModel('gemini-2.0-flash')
        prompt = f"""
        You are an expert in conversational analysis. Given the following dialogue where speakers are labeled as SPEAKER_00, SPEAKER_01, etc., identify which speaker is the customer service agent and which is the customer. The agent typically initiates the call with a greeting (e.g., "Thank you for calling"), uses formal language, and responds to queries. The customer often asks questions, describes issues, or expresses emotions like frustration. Return only the JSON object with keys 'agent' and 'customer' mapping to the speaker labels (e.g., 'SPEAKER_00'), with no additional text or markdown.

        Dialogue:
        {dialogue_text}

        Expected output:
        {{"agent": "SPEAKER_XX", "customer": "SPEAKER_YY"}}
        """
        response = model.generate_content(prompt)
        reply = response.text

        if not reply or not reply.strip():
            print("identify_speakers_llm: Empty response from Gemini API")
            st.error("Speaker identification error: Empty response from Gemini API")
            return {"agent": "Unknown", "customer": "Unknown"}

        try:
            # Decode the first JSON object in the reply. raw_decode tolerates
            # surrounding prose / markdown fences and, unlike a regex, is not
            # confused by braces that appear inside string values.
            start = reply.find('{')
            if start != -1:
                parsed, _ = json.JSONDecoder().raw_decode(reply, start)
                roles = {"agent": "Unknown", "customer": "Unknown"}
                # Only accept non-empty string labels: callers feed these values
                # to string operations, so null / numeric / empty labels must
                # not leak through.
                for role in ("agent", "customer"):
                    label = parsed.get(role) if isinstance(parsed, dict) else None
                    if isinstance(label, str) and label.strip():
                        roles[role] = label.strip()
                return roles

            # Fallback: Attempt to parse the response as text
            print("identify_speakers_llm: No JSON found, attempting text parsing")
            if "agent" in reply.lower() and "customer" in reply.lower():
                # Simple heuristic: Look for "agent: SPEAKER_XX, customer: SPEAKER_YY" in the text
                agent_match = re.search(r'agent[\s:]+(SPEAKER_\d+)', reply, re.IGNORECASE)
                customer_match = re.search(r'customer[\s:]+(SPEAKER_\d+)', reply, re.IGNORECASE)
                if agent_match and customer_match:
                    return {
                        "agent": agent_match.group(1),
                        "customer": customer_match.group(1)
                    }
            st.error("Speaker identification error: No JSON found in response")
            return {"agent": "Unknown", "customer": "Unknown"}
        except json.JSONDecodeError as e:
            print(f"identify_speakers_llm: JSON parsing error - {e}")
            st.error(f"Speaker identification error: Invalid JSON - {e}")
            return {"agent": "Unknown", "customer": "Unknown"}
    except Exception as e:
        print(f"identify_speakers_llm: General error - {e}")
        st.error(f"Speaker identification error: {e}")
        return {"agent": "Unknown", "customer": "Unknown"}

def extract_queries_llm(dialogue_text, customer_label):
    """Ask Gemini for the questions / issues raised by the customer.

    Args:
        dialogue_text: Transcript with one line per segment.
        customer_label: Label that marks the customer's lines in the transcript.

    Returns:
        A list of non-empty query strings (possibly empty). Failures are
        reported through ``print`` / ``st.error`` and yield ``[]``; the function
        does not raise.
    """
    try:
        if not dialogue_text or not dialogue_text.strip():
            print("extract_queries_llm: Dialogue text is empty")
            st.error("Query extraction error: Dialogue text is empty")
            return []

        model = genai.GenerativeModel('gemini-2.0-flash')
        prompt = f"""
        You are an expert in conversational analysis. Given the following dialogue, identify all queries or issues raised by the customer (labeled as '{customer_label}'). A query is a question, problem, or request for assistance. Return only the JSON array of strings, where each string is a query extracted from the customer's dialogue, with no additional text or markdown.

        Dialogue:
        {dialogue_text}

        Expected output:
        ["query 1", "query 2", ...]
        """
        response = model.generate_content(prompt)
        reply = response.text

        if not reply or not reply.strip():
            print("extract_queries_llm: Empty response from Gemini API")
            st.error("Query extraction error: Empty response from Gemini API")
            return []

        try:
            # Decode the first JSON array in the reply. A non-greedy regex would
            # stop at the first ']' even when it sits inside a quoted query
            # (e.g. "item [1] fails"), producing invalid JSON and losing every
            # query; raw_decode parses the real array boundary.
            start = reply.find('[')
            if start != -1:
                parsed, _ = json.JSONDecoder().raw_decode(reply, start)
                # Keep only usable entries so the query count is meaningful.
                return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]

            # Fallback: Attempt to parse the response as text
            print("extract_queries_llm: No JSON found, attempting text parsing")
            # Look for numbered or bulleted list of queries
            query_lines = re.findall(r'^-?\s*(.+?)(?=\n|$)', reply, re.MULTILINE)
            if query_lines:
                return [line.strip('- ').strip() for line in query_lines if line.strip()]
            st.error("Query extraction error: No JSON found in response")
            return []
        except json.JSONDecodeError as e:
            print(f"extract_queries_llm: JSON parsing error - {e}")
            st.error(f"Query extraction error: Invalid JSON - {e}")
            return []
    except Exception as e:
        print(f"extract_queries_llm: General error - {e}")
        st.error(f"Query extraction error: {e}")
        return []

def track_query_resolution_llm(dialogue_text, queries, agent_label):
    """Ask Gemini which of the customer's queries the agent resolved.

    Args:
        dialogue_text: Transcript with one line per segment.
        queries: List of customer queries to check.
        agent_label: Label that marks the agent's lines in the transcript.

    Returns:
        A list of the queries judged resolved (possibly empty). Returns ``[]``
        without contacting the model when ``queries`` is empty. Failures are
        reported through ``print`` / ``st.error`` and yield ``[]``; the function
        does not raise.
    """
    try:
        if not dialogue_text or not dialogue_text.strip():
            print("track_query_resolution_llm: Dialogue text is empty")
            st.error("Query resolution tracking error: Dialogue text is empty")
            return []

        # Nothing to resolve: skip the API call, and avoid the model inventing
        # "resolved" items that would outnumber the (zero) extracted queries.
        if not queries:
            return []

        model = genai.GenerativeModel('gemini-2.0-flash')
        prompt = f"""
        You are an expert in conversational analysis. Given the following dialogue and a list of customer queries, determine which queries were resolved by the agent (labeled as '{agent_label}'). A query is resolved if the agent provides a solution, confirms it is fixed, or the customer acknowledges resolution. Return only the JSON array of strings, listing the resolved queries, with no additional text or markdown.

        Dialogue:
        {dialogue_text}

        Queries:
        {json.dumps(queries)}

        Expected output:
        ["resolved query 1", "resolved query 2", ...]
        """
        response = model.generate_content(prompt)
        reply = response.text

        if not reply or not reply.strip():
            print("track_query_resolution_llm: Empty response from Gemini API")
            st.error("Query resolution tracking error: Empty response from Gemini API")
            return []

        try:
            # Decode the first JSON array in the reply; unlike a non-greedy
            # regex this is not cut short by a ']' inside a quoted query.
            start = reply.find('[')
            if start != -1:
                parsed, _ = json.JSONDecoder().raw_decode(reply, start)
                return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]

            # Fallback: Attempt to parse the response as text
            print("track_query_resolution_llm: No JSON found, attempting text parsing")
            resolved_lines = re.findall(r'^-?\s*(.+?)(?=\n|$)', reply, re.MULTILINE)
            if resolved_lines:
                return [line.strip('- ').strip() for line in resolved_lines if line.strip()]
            st.error("Query resolution tracking error: No JSON found in response")
            return []
        except json.JSONDecodeError as e:
            print(f"track_query_resolution_llm: JSON parsing error - {e}")
            st.error(f"Query resolution tracking error: Invalid JSON - {e}")
            return []
    except Exception as e:
        print(f"track_query_resolution_llm: General error - {e}")
        st.error(f"Query resolution tracking error: {e}")
        return []

def detect_competitors_llm(dialogue_text, customer_label):
    """Ask Gemini for competitor products / companies mentioned by the customer.

    Args:
        dialogue_text: Transcript with one line per segment.
        customer_label: Label that marks the customer's lines in the transcript.

    Returns:
        A list of dicts, each with the string keys ``'mention'`` and
        ``'context'`` (possibly empty). Array items that are not objects or
        have no mention are dropped. Failures are reported through ``print`` /
        ``st.error`` and yield ``[]``; the function does not raise.
    """
    try:
        if not dialogue_text or not dialogue_text.strip():
            print("detect_competitors_llm: Dialogue text is empty")
            st.error("Competitor detection error: Dialogue text is empty")
            return []

        model = genai.GenerativeModel('gemini-2.0-flash')
        prompt = f"""
        You are an expert in conversational analysis. Given the following dialogue, identify any mentions of competitor products or companies by the customer (labeled as '{customer_label}'). Return only the JSON array of objects, where each object has 'mention' (the competitor name or product) and 'context' (the full dialogue line containing the mention), with no additional text or markdown.

        Dialogue:
        {dialogue_text}

        Expected output:
        [
            {{"mention": "competitor_name", "context": "dialogue line"}},
            ...
        ]
        """
        response = model.generate_content(prompt)
        reply = response.text

        if not reply or not reply.strip():
            print("detect_competitors_llm: Empty response from Gemini API")
            st.error("Competitor detection error: Empty response from Gemini API")
            return []

        try:
            # Decode the first JSON array in the reply; unlike a non-greedy
            # regex this is not cut short by a ']' inside a quoted context line.
            start = reply.find('[')
            if start != -1:
                parsed, _ = json.JSONDecoder().raw_decode(reply, start)
                # Normalise to the schema the UI indexes ('mention', 'context')
                # so a malformed item cannot raise KeyError/TypeError there.
                mentions = []
                for item in parsed:
                    if isinstance(item, dict) and item.get("mention"):
                        mentions.append({
                            "mention": str(item["mention"]),
                            "context": str(item.get("context", "")),
                        })
                return mentions

            # Fallback: Attempt to parse the response as text
            print("detect_competitors_llm: No JSON found, attempting text parsing")
            competitor_lines = re.findall(r'^-?\s*(\w+)\s*:\s*(.+?)(?=\n|$)', reply, re.MULTILINE)
            if competitor_lines:
                return [{"mention": mention, "context": context} for mention, context in competitor_lines]
            st.error("Competitor detection error: No JSON found in response")
            return []
        except json.JSONDecodeError as e:
            print(f"detect_competitors_llm: JSON parsing error - {e}")
            st.error(f"Competitor detection error: Invalid JSON - {e}")
            return []
    except Exception as e:
        print(f"detect_competitors_llm: General error - {e}")
        st.error(f"Competitor detection error: {e}")
        return []

def get_dominant_emotion(dialogue_text):
    """Tally the emotion tag carried by each dialogue line.

    For every line that contains both '(' and ')', the tag is the text between
    the first '(' and the following ',' (whitespace-stripped).

    Returns:
        ``(most_frequent_tag, {tag: percentage_of_tagged_lines})``, or
        ``(None, None)`` when no line carries a tag.
    """
    tags = []
    for row in dialogue_text.split('\n'):
        if '(' not in row or ')' not in row:
            continue
        inside_paren = row.split('(')[1]
        tags.append(inside_paren.split(',')[0].strip())

    if not tags:
        return None, None

    tally = Counter(tags)
    total = len(tags)
    shares = {}
    for tag, hits in tally.items():
        shares[tag] = (hits / total) * 100
    top_tag = tally.most_common(1)[0][0]
    return top_tag, shares

def create_emotion_chart(emotion_percentages):
    """Build a Plotly pie chart from an ``{emotion: percentage}`` mapping.

    Returns:
        The Plotly figure, or ``None`` when the mapping is empty or ``None``.
    """
    if not emotion_percentages:
        return None

    frame = pd.DataFrame(
        list(emotion_percentages.items()),
        columns=['Emotion', 'Percentage'],
    )
    chart = px.pie(
        frame,
        values='Percentage',
        names='Emotion',
        title='Emotion Distribution',
        color_discrete_sequence=px.colors.qualitative.Set3,
    )
    # Put the label and its share inside each slice.
    chart.update_traces(textposition='inside', textinfo='percent+label')
    return chart

def score_agent(agent_sentiments, resolved_queries, total_queries, dialogue_entries):
    """Compute a 0-100 performance score for the agent.

    The score is the sum of three parts, capped at 100:
      * sentiment: 40 if the agent's most common sentiment label is
        "POSITIVE", 20 if it is "NEUTRAL", otherwise 0;
      * resolution: 40 multiplied by the share of queries resolved;
      * politeness: 5 for every entry containing "Agent" whose lower-cased text
        includes "thank you", "please" or "appreciate".

    Args:
        agent_sentiments: Sentiment labels of the agent's turns.
        resolved_queries: Number of queries reported as resolved.
        total_queries: Number of customer queries extracted.
        dialogue_entries: Formatted dialogue lines.

    Returns:
        The score as a number between 0 and 100.
    """
    score = 0
    if agent_sentiments:
        dominant_sentiment = Counter(agent_sentiments).most_common(1)[0][0]
        if dominant_sentiment == "POSITIVE":
            score += 40
        elif dominant_sentiment == "NEUTRAL":
            score += 20

    if total_queries > 0:
        # The two counts come from independent LLM calls, so "resolved" can
        # exceed "total"; clamp the rate to [0, 1] so that mismatch cannot
        # award more than the 40 points this component is worth.
        resolution_rate = min(max(resolved_queries / total_queries, 0), 1)
    else:
        resolution_rate = 0
    score += resolution_rate * 40

    polite_phrases = ("thank you", "please", "appreciate")
    politeness_count = sum(
        1
        for entry in dialogue_entries
        if "Agent" in entry and any(phrase in entry.lower() for phrase in polite_phrases)
    )
    score += politeness_count * 5
    return min(score, 100)

def process_audio_file(file):
    """Run the full call-analysis pipeline on one audio file.

    Steps: load the audio and normalise it to 16 kHz mono, diarize it, then for
    each speaker turn run speech recognition, emotion recognition and sentiment
    analysis. The resulting transcript is handed to the LLM helpers to identify
    agent/customer, extract queries, track resolutions, detect competitor
    mentions and write a summary.

    Args:
        file: Path or file-like object accepted by ``torchaudio.load``.

    Returns:
        An 11-tuple ``(dialogue_text, summary, dominant_emotion,
        emotion_percentages, customer_sentiment, agent_sentiment, queries,
        total_queries, resolved_query_count, agent_score, competitor_mentions)``.
        If anything raises, the error is shown with ``st.error`` and a
        placeholder tuple (``"[Processing failed]"`` first) is returned instead.
    """
    try:
        processor, asr_model, diarization_pipeline, emotion_recognizer, sentiment_analyzer = load_models()

        # Load and preprocess audio
        signal, fs = torchaudio.load(file)
        if fs != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=fs, new_freq=16000)
            signal = resampler(signal)
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)

        # Diarization
        diarization = diarization_pipeline({"waveform": signal, "sample_rate": 16000})
        customer_sentiments = []
        agent_sentiments = []

        # Turns shorter than 0.1 s are skipped. NOTE(review): wav2vec2-style
        # convolutional front ends cannot encode clips shorter than their
        # receptive field (believed to be ~25 ms at 16 kHz - confirm), and one
        # such clip would otherwise abort the analysis of the whole file.
        min_segment_samples = 1600

        # One (speaker, emotion, sentiment, transcription) record per turn.
        turns = []

        # Inference only: disabling autograd avoids building graphs for every segment.
        with torch.no_grad():
            for segment, _, speaker in diarization.itertracks(yield_label=True):
                start_sample = int(segment.start * 16000)
                end_sample = int(segment.end * 16000)
                segment_audio = signal[:, start_sample:end_sample]
                if segment_audio.shape[-1] < min_segment_samples:
                    continue

                # Transcription
                input_values = processor(segment_audio.squeeze(), sampling_rate=16000, return_tensors='pt').input_values
                logits = asr_model(input_values).logits
                predicted_ids = torch.argmax(logits, dim=-1)
                transcription = processor.decode(predicted_ids[0])

                # Emotion recognition
                segment_signal = segment_audio.to(emotion_recognizer.device)
                if segment_signal.ndim == 1:
                    segment_signal = segment_signal.unsqueeze(0)
                embeddings = emotion_recognizer.mods.wav2vec2(segment_signal)
                embeddings = emotion_recognizer.mods.avg_pool(embeddings)
                logits_emotion = emotion_recognizer.mods.output_mlp(embeddings)
                probabilities = torch.softmax(logits_emotion, dim=-1)
                predicted_index = torch.argmax(probabilities, dim=-1)
                emotion_label = emotion_recognizer.hparams.label_encoder.decode_torch(predicted_index)[0]

                # Sentiment analysis
                sentiment = sentiment_analyzer(transcription)[0]
                sentiment_label = sentiment['label']

                turns.append((speaker, emotion_label, sentiment_label, transcription))

        dialogue_entries = [
            f"{speaker} ({emotion_label}, {sentiment_label}): {transcription}"
            for speaker, emotion_label, sentiment_label, transcription in turns
        ]
        dialogue_text = "\n".join(dialogue_entries)
        print("Dialogue text before LLM:", dialogue_text)  # Debug: Log dialogue text
        st.write("Dialogue text before LLM:", dialogue_text)

        # Identify agent and customer using LLM
        speaker_roles = identify_speakers_llm(dialogue_text)
        agent_label = speaker_roles.get('agent', 'Unknown')
        customer_label = speaker_roles.get('customer', 'Unknown')

        # Map diarization labels to roles by exact match on the speaker label.
        # (Substring replacement over the whole entry could also rewrite the
        # transcription, and breaks badly when a label is empty or not a string.)
        role_names = {}
        if isinstance(customer_label, str) and customer_label:
            role_names[customer_label] = "Customer"
        if isinstance(agent_label, str) and agent_label:
            role_names[agent_label] = "Agent"  # agent wins if both roles got the same label

        # Relabel dialogue entries and assign sentiments to agent/customer
        dialogue_entries = []
        for speaker, emotion_label, sentiment_label, transcription in turns:
            role = role_names.get(speaker, speaker)
            dialogue_entries.append(f"{role} ({emotion_label}, {sentiment_label}): {transcription}")
            if role == "Agent":
                agent_sentiments.append(sentiment_label)
            elif role == "Customer":
                customer_sentiments.append(sentiment_label)
        dialogue_text = "\n".join(dialogue_entries)

        # Extract queries, resolutions, and competitor mentions using LLM
        queries = extract_queries_llm(dialogue_text, "Customer")
        resolved_queries = track_query_resolution_llm(dialogue_text, queries, "Agent")
        competitor_mentions = detect_competitors_llm(dialogue_text, "Customer")

        # Calculate additional metrics
        customer_sentiment = Counter(customer_sentiments).most_common(1)[0][0] if customer_sentiments else "Neutral"
        agent_sentiment = Counter(agent_sentiments).most_common(1)[0][0] if agent_sentiments else "Neutral"
        dominant_emotion, emotion_percentages = get_dominant_emotion(dialogue_text)
        agent_score = score_agent(agent_sentiments, len(resolved_queries), len(queries), dialogue_entries)
        summary = summarize_with_gemini(dialogue_text)

        return (dialogue_text, summary, dominant_emotion, emotion_percentages,
                customer_sentiment, agent_sentiment, queries, len(queries),
                len(resolved_queries), agent_score, competitor_mentions)

    except Exception as e:
        print(f"process_audio_file: Error - {e}")
        st.error(f"Error processing audio file: {e}")
        return ("[Processing failed]", "[Summary generation failed]", None, None,
                "Unknown", "Unknown", [], 0, 0, 0, [])

def summarize_with_gemini(text):
    """Produce a short (about 30 words) Gemini summary of a dialogue.

    Returns:
        The model's summary text, or the placeholder
        ``"[Summary generation failed]"`` when the input is blank, the model
        returns nothing, or any exception occurs (the error is reported through
        ``print`` and ``st.error``).
    """
    failure = "[Summary generation failed]"
    try:
        has_content = bool(text) and bool(text.strip())
        if not has_content:
            print("summarize_with_gemini: Dialogue text is empty")
            st.error("Summarization error: Dialogue text is empty")
            return failure

        prompt = f"""
        You are an expert in conversational summarization. Provide a brief, factual summary of this conversation in no more than 30 words, focusing on key points and emotional states.

        Dialogue:
        {text}

        Example: Customer asked about billing issues; agent resolved them. Customer was frustrated, agent remained calm.
        """
        gemini = genai.GenerativeModel('gemini-2.0-flash')
        reply = gemini.generate_content(prompt)
        if reply.text:
            return reply.text
        return failure
    except Exception as err:
        print(f"summarize_with_gemini: Error - {err}")
        st.error(f"Summarization error: {err}")
        return failure

# Main Application: gradient page title followed by the tagline.
st.markdown(
    '<h1 class="main-title">🎙️ Call Analysis Dashboard</h1>',
    unsafe_allow_html=True,
)
st.markdown(
    '<h4 class="main-quote">"Unlocking Insights from Conversations"</h4>',
    unsafe_allow_html=True,
)

# Sidebar: what the dashboard reports, then how to use it.
st.sidebar.markdown("### About")
st.sidebar.write("""
    This AI-powered dashboard analyzes customer service calls to provide insights on:
    - Speaker identification (Agent/Customer)
    - Sentiment and emotion analysis
    - Query tracking and resolution
    - Agent performance scoring
    - Competitor mentions
    """)

st.sidebar.markdown("### Instructions")
st.sidebar.write("""
    1. Upload a WAV file of a customer service call
    2. Wait for processing (may take a minute)
    3. Explore the analysis results
    4. Listen to or download the summary audio
    """)

# Main content
st.markdown('<div class="section-header">Upload Audio</div>', unsafe_allow_html=True)
uploaded_file = st.file_uploader("Upload a WAV file", type=["wav"], label_visibility="collapsed")

if uploaded_file:
    # Streamlit re-executes this script on every interaction (for example a
    # click on the download button below). Keep the analysis in session state
    # and recompute only when a different file is uploaded, so the models and
    # the LLM are not re-run for each click.
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("analysis_key") != upload_key:
        with st.spinner("🔄 Analyzing call audio..."):
            analysis = process_audio_file(uploaded_file)
        st.session_state["analysis_result"] = analysis
        # Remember only successful runs so a failed analysis is retried on the next rerun.
        st.session_state["analysis_key"] = upload_key if analysis[0] != "[Processing failed]" else None
        st.session_state.pop("summary_audio", None)

    (dialogue_text, summary, dominant_emotion, emotion_percentages,
     customer_sentiment, agent_sentiment, queries, total_queries,
     resolved_queries, agent_score, competitor_mentions) = st.session_state["analysis_result"]

    # Results section
    st.markdown('<div class="section-header">Analysis Results</div>', unsafe_allow_html=True)

    # Display in columns
    col1, col2 = st.columns([3, 2])

    with col1:
        # Widgets get a real (hidden) label: empty labels hurt accessibility and
        # make Streamlit emit a warning.
        st.markdown("#### 📝 Transcribed Dialogue")
        st.text_area("Transcribed dialogue", value=dialogue_text, height=250,
                     disabled=True, label_visibility="collapsed")

        st.markdown("#### 📋 Summary")
        st.text_area("Summary", value=summary, height=100,
                     disabled=True, label_visibility="collapsed")

        st.markdown("#### ❓ Customer Queries")
        if queries:
            for i, query in enumerate(queries, 1):
                st.write(f"{i}. {query}")
        else:
            st.write("No queries detected.")

    with col2:
        st.markdown("#### 🎭 Sentiments")
        st.markdown(f"""
            <div class="indicator {customer_sentiment.lower()}">
                Customer Sentiment: {customer_sentiment.upper()}
            </div>
        """, unsafe_allow_html=True)
        st.markdown(f"""
            <div class="indicator {agent_sentiment.lower()}">
                Agent Sentiment: {agent_sentiment.upper()}
            </div>
        """, unsafe_allow_html=True)

        st.markdown("#### 🏆 Agent Performance")
        st.progress(agent_score / 100)
        # The score can be fractional; show it as a whole number.
        st.write(f"Score: {agent_score:.0f}/100")

        st.markdown("#### 📊 Query Resolution")
        st.write(f"Total Queries: {total_queries}")
        st.write(f"Resolved Queries: {resolved_queries}")
        if total_queries > 0:
            # Cap at 100%: the resolved count comes from a separate LLM call
            # and can exceed the number of extracted queries.
            st.write(f"Resolution Rate: {min(resolved_queries / total_queries, 1) * 100:.1f}%")
        else:
            st.write("Resolution Rate: N/A")

    # Additional insights
    st.markdown('<div class="section-header">Additional Insights</div>', unsafe_allow_html=True)
    col3, col4 = st.columns(2)

    with col3:
        st.markdown("#### 🎭 Emotion Distribution")
        emotion_chart = create_emotion_chart(emotion_percentages)
        if emotion_chart:
            st.plotly_chart(emotion_chart, use_container_width=True)
        else:
            st.write("No emotions detected.")

    with col4:
        st.markdown("#### ⚔️ Competitor Mentions")
        if competitor_mentions:
            for mention in competitor_mentions:
                # The list originates from LLM output, so tolerate items that
                # are not the expected {'mention', 'context'} objects.
                if isinstance(mention, dict):
                    st.write(f"- {mention.get('mention', 'Unknown')} (Context: {mention.get('context', '')})")
                else:
                    st.write(f"- {mention}")
        else:
            st.write("No competitor mentions detected.")

    # Audio summary
    st.markdown('<div class="section-header">Summary Audio</div>', unsafe_allow_html=True)
    audio_bytes = st.session_state.get("summary_audio")
    if audio_bytes is None:
        try:
            # gTTS calls a remote service; synthesize once per analysis and
            # keep the bytes so reruns do not repeat the request.
            summary_audio = BytesIO()
            gTTS(text=summary, lang="en").write_to_fp(summary_audio)
            audio_bytes = summary_audio.getvalue()
            st.session_state["summary_audio"] = audio_bytes
        except Exception as e:
            # A text-to-speech failure must not take down the results above.
            st.error(f"Could not generate summary audio: {e}")

    if audio_bytes:
        audio_b64 = base64.b64encode(audio_bytes).decode()

        st.markdown(f"""
        <audio class="audio-player" controls>
            <source src="data:audio/mp3;base64,{audio_b64}" type="audio/mp3">
            Your browser does not support the audio element.
        </audio>
    """, unsafe_allow_html=True)

        st.download_button(
            label="⬇️ Download Summary Audio",
            data=audio_bytes,
            file_name="summary.mp3",
            mime="audio/mpeg"
        )

Overwriting app.py


## 5. Launch the Application

### Start the Streamlit server with tunnel access

In [None]:
# Start Streamlit in the background (&) and expose its port (8501) through a public localtunnel URL.
!streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.106.116.93:8501[0m
[0m
your url is: https://lucky-crabs-travel.loca.lt
2025-04-27 06:07:59.532443: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745734079.589414   28095 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745734079.606161   28095 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-27 06:0