In [None]:
! pip install gradio

import re
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import spacy
from transformers import pipeline

Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.3 (from gradio)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [None]:

# Load spaCy model
SPACY_MODEL = "en_core_web_sm"
try:
    nlp = spacy.load(SPACY_MODEL)
except OSError:
    # The model package is not installed in this environment: download it
    # once, then retry the load.
    import spacy.cli
    spacy.cli.download(SPACY_MODEL)
    nlp = spacy.load(SPACY_MODEL)

# Initialize sentiment analysis pipeline from Hugging Face.
# The model and revision are pinned explicitly (they are the defaults that
# transformers reported resolving to) so results do not silently change when
# the library's default model changes.
SENTIMENT_MODEL = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
SENTIMENT_MODEL_REVISION = "714eb0f"
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=SENTIMENT_MODEL,
    revision=SENTIMENT_MODEL_REVISION,
)

# Clean transcript text function

# Speaker label such as "Sales Rep:", "Sales Rep 2:" or "Customer (Ann):".
# The label may not span sentence punctuation or a newline and is length
# bounded, so an ordinary mention of "Customer" followed much later by a colon
# is not swallowed together with the text in between.
_SPEAKER_LABEL_RE = re.compile(r"\b(?:Sales Rep|Customer)[^:.!?\n]{0,30}:")
# Everything except letters, digits, whitespace and basic punctuation.
# Underscore is listed separately because \w would otherwise keep it.
_DISALLOWED_CHARS_RE = re.compile(r"[^\w\s.,!?'$%-]|_")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text):
    """Strip speaker labels and noise characters from a transcript.

    Sentence punctuation, apostrophes, digits and accented letters are kept
    because the cleaned text is later split into sentences and scanned for
    entities; removing them destroys sentence boundaries and mangles
    contractions ("I'm" -> "Im").

    Args:
        text: Raw transcript, one or more speaker turns. May be empty or None.

    Returns:
        The cleaned transcript as a single line with runs of whitespace
        collapsed to one space; "" for empty/None input.
    """
    if not text:
        return ""
    # Normalize typographic apostrophes so contractions survive the filter.
    text = text.replace("\u2019", "'").replace("\u2018", "'")
    # Replace labels with a space (not "") so adjacent words never fuse.
    text = _SPEAKER_LABEL_RE.sub(" ", text)
    text = _DISALLOWED_CHARS_RE.sub("", text)
    # Also turns newlines between speaker turns into single spaces.
    return _WHITESPACE_RE.sub(" ", text).strip()

# Main function to analyze transcript
def analyze_transcript(transcript):
    """Run sentence-level sentiment analysis on a sales transcript.

    The transcript is cleaned with ``clean_text``, parsed with the module-level
    spaCy ``nlp`` object, and every non-empty sentence is scored with the
    module-level ``sentiment_pipeline``.

    Args:
        transcript: Raw transcript text. Empty or None input is accepted and
            produces an empty table, a "NEUTRAL" summary and a placeholder plot.

    Returns:
        A 3-tuple ``(table_markdown, analysis_summary, plot_filename)``:
        the per-sentence results as a Markdown table, a dict with the keys
        "Overall Sentiment", "Customer Satisfaction", "Reason for Call",
        "Entities" and "Keywords", and the path of a PNG bar chart of the
        per-sentence scores.
    """
    # Clean and process transcript
    cleaned_transcript = clean_text(transcript or "")
    doc = nlp(cleaned_transcript)

    # Run sentiment analysis on each sentence; whitespace-only sentences are
    # dropped so they are neither scored nor plotted.
    sentences = [sent.text.strip() for sent in doc.sents if sent.text.strip()]
    # truncation=True keeps over-long sentences within the model's maximum
    # input length instead of raising; skip the model entirely when there is
    # nothing to score.
    sentiment_results = sentiment_pipeline(sentences, truncation=True) if sentences else []

    # Create DataFrame for results
    df = pd.DataFrame({
        'Sentence': sentences,
        'Sentiment': [result['label'] for result in sentiment_results],
        'Score': [result['score'] for result in sentiment_results],
    })

    # Extract Named Entities and Keywords
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    keyword_tokens = [token for token in doc if token.pos_ in ("NOUN", "ADJ")]
    keywords = [token.text for token in keyword_tokens]

    # Determine overall sentiment and customer satisfaction.
    # On a tie the first mode returned by pandas is used.
    overall_sentiment = df['Sentiment'].mode()[0] if not df.empty else "NEUTRAL"
    satisfaction = {"POSITIVE": "Satisfied", "NEGATIVE": "Dissatisfied"}.get(overall_sentiment, "Neutral")

    # Extract potential reason for the call: first keyword whose surface form
    # or lemma is a known reason, so inflected forms like "issues" also match.
    reason_keywords = {'support', 'complaint', 'inquiry', 'feedback', 'cancellation', 'issue', 'billing', 'purchase'}
    reason_for_call = next(
        (
            candidate.capitalize()
            for token in keyword_tokens
            for candidate in (token.text.lower(), token.lemma_.lower())
            if candidate in reason_keywords
        ),
        "General Inquiry",
    )

    # Generate sentiment score plot on an explicit figure/axes rather than the
    # implicit pyplot "current figure", which is shared between callers.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if df.empty:
            # Plotting an empty Series raises, so draw a placeholder instead.
            ax.text(0.5, 0.5, "No sentences to analyze", ha='center', va='center', transform=ax.transAxes)
            ax.set_xticks([])
        else:
            bar_colors = ['green' if s == 'POSITIVE' else 'red' for s in df['Sentiment']]
            df['Score'].plot(kind='bar', color=bar_colors, ax=ax)
            ax.set_xticks(range(len(sentences)))
            ax.set_xticklabels(sentences, rotation=90)
        ax.set_xlabel("Sentence")
        ax.set_ylabel("Sentiment Score")
        ax.set_title("Sentiment Analysis of Sales Transcript")
        fig.tight_layout()
        # A unique file per call: a fixed filename would let concurrent
        # requests overwrite each other's plot. The file is intentionally not
        # deleted here because the caller reads it after this function returns.
        with tempfile.NamedTemporaryFile(prefix="sentiment_analysis_plot_", suffix=".png", delete=False) as tmp:
            plot_filename = tmp.name
        fig.savefig(plot_filename)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    # Compile analysis summary
    analysis_summary = {
        "Overall Sentiment": overall_sentiment,
        "Customer Satisfaction": satisfaction,
        "Reason for Call": reason_for_call,
        "Entities": entities,
        "Keywords": keywords
    }

    return df.to_markdown(), analysis_summary, plot_filename

# Gradio Interface
iface = gr.Interface(
    fn=analyze_transcript,
    # Explicit multi-line textbox: transcripts have one speaker turn per line
    # (see the examples below), which a single-line "text" input cannot hold.
    inputs=gr.Textbox(lines=8, label="Transcript"),
    # Matches the (markdown table, summary dict, plot image path) tuple
    # returned by analyze_transcript.
    outputs=["markdown", "json", "image"],
    title="Sentiment Analysis for Sales Transcript",
    description="Analyze the sentiment of a sales transcript, extract entities, and visualize sentiment scores.",
    examples=[
        ["Sales Rep: Hello, how are you today?\nCustomer: I'm good, thanks! Just checking on product availability."],
        ["Sales Rep: Thank you for calling. How can I assist?\nCustomer: I’m facing issues with my billing."],
        ["Sales Rep: Can I help you with anything else today?\nCustomer: No, just wanted to provide some feedback!"]
    ]
)

# Launch the interface
iface.launch()


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7efd18681179afc034.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


