<a href="https://colab.research.google.com/github/andersonfurtado/AI4WEBDEV/blob/main/ch6/User_Centric_Sentiment_Analysis_with_Personalized_Content.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Instalar bibliotecas necessárias
!pip install SpeechRecognition
!pip install textblob
!pip install pydub
!pip install numpy

import os
import urllib.request
import zipfile
import speech_recognition as sr
from textblob import TextBlob
from pydub import AudioSegment
from pydub.playback import play

# Função para carregar arquivos de áudio da base de dados LibriSpeech
def load_audio_files(data_dir):
    audio_files = []
    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith(".flac"):
                audio_files.append(os.path.join(root, file))
    return audio_files

# Função para transcrever áudio em texto usando reconhecimento de fala
def transcribe_audio(file_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(file_path) as source:
        audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
            return text
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand the audio.")
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
    return None

# Função para analisar o sentimento do texto
def analyze_sentiment(text):
    blob = TextBlob(text)
    return blob.sentiment

# Função para personalizar a interface com base no sentimento
def personalize_interface(sentiment):
    if sentiment.polarity > 0:
        return "Showing positive content."
    elif sentiment.polarity < 0:
        return "Showing content to uplift your mood."
    else:
        return "Showing neutral content."

# Baixar e descompactar os dados LibriSpeech
url = 'http://www.openslr.org/resources/12/dev-clean.tar.gz'
urllib.request.urlretrieve(url, 'dev-clean.tar.gz')

import tarfile
with tarfile.open('dev-clean.tar.gz', 'r:gz') as tar:
    tar.extractall()

# Definir o caminho para os dados descompactados
data_dir = 'LibriSpeech/dev-clean'

# Carregar arquivos de áudio
audio_files = load_audio_files(data_dir)

for audio_file in audio_files:
    print(f"Processing file: {audio_file}")
    text = transcribe_audio(audio_file)
    if text:
        sentiment = analyze_sentiment(text)
        print(f"Transcribed Text: {text}")
        print(f"Sentiment Analysis: Polarity = {sentiment.polarity}, Subjectivity = {sentiment.subjectivity}")
        personalized_content = personalize_interface(sentiment)
        print(personalized_content)
        break  # Remova esta linha para processar todos os arquivos de áudio


Processing file: LibriSpeech/dev-clean/5895/34615/5895-34615-0005.flac
Transcribed Text: Gwen plane was a mountebank
Sentiment Analysis: Polarity = 0.0, Subjectivity = 0.0
Showing neutral content.


In [15]:
# Install necessary libraries
!pip install SpeechRecognition
!pip install textblob
!pip install pydub
!pip install numpy

import os
import urllib.request
import tarfile
import speech_recognition as sr
from textblob import TextBlob
from pydub import AudioSegment
from pydub.playback import play
import json

# Function to download and extract LibriSpeech data
def download_and_extract_data(url, extract_to='.'):
    filename = url.split('/')[-1]
    print(f"Downloading {filename}...")
    urllib.request.urlretrieve(url, filename)
    print(f"Extracting {filename}...")
    with tarfile.open(filename, 'r:gz') as tar:
        tar.extractall(path=extract_to)
    print("Download and extraction completed.")

# Function to load audio files from the LibriSpeech dataset
def load_audio_files(data_dir):
    audio_files = []
    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith(".flac"):
                audio_files.append(os.path.join(root, file))
    return audio_files

# Function to transcribe audio to text using speech recognition
def transcribe_audio(file_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(file_path) as source:
        audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
            return text
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand the audio.")
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
    return None

# Function to analyze the sentiment of the text
def analyze_sentiment(text):
    blob = TextBlob(text)
    return blob.sentiment

# Function to personalize the interface based on sentiment
def personalize_interface(sentiment):
    if sentiment.polarity > 0.5:
        return ("Showing very positive content: Inspirational articles, achievement videos, motivational messages.",
                ["positive_article1.html", "achievement_video1.mp4", "motivational_message1.txt"])
    elif sentiment.polarity > 0:
        return ("Showing positive content: Success stories, wellness tips, inspirational quotes.",
                ["success_story1.html", "wellness_tip1.txt", "inspirational_quote1.txt"])
    elif sentiment.polarity < -0.5:
        return ("Showing content to greatly uplift your mood: Comedies, funny videos, memes.",
                ["comedy1.mp4", "funny_video1.mp4", "meme1.jpg"])
    elif sentiment.polarity < 0:
        return ("Showing content to uplift your mood: Relaxing music, cute animal videos, supportive messages.",
                ["relaxing_music1.mp3", "cute_animal_video1.mp4", "supportive_message1.txt"])
    else:
        return ("Showing neutral content: News, informative articles, educational tutorials.",
                ["news1.html", "informative_article1.html", "educational_tutorial1.mp4"])

# Function to save results to a JSON file
def save_results(filename, results):
    with open(filename, 'w') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

# Function to process audio files and save results
def process_and_save_results(audio_files, output_file, num_files=10):
    results = []
    for i, audio_file in enumerate(audio_files[:num_files]):
        print(f"Processing file {i+1}/{num_files}: {audio_file}")
        text = transcribe_audio(audio_file)
        if text:
            sentiment = analyze_sentiment(text)
            print(f"Transcribed Text: {text}")
            print(f"Sentiment Analysis: Polarity = {sentiment.polarity}, Subjectivity = {sentiment.subjectivity}")
            personalized_content, content_files = personalize_interface(sentiment)
            print(personalized_content)
            result = {
                "audio_file": audio_file,
                "transcribed_text": text,
                "sentiment": {
                    "polarity": sentiment.polarity,
                    "subjectivity": sentiment.subjectivity
                },
                "personalized_content": personalized_content,
                "content_files": content_files
            }
            results.append(result)
    save_results(output_file, results)

# URL for the LibriSpeech dataset
url = 'http://www.openslr.org/resources/12/dev-clean.tar.gz'
download_and_extract_data(url)

# Define the path for the extracted data
data_dir = 'LibriSpeech/dev-clean'

# Load audio files
audio_files = load_audio_files(data_dir)

# Process and save results for the first 10 audio files
process_and_save_results(audio_files, 'results.json', num_files=10)


Downloading dev-clean.tar.gz...
Extracting dev-clean.tar.gz...
Download and extraction completed.
Processing file 1/10: LibriSpeech/dev-clean/5895/34615/5895-34615-0005.flac
Transcribed Text: Gwen plane was a mountebank
Sentiment Analysis: Polarity = 0.0, Subjectivity = 0.0
Showing neutral content: News, informative articles, educational tutorials.
Processing file 2/10: LibriSpeech/dev-clean/5895/34615/5895-34615-0011.flac
Transcribed Text: an everlasting laugh
Sentiment Analysis: Polarity = 0.3, Subjectivity = 0.1
Showing positive content: Success stories, wellness tips, inspirational quotes.
Processing file 3/10: LibriSpeech/dev-clean/5895/34615/5895-34615-0010.flac
Transcribed Text: all his emotions whatever they might have been augmented his strange face of Joy or to speak more correctly aggravated it
Sentiment Analysis: Polarity = 0.4166666666666667, Subjectivity = 0.2833333333333333
Showing positive content: Success stories, wellness tips, inspirational quotes.
Processing file 4/

In [16]:
import pandas as pd
import json
from IPython.display import display, HTML

# Load the results from the JSON file
def load_results(filename):
    with open(filename, 'r') as f:
        results = json.load(f)
    return results

# Function to display results in a modern, formatted manner
def display_results(results):
    html_content = """
    <html>
    <head>
        <style>
            body {
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 20px;
                background-color: #f5f5f5;
            }
            .container {
                background-color: #ffffff;
                padding: 20px;
                margin: 20px 0;
                border-radius: 8px;
                box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            }
            h2 {
                color: #333333;
                margin-top: 0;
            }
            p {
                color: #666666;
            }
            .content-block {
                margin: 10px 0;
            }
            .content-files {
                margin: 10px 0;
            }
            .content-files span {
                background-color: #e7f3fe;
                color: #31708f;
                padding: 5px 10px;
                border-radius: 5px;
                margin-right: 5px;
            }
        </style>
    </head>
    <body>
    """

    for result in results:
        html_content += f"""
        <div class="container">
            <h2>Processing file: {result['audio_file']}</h2>
            <div class="content-block">
                <strong>Transcribed Text:</strong>
                <p>{result['transcribed_text']}</p>
            </div>
            <div class="content-block">
                <strong>Sentiment Analysis:</strong>
                <p>Polarity = {result['sentiment']['polarity']}, Subjectivity = {result['sentiment']['subjectivity']}</p>
            </div>
            <div class="content-block">
                <strong>Personalized Content:</strong>
                <p>{result['personalized_content']}</p>
            </div>
            <div class="content-files">
                <strong>Related Files:</strong>
        """
        for file in result['content_files']:
            html_content += f"<span>{file}</span>"
        html_content += "</div></div>"

    html_content += """
    </body>
    </html>
    """

    display(HTML(html_content))

# Load and display results
results = load_results('results.json')
display_results(results)


In [17]:
# Install necessary library
!pip install plotly

import pandas as pd
import json
import plotly.express as px
from IPython.display import display, HTML

# Load the results from the JSON file
def load_results(filename):
    with open(filename, 'r') as f:
        results = json.load(f)
    return results

# Function to create a DataFrame from results
def create_dataframe(results):
    data = {
        "File": [],
        "Transcribed Text": [],
        "Polarity": [],
        "Subjectivity": [],
        "Personalized Content": []
    }

    for result in results:
        data["File"].append(result["audio_file"])
        data["Transcribed Text"].append(result["transcribed_text"])
        data["Polarity"].append(result["sentiment"]["polarity"])
        data["Subjectivity"].append(result["sentiment"]["subjectivity"])
        data["Personalized Content"].append(result["personalized_content"])

    return pd.DataFrame(data)

# Function to display results in a modern, formatted manner
def display_results(results):
    df = create_dataframe(results)

    # Create a scatter plot for sentiment analysis
    fig = px.scatter(df, x="Polarity", y="Subjectivity", text="File",
                     color="Polarity", size="Subjectivity",
                     hover_data=["Transcribed Text", "Personalized Content"],
                     title="Sentiment Analysis of Audio Files",
                     labels={
                         "Polarity": "Polarity (Positivity/Negativity)",
                         "Subjectivity": "Subjectivity (Objective/Subjective)"
                     })

    fig.update_traces(textposition='top center')
    fig.update_layout(autosize=True, height=600)

    # Display the DataFrame and the plot
    display(HTML("<h2>Sentiment Analysis Results</h2>"))
    display(df)
    fig.show()

# Load and display results
results = load_results('results.json')
display_results(results)




Unnamed: 0,File,Transcribed Text,Polarity,Subjectivity,Personalized Content
0,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,Gwen plane was a mountebank,0.0,0.0,"Showing neutral content: News, informative art..."
1,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,an everlasting laugh,0.3,0.1,"Showing positive content: Success stories, wel..."
2,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,all his emotions whatever they might have been...,0.416667,0.283333,"Showing positive content: Success stories, wel..."
3,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,the operation whatever it had been which had d...,0.0,0.0,"Showing neutral content: News, informative art..."
4,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,besides we must remember that they had in thos...,0.25,1.0,"Showing positive content: Success stories, wel..."
5,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,his hair having probably been dyed with some c...,-0.05,0.2,Showing content to uplift your mood: Relaxing ...
6,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,besides this face those who had brought him up...,0.0,0.0,"Showing neutral content: News, informative art..."
7,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,no one could escape from this rictus,0.0,0.0,"Showing neutral content: News, informative art..."
8,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,Gwen plane had yellow hair,0.0,0.0,"Showing neutral content: News, informative art..."
9,LibriSpeech/dev-clean/5895/34615/5895-34615-00...,it's yellow bristles rather a man than a head ...,0.125,0.125,"Showing positive content: Success stories, wel..."
