In [1]:
# install Libraries
!pip install torch==2.5.0 transformers TTS langdetect

Collecting torch==2.5.0
  Downloading torch-2.5.0-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting TTS
  Downloading TTS-0.22.0-cp311-cp311-manylinux1_x86_64.whl.metadata (21 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.5.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.5.0)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.5.0)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.5.0)
  Downlo

In [24]:
#Import Libraries

from TTS.api import TTS
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from langdetect import detect
import io
import torch

`Download Models`

In [53]:
# TTS
try:
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda" if torch.cuda.is_available() else "cpu")
except Exception as e:
    raise RuntimeError(f"Failed to load TTS model: {str(e)}")


try:
    # model aubmindlab for arabic
    arabic_model_name = "aubmindlab/bert-base-arabertv02-twitter"
    sentiment_tokenizer = AutoTokenizer.from_pretrained(arabic_model_name)
    sentiment_model = AutoModelForSequenceClassification.from_pretrained("UBC-NLP/MARBERT")
except Exception as e:
    raise RuntimeError(f"Failed to load Arabic sentiment analysis model: {str(e)}")

# model distilbert for english
try:
    sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
except Exception as e:
    raise RuntimeError(f"Failed to load English sentiment analysis model: {str(e)}")


 > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.
 > Using model: xtts


config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/654M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


model.safetensors:   0%|          | 0.00/654M [00:00<?, ?B/s]

In [55]:
# Converting feelings into words that are suitable for pronunciation
def map_sentiment_to_emotion(sentiment, language="en"):
    if language == "ar":
        # For arabic
        if sentiment == "positive" or sentiment == "POS":
            return "happy"
        elif sentiment == "negative" or sentiment == "NEG":
            return "sad"
        else:
            return "neutral"
    else:
        # For English
        if "positive" in sentiment.lower():
            return "happy"
        elif "negative" in sentiment.lower():
            return "sad"
        else:
            return "neutral"

#
def arabic_sentiment_analysis(text):
    # Use a simple method using Arabic keywords for feelings.
    positive_words = ["سعيد", "فرح", "ممتاز", "رائع", "جيد", "حب", "جميل", "نجاح", "أحسنت", "شكرا"]
    negative_words = ["حزين", "غاضب", "سيء", "فشل", "خطأ", "مشكلة", "صعب", "لا أحب", "سخيف", "مؤسف"]

    # Count positive and negative words in the text.
    positive_count = sum(1 for word in positive_words if word in text.lower())
    negative_count = sum(1 for word in negative_words if word in text.lower())

    # Determine sentiment based on word count
    if positive_count > negative_count:
        return "positive"
    elif negative_count > positive_count:
        return "negative"
    else:
        # Use more complex analysis when there are no clear words.
        try:
            # Another free template can be used if available.
            inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
            with torch.no_grad():
                outputs = sentiment_model(**inputs)

            # Determine the category based on the resulting values.
            sentiment_class = torch.argmax(outputs.logits).item()

            # Convert to appropriate category (positive/negative/neutral)
            if sentiment_class == 0:
                return "negative"
            elif sentiment_class == 1:
                return "neutral"
            else:
                return "positive"
        except:
            #In case of failure, we consider the text neutral.
            return "neutral"

# Improved language detection
def detect_language_safely(text):
    try:
        # More accurate detection of Arabic language
        if any('\u0600' <= c <= '\u06FF' for c in text):
            return "ar"
        return detect(text)
    except Exception:
        # If failed, check for Arabic characters.
        if any('\u0600' <= c <= '\u06FF' for c in text):
            return "ar"
        return "en"




`Inference`

In [66]:

text = "i am ver happy , really"

# Better language detection
detected_language = detect_language_safely(text)
language = "ar" if detected_language == "ar" else "en"

# Sentiment analysis
emotion = "neutral"
sentiment_result = None

if language == "en":
    try:
        sentiment_result = sentiment_analyzer(text)[0]
        emotion = map_sentiment_to_emotion(sentiment_result["label"])
        print(f"English sentiment analysis result: {sentiment_result}")
    except Exception as e:
        print(f"Sentiment analysis failed: {str(e)}")
else:
    try:
        sentiment_result = arabic_sentiment_analysis(text)
        emotion = map_sentiment_to_emotion(sentiment_result, language="ar")
        print(f"Arabic sentiment analysis result: {sentiment_result}")
    except Exception as e:
        print(f"Arabic sentiment analysis failed: {str(e)}")


print(f"Sentiment analysis: {sentiment_result}")
print(f"Language detected: {language}")

# Specify the final audio file name.
output_filename = "output.wav"

#Create audio and save it to file
try:
    tts.tts_to_file(
        text=text,
        file_path=output_filename,
        emotion=emotion,
        speaker_wav="speaker.wav",  
        language=language
    )
    print(f"✅ The audio was successfully created and saved in{output_filename}")
    print(f"🔊 Feelings discovered: {emotion.capitalize()}")
except Exception as e:
    print(f"❌ Failed to create audio: {str(e)}")

English sentiment analysis result: {'label': 'POSITIVE', 'score': 0.9993470311164856}
Sentiment analysis: {'label': 'POSITIVE', 'score': 0.9993470311164856}
Language detected: en
 > Text splitted to sentences.
['i am ver happy , really']
 > Processing time: 2.0088655948638916
 > Real-time factor: 0.5864155682952342
✅ The audio was successfully created and saved inoutput.wav
🔊 Feelings discovered: Happy


In [67]:
from IPython.display import Audio
Audio("output.wav")