In [1]:
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
"""
The pipelines below are selected for their balance of accurary, speed, and relevance to the S2S Therapist that is to be built.

- sentiment_pipeline: Uses DistilBERT fine-tuned on SST-2 for fast, reliable binary sentiment classification. This replaces the previous, larger model trained on social media data, which included a neutral category not needed for this application.
- emotion_pipeline: Lightweight DistilBERT-based model trained on six emotions, designed for faster, real-time detection of moods. This distilled version of previous models is generally used for chatbot conversations.
- zero_shot_pipeline: Testing to utilse in conjuction with sentiment and emotion pipelines for flexible and dynamic responses.
- ner_pipeline: DistilBERT-based NER for accurate, real-time entity extraction of names, organisations, and locations - again to aid the previous pipelines and final output generation.
"""


sentiment_pipeline = pipeline(
    "sentiment-analysis", 
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
)

emotion_pipeline = pipeline(
    "text-classification", 
    model="hamzawaheed/emotion-classification-model"
)

zero_shot_pipeline = pipeline(
    "zero-shot-classification", 
    model="facebook/bart-large-mnli")

ner_pipeline = pipeline(
    "ner", 
    model="dslim/distilbert-NER", 
    aggregation_strategy="simple"
)

Device set to use cpu
Device set to use cpu
Device set to use cpu
Device set to use cpu


In [3]:
def analyse_sentiment(text: str) -> dict:
    result = sentiment_pipeline(text)[0]
    
    return {
        "sentiment": result["label"],
        "confidence": result["score"]
    }

In [4]:
analyse_sentiment("This is amazing!")

{'sentiment': 'POSITIVE', 'confidence': 0.9998769760131836}

In [5]:
def analyse_emotions(text: str) -> dict:
    emotions = emotion_pipeline(text)

    label_map = {
    "LABEL_0": "sadness",
    "LABEL_1": "joy",
    "LABEL_2": "love",
    "LABEL_3": "anger",
    "LABEL_4": "fear",
    "LABEL_5": "surprise"
    }

    # Identify the highest-scoring emotion
    top_emotion = max(emotions, key=lambda x: x["score"])

    return {
        "primary_emotion": label_map.get(top_emotion["label"], top_emotion["label"]),
        "primary_confidence": top_emotion["score"]
    }

In [6]:
analyse_emotions("This makes me feel happy but makes me feel happy")

{'primary_emotion': 'joy', 'primary_confidence': 0.9986132383346558}

In [7]:
def analyse_zero_shot(text: str, labels: list) -> dict:
    result = zero_shot_pipeline(text, candidate_labels=labels)
    
    return {
        "top_category": result["labels"][0],
        "top_score": result["scores"][0],
    }

In [8]:
text = "I feel down"
labels = [
    "anger",
    "sadness",
    "anxiety",
    "stress",
    "happiness",
    "gratitude"
]
analyse_zero_shot(text, labels)

{'top_category': 'sadness', 'top_score': 0.9204614162445068}

In [9]:
def analyse_ner(text: str) -> list:
    # Get the raw results from the NER pipeline
    results = ner_pipeline(text)

    # Create a new list to store the cleaned entities
    cleaned_entities = []

    # Go through each result and pull out the important parts
    for item in results:
        entity_info = {
            "entity": item["entity_group"],
            "word": item["word"],
            "score": item["score"]
        }
        cleaned_entities.append(entity_info)

    return cleaned_entities

In [10]:
ner_text = "I met Elliot at the Tesla clinic in Sydney."
analyse_ner(ner_text)

[{'entity': 'PER', 'word': 'Elliot', 'score': np.float32(0.9977064)},
 {'entity': 'ORG', 'word': 'Te', 'score': np.float32(0.96471554)},
 {'entity': 'ORG', 'word': '##sla', 'score': np.float32(0.78039974)},
 {'entity': 'LOC', 'word': 'Sydney', 'score': np.float32(0.99765944)}]

In [11]:
def get_combined_analysis(text: str, zero_shot_labels: list) -> dict:
    sentiment_data = analyse_sentiment(text)
    emotion_data = analyse_emotions(text)
    zero_shot_data = analyse_zero_shot(text, zero_shot_labels)
    ner_data = analyse_ner(text)
    return {
        "text": text,
        "sentiment": sentiment_data,
        "emotion": emotion_data,
        "zero_shot_classification": zero_shot_data,
        "entities": ner_data
    }

In [16]:
zero_shot_labels = ["anger", "sadness", "anxiety", "stress", "happiness", "gratitude"]
# zero_shot_labels = ["support", "advice", "self-reflection", "stress management", "motivation", "relationship", "celebration", "general conversation"]
get_combined_analysis("I recently moved to Melbourne and I feel refreshed.", zero_shot_labels)

{'text': 'I recently moved to Melbourne and I feel refreshed.',
 'sentiment': {'sentiment': 'POSITIVE', 'confidence': 0.9997652173042297},
 'emotion': {'primary_emotion': 'joy',
  'primary_confidence': 0.9972177743911743},
 'zero_shot_classification': {'top_category': 'happiness',
  'top_score': 0.379910945892334},
 'entities': [{'entity': 'LOC',
   'word': 'Melbourne',
   'score': np.float32(0.9958221)}]}