In [13]:
# Install required packages
!pip install torch transformers spacy
!python -m spacy download en_core_web_sm

import spacy
import re
from transformers import pipeline

nlp = spacy.load('en_core_web_sm')

def split_into_clauses(text):
    doc = nlp(text)
    clauses = []
    for sent in doc.sents:
        parts = re.split(r'[\?\!\.;]|\b(?:and|or|but|because|so|if|then)\b', sent.text)
        clauses.extend([p.strip() for p in parts if p.strip()])
    return clauses

classifier = pipeline("zero-shot-classification",
                    model="roberta-large-mnli",
                    framework="pt")

EMOTION_WORDS = [
    'affectionate', 'afraid', 'alienated', 'amused', 'angry', 'anguished', 'annoyed', 'anxious', 'apathetic', 'apprehensive',
    'aroused', 'ashamed', 'astonished', 'awed', 'bewildered', 'blissful', 'bored', 'calm', 'cheerful', 'compassionate',
    'confident', 'confused', 'contemptuous', 'content', 'crushed', 'curious', 'dejected', 'delighted', 'depressed',
    'deprived', 'desolate', 'devastated', 'disappointed', 'discouraged', 'disgruntled', 'disheartened', 'disillusioned',
    'dismayed', 'dismal', 'displeased', 'distrustful', 'doubtful', 'downcast', 'eager', 'edgy', 'elated', 'embarrassed',
    'empathetic', 'enthusiastic', 'envious', 'exasperated', 'excited', 'exhilarated', 'fearful', 'frightened', 'frustrated',
    'furious', 'gleeful', 'gloomy', 'grateful', 'grieving', 'guilty', 'happy', 'helpless', 'horrified', 'humiliated', 'hurt',
    'hysterical', 'indifferent', 'inspired', 'intimidated', 'irritated', 'jealous', 'joyful', 'jubilant', 'livid', 'lonely',
    'loved', 'melancholy', 'miserable', 'mortified', 'nervous', 'optimistic', 'outraged', 'overwhelmed', 'panicked',
    'paranoid', 'passionate', 'peaceful', 'pensive', 'perplexed', 'pessimistic', 'petrified', 'playful', 'pleased', 'proud',
    'rage', 'regretful', 'relieved', 'reluctant', 'remorseful', 'resentful', 'restless', 'sad', 'satisfied', 'scared',
    'self-conscious', 'serene', 'shameful', 'shocked', 'skeptical', 'sorrowful', 'startled', 'stressed', 'suspicious',
    'sympathetic', 'tense', 'terrified', 'thankful', 'thrilled', 'touched', 'triumphant', 'uneasy', 'upset', 'vengeful',
    'vulnerable', 'wistful', 'worried', 'zealous'
]
  # Your emotion words array here
CAUSE_MARKERS = ['because', 'since', 'due to', 'as a result of', 'led to']

def enhanced_clause_analysis(text, emotion_threshold=0.65, cause_threshold=0.6):
    clauses = split_into_clauses(text)
    results = []

    for clause in clauses:
        # Lexical detection
        has_emotion_lex = any(word in clause.lower() for word in EMOTION_WORDS)
        has_cause_lex = any(marker in clause.lower() for marker in CAUSE_MARKERS)

        # Zero-shot classification
        result = classifier(
            clause,
            candidate_labels=["emotional clause", "causal clause"],
            hypothesis_template="This is a {}.",
            multi_label=True
        )

        # Extract scores
        emotional_score = result['scores'][result['labels'].index('emotional clause')]
        causal_score = result['scores'][result['labels'].index('causal clause')]

        # Determine labels
        labels = []
        if (emotional_score >= emotion_threshold) or has_emotion_lex :
            labels.append("emotion")
        if causal_score >= cause_threshold or has_cause_lex:
            labels.append("cause")

        final_label = "both" if len(labels) == 2 else labels[0] if labels else "neutral"

        results.append({
            "clause": clause,
            "label": final_label,
            "scores": {
                "emotion": emotional_score,
                "cause": causal_score
            }
        })

    return results

# Test the function
text = "I'm anxious because the deadline is approaching. Can we extend it?"
result = enhanced_clause_analysis(text)
print(result)


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m58.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


[{'clause': "I'm anxious", 'label': 'both', 'scores': {'emotion': 0.9984240531921387, 'cause': 0.9449737668037415}}, {'clause': 'the deadline is approaching', 'label': 'cause', 'scores': {'emotion': 0.24603313207626343, 'cause': 0.693003237247467}}, {'clause': 'Can we extend it', 'label': 'neutral', 'scores': {'emotion': 0.31848230957984924, 'cause': 0.22840142250061035}}]
