## Aspect-Based Sentiment Analysis (ABSA): Aspect Term Extraction (ATE) → DeBERTa pipeline

In [11]:
%pip install spacy transformers seaborn matplotlib pandas

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Note: you may need to restart the kernel to use updated packages.


In [12]:
import spacy
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# spaCy pipeline; extract_aspects() uses it to parse text and read noun chunks.
# The "en_core_web_sm" model must already be installed in the environment.
nlp = spacy.load("en_core_web_sm")

# Pre-trained DeBERTa checkpoint for aspect-based sentiment analysis (ABSA).
# The tokenizer and model are loaded from the same checkpoint name and wrapped
# in a text-classification pipeline, exposed as the module-level `classifier`.
model_name = "yangheng/deberta-v3-base-absa-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)




In [None]:
def extract_aspects(text):
    """Return the noun phrases of ``text`` that act as subject or direct object.

    The text is parsed with the module-level spaCy pipeline ``nlp``. A noun
    chunk is kept when at least one of its tokens carries the dependency
    label ``nsubj`` or ``dobj``.

    Args:
        text: Raw input string to parse.

    Returns:
        list[str]: Text of every selected noun chunk, in the order the
        parser yields them. Empty when no chunk qualifies.
    """
    wanted_deps = {"nsubj", "dobj"}
    return [
        phrase.text  # keep the full noun phrase, not just its head word
        for phrase in nlp(text).noun_chunks
        if any(tok.dep_ in wanted_deps for tok in phrase)
    ]

In [None]:
def analyze_aspect_sentiment(text, aspects):
    """Classify the sentiment that ``text`` expresses towards each aspect.

    Each aspect is sent to the module-level ``classifier`` together with the
    full text, as a ``(text, text_pair=aspect)`` pair, and the first result
    entry is used.

    Args:
        text: The sentence or review being analysed.
        aspects: Iterable of aspect strings to score against ``text``.

    Returns:
        dict: Maps each aspect to ``{'sentiment': <label>, 'confidence': <score>}``
        where the score is rounded to 5 decimal places. A repeated aspect
        keeps only its most recent result.
    """
    def _score(aspect_term):
        # The pipeline returns a list; its first element is the result dict.
        top = classifier(text, text_pair=aspect_term)[0]
        return {'sentiment': top['label'], 'confidence': round(top['score'], 5)}

    return {aspect_term: _score(aspect_term) for aspect_term in aspects}

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

def evaluate_model_on_csv(df):
    """Evaluate the ABSA classifier on a labelled DataFrame and plot the results.

    For every row, the sentence and its aspect term are sent to the
    module-level ``classifier`` as a ``(sentence, text_pair=aspect)`` pair,
    the same input form ``analyze_aspect_sentiment`` uses, so the evaluation
    measures the model the way it is used for inference. Predicted and gold
    labels are lower-cased before comparison.

    Args:
        df: pandas DataFrame with the columns ``'Sentence'``,
            ``'Aspect Term'`` and ``'polarity'`` (string labels).

    Returns:
        None. Prints a classification report and shows a confusion-matrix
        heatmap.
    """
    # Classes shown in the confusion matrix. A gold label outside this list
    # still appears in the classification report but not in the matrix.
    class_names = ["positive", "negative", "neutral"]

    y_true = []
    y_pred = []

    for _, row in tqdm(df.iterrows(), total=len(df)):
        sentence = row['Sentence']
        aspect = row['Aspect Term']
        true_label = row['polarity'].lower()

        # Sentence/aspect pair input, matching analyze_aspect_sentiment;
        # the pipeline returns a list whose first element is the result dict.
        pred_label = classifier(sentence, text_pair=aspect)[0]['label'].lower()

        y_true.append(true_label)
        y_pred.append(pred_label)

    # Report
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, digits=4))

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred, labels=class_names)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.show()


In [None]:
# Load the labelled evaluation data from CSV and run the evaluation on it.
# evaluate_model_on_csv reads the 'Sentence', 'Aspect Term' and 'polarity'
# columns, so the file must provide them.
df = pd.read_csv("data/Laptop_Train_v2.csv")
evaluate_model_on_csv(df)

In [13]:
# example text
text = "The camera quality of this phone is amazing, but the battery life is disappointing."

# extract aspects
aspects = extract_aspects(text)
print("Extracted Aspects:", aspects)

# get sentiment for each aspect
aspect_sentiments = analyze_aspect_sentiment(text, aspects)
print("Aspect Sentiment Analysis:", aspect_sentiments)

Extracted Aspects: ['The camera quality', 'the battery life']
Aspect Sentiment Analysis: {'The camera quality': {'sentiment': 'Positive', 'confidence': 0.99813}, 'the battery life': {'sentiment': 'Negative', 'confidence': 0.99647}}
