**Submitted by**: Tuazon, Virtudez, & Lumakang

In [None]:
import re  # Added for better word matching
from typing import Dict, List, Optional, Tuple

# Display label for each mixed-sentiment category, keyed by its 1-based id.
CATEGORY_NAMES = dict(enumerate(
    (
        "Predominantly Positive, with Criticisms",
        "Predominantly Negative, with Praise",
        "Positive with a Minor Caveat",
        "Negative with a Small Positive",
        "Balanced Praise and Criticism",
    ),
    start=1,
))

# Praise vocabulary mapped to the score each match contributes.
POSITIVE_WORDS = {
    # Strong positive words score 2 points each.
    **dict.fromkeys(
        (
            'breathtaking', 'stellar', 'masterful', 'brilliant',
            'groundbreaking', 'magical', 'spectacular', 'innovative',
            'perfect', 'outstanding', 'exceptional', 'extraordinary',
            'magnificent', 'superb', 'stunning', 'seamless',
        ),
        2,
    ),
    # Standard positive words score 1 point each.
    **dict.fromkeys(
        (
            'sharp', 'witty', 'engaging', 'compelling', 'vibrant',
            'riveting', 'inspiring', 'hilarious', 'authentic', 'uplifting',
            'dynamic', 'fresh', 'captivating', 'soulful', 'heartwarming',
            'poignant', 'good', 'great', 'nice', 'enjoyable', 'fun',
            'interesting', 'smooth', 'clever', 'delightful', 'powerful',
            'engaged', 'heartfelt', 'memorable', 'iconic',
        ),
        1,
    ),
}

# Criticism vocabulary mapped to the score each match contributes.
NEGATIVE_WORDS = {
    # Strong negative words score 2 points each.
    **dict.fromkeys(
        (
            'tedious', 'boring', 'painful', 'forgettable', 'lifeless',
            'pretentious', 'underwhelming', 'terrible', 'awful', 'horrible',
            'dreadful', 'atrocious', 'cringeworthy', 'unsatisfying',
            'unnecessary', 'incomplete', 'painfully',
        ),
        2,
    ),
    # Standard negative words score 1 point each.
    **dict.fromkeys(
        (
            'shallow', 'uninspired', 'mediocre', 'disappointing', 'contrived',
            'predictable', 'awkward', 'forced', 'bland', 'generic', 'hollow',
            'rushed', 'slow', 'uneven', 'flat', 'weak', 'poorly', 'bad',
            'derivative', 'gimmicky', 'repetitive', 'unconvincing',
            'disjointed', 'overhyped', 'jarring', 'clichéd',
        ),
        1,
    ),
}

# Markers that signal a turn between praise and criticism within a statement.
CONTRAST_WORDS = {
    # Single-word markers.
    'although',
    'but',
    'despite',
    'however',
    'nevertheless',
    'nonetheless',
    'overshadowed',
    'though',
    'while',
    'yet',
    # Two-word markers.
    'even if',
    'even though',
}

# Words that flip the polarity of a sentiment word appearing shortly after them.
NEGATION_WORDS = {
    'barely',
    'hardly',
    'never',
    'no',
    'none',
    'not',
    'nothing',
    'nowhere',
}

def tokenize(text: str) -> List[str]:
    """Split *text* into lower-cased word tokens.

    Punctuation is discarded: each run of word characters becomes one token,
    so an apostrophe or hyphen splits a word into separate tokens.

    Args:
        text: The raw statement to split.

    Returns:
        The tokens in the order they appear in *text*.
    """
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r'\b\w+\b', lowered)]

def analyze_sentiment(statement: str) -> Tuple[float, float, List[str], List[str], List[str], List[str]]:
    """Score a statement against the weighted sentiment lexicons.

    Each token is looked up in POSITIVE_WORDS and NEGATIVE_WORDS. A token is
    treated as negated when any of the (up to) three tokens before it is in
    NEGATION_WORDS; a negated word adds its weight to the opposite score and
    is not recorded in the matched-word lists.

    Args:
        statement: The review text to analyze.

    Returns:
        A tuple ``(positive_score, negative_score, positive_words,
        negative_words, contrast_words, negation_words)``, where the four
        lists hold the matching tokens in order of appearance.
    """
    tokens = tokenize(statement)
    pos_total = 0
    neg_total = 0
    pos_hits = []
    neg_hits = []
    contrast_hits = []
    negation_hits = []

    for idx, token in enumerate(tokens):
        # Look back at most three tokens for a negation.
        window = tokens[max(0, idx-3):idx]
        negated = not NEGATION_WORDS.isdisjoint(window)

        pos_weight = POSITIVE_WORDS.get(token)
        if pos_weight is not None:
            if negated:
                # e.g. "not good" counts against the statement.
                neg_total += pos_weight
            else:
                pos_total += pos_weight
                pos_hits.append(token)

        neg_weight = NEGATIVE_WORDS.get(token)
        if neg_weight is not None:
            if negated:
                # e.g. "not bad" counts in the statement's favour.
                pos_total += neg_weight
            else:
                neg_total += neg_weight
                neg_hits.append(token)

        if token in CONTRAST_WORDS:
            contrast_hits.append(token)

        if token in NEGATION_WORDS:
            negation_hits.append(token)

    return pos_total, neg_total, pos_hits, neg_hits, contrast_hits, negation_hits

def find_contrast_position(statement: str) -> Optional[float]:
    """Find the relative position of the first contrast word or phrase.

    The statement is tokenized and scanned left to right. At every token both
    the single word and the two-word phrase starting there are compared with
    CONTRAST_WORDS, so multi-word entries such as 'even if' are detected.
    Comparing single tokens only would never match them.

    Args:
        statement: The review text to scan.

    Returns:
        The index of the token where the first contrast marker starts,
        divided by the total number of tokens (0 is the start of the
        statement, values near 1 are close to the end), or None when the
        statement contains no contrast marker.
    """
    words = tokenize(statement)
    total = len(words)
    for i, word in enumerate(words):
        # On the last token the slice holds one word, so phrase == word.
        phrase = ' '.join(words[i:i + 2])
        if word in CONTRAST_WORDS or phrase in CONTRAST_WORDS:
            return i / total
    return None

def categorize_statement(statement: str) -> Tuple[str, float, str]:
    """Categorize a statement from its sentiment ratio and contrast position.

    Args:
        statement: The review text to categorize.

    Returns:
        A tuple ``(category, sentiment_ratio, contrast_info)``:
        ``category`` is one of the CATEGORY_NAMES labels, ``sentiment_ratio``
        is positive_score / (positive_score + negative_score) (0.5 when no
        sentiment words were scored), and ``contrast_info`` is
        "Early contrast", "Late contrast" or "No contrast word found".
    """
    positive_score, negative_score, *_ = analyze_sentiment(statement)
    contrast_pos = find_contrast_position(statement)

    total_score = positive_score + negative_score
    # With nothing scored, treat the statement as evenly balanced.
    sentiment_ratio = positive_score / total_score if total_score != 0 else 0.5

    if contrast_pos is None:
        contrast_info = "No contrast word found"
        # Without a contrast marker, stricter ratio thresholds apply.
        if sentiment_ratio > 0.7:
            category = CATEGORY_NAMES[3]  # Positive with Minor Caveat
        elif sentiment_ratio < 0.3:
            category = CATEGORY_NAMES[4]  # Negative with Small Positive
        else:
            category = CATEGORY_NAMES[5]  # Balanced Praise and Criticism
        return category, sentiment_ratio, contrast_info

    contrast_info = "Early contrast" if contrast_pos < 0.5 else "Late contrast"
    very_late = contrast_pos > 0.8

    if very_late and sentiment_ratio > 0.6:
        category = CATEGORY_NAMES[3]  # Positive with Minor Caveat
    elif very_late and sentiment_ratio < 0.4:
        category = CATEGORY_NAMES[4]  # Negative with Small Positive
    elif sentiment_ratio > 0.6:
        category = CATEGORY_NAMES[1]  # Predominantly Positive with Criticism
    elif sentiment_ratio < 0.4:
        category = CATEGORY_NAMES[2]  # Predominantly Negative with Praise
    else:
        # Also covers a very late contrast with a ratio between 0.4 and 0.6,
        # which would otherwise leave the category as an empty string.
        category = CATEGORY_NAMES[5]  # Balanced Praise and Criticism

    return category, sentiment_ratio, contrast_info

def analyze_reviews(statements: List[str]) -> List[
    Tuple[str, str, float, float, List[str], List[str], List[str], List[str], float, str]
]:
    """Analyze a list of reviews and return one result row per statement.

    Args:
        statements: The review texts to analyze.

    Returns:
        A list of tuples, in input order, each holding
        ``(statement, category, pos_score, neg_score, pos_words, neg_words,
        contrast_words, negation_words, sentiment_ratio, contrast_info)``.
    """
    results = []
    for statement in statements:
        category, sentiment_ratio, contrast_info = categorize_statement(statement)
        # categorize_statement returns only the ratio, so fetch the scores
        # and matched words with a direct call.
        (pos_score, neg_score, pos_words, neg_words,
         contrast_words, negation_words) = analyze_sentiment(statement)
        results.append((
            statement,
            category,
            pos_score,
            neg_score,
            pos_words,
            neg_words,
            contrast_words,
            negation_words,
            sentiment_ratio,
            contrast_info,
        ))
    return results

# Example usage: categorize a set of sample movie reviews and print a report.
if __name__ == "__main__":
    test_statements = [
        "The dialogue was sharp and witty, keeping me engaged, but the pacing was tedious, making the movie feel longer than it was.",
        "The film’s breathtaking visuals were overshadowed by its shallow and uninspired story.",
        "It was a riveting thriller that kept me on the edge of my seat, even though some of the action scenes felt repetitive.",
        "The story was so inspiring that I left the theater feeling motivated, but the romantic subplot felt forced and unnecessary.",
        "While the cinematography was visually stunning, the dialogue was painfully clichéd.",
        "The comedy was hilarious, with clever humor throughout, though the tonal shifts were occasionally jarring.",
        "The emotional scenes were deeply poignant, but the ending felt rushed and unsatisfying.",
        "The film was groundbreaking in its innovative storytelling, but some moments felt contrived and unrealistic.",
        "The characters felt authentic, though the villain was unconvincing and poorly developed.",
        "The animation was magical, bringing the story to life, but the script overall felt flat and unmemorable.",
        "The pacing was seamless, keeping the narrative smooth, but the climax was surprisingly underwhelming.",
        "The movie was uplifting and left me with a smile, even though it relied on gimmicky plot devices at times.",
        "The action sequences were intense, but the overuse of CGI made them feel lifeless.",
        "The performances were stellar across the board, though the storyline was predictable.",
        "The film offered a thought-provoking commentary on modern society but felt pretentious in its delivery.",
        "The direction was brilliant, elevating the entire experience, but the pacing occasionally felt disjointed.",
        "It was a delightful romantic comedy, though parts of it leaned too heavily on clichés.",
        "The soundtrack was soulful and perfectly complemented the scenes, but the dialogue was disappointingly bland.",
        "The film was fun and full of energy, even if the humor occasionally felt cringeworthy.",
        "The story was compelling, holding my attention until the end, but the characters’ decisions felt contrived.",
        "The movie was completely forgettable, despite its visually stunning landscapes.",
        "The plot was boring and dragged on, but the performances were powerful and heartfelt.",
        "The story was timeless, but the villain lacked depth and felt generic.",
        "The comedy was witty, though the overall pacing of the film was slow and uneven.",
        "The emotional journey was heartwarming, but the romance subplot was awkwardly forced.",
        "The movie relied on overhyped marketing but did deliver a few memorable moments.",
        "The cinematography was absolutely breathtaking, though the story was underwhelming.",
        "The dialogue felt authentic, but the plot was too predictable to leave a lasting impression.",
        "The film was uplifting, but the special effects felt gimmicky and out of place.",
        "The performances were dynamic and engaging, but the ending was rushed and unsatisfying.",
        "The movie’s compelling storyline was let down by bland and uninspired dialogue.",
        "The story felt fresh and original, though the climax was disappointingly underwhelming.",
        "The film was captivating, but the villain’s arc was unconvincing and lacked depth.",
        "The pacing was perfect, creating a gripping experience, though the side characters were forgettable.",
        "The action sequences were spectacular, but the overuse of CGI made them feel hollow.",
        "The humor was hilarious, but the tonal shifts made the movie feel jarring at times.",
        "The characters were wonderfully authentic, though the story felt overly derivative.",
        "The soundtrack was soulful, even though the script felt contrived in places.",
        "The direction was brilliant, but the story itself was mediocre and uninspired.",
        "The visuals were breathtaking, but the pacing was painfully slow.",
        "The performances were stellar, but the ending left too many loose ends unresolved.",
        "The dialogue was witty, but the romantic subplot was awkward and unnecessary.",
        "The movie was uplifting, though the humor occasionally felt forced.",
        "The story was deeply poignant, but the resolution felt rushed and incomplete.",
        "The film was both fun and engaging, but the villain was poorly developed.",
        "The emotional depth was heartwarming, but the plot itself was disappointingly shallow.",
        "The film had iconic moments, but it relied too heavily on overhyped nostalgia.",
        "The pacing was seamless, making the movie flow well, but the dialogue felt clichéd.",
        "The movie had compelling performances, though the story lacked originality and felt predictable.",
    ]

    # The report loop lives inside the guard: test_statements only exists
    # here, so running it at module level would fail whenever the file is
    # imported rather than executed.
    for (statement, category, pos_score, neg_score, pos_words, neg_words,
         contrast_words, negation_words, sentiment_ratio,
         contrast_info) in analyze_reviews(test_statements):
        print(f"\nStatement: {statement}")
        print(f"Category: {category}")
        print(f"Sentiment Ratio: {sentiment_ratio:.2f}")
        print(f"Contrast Info: {contrast_info}")
        print(f"Positive Words Identified: {pos_words}")
        print(f"Negative Words Identified: {neg_words}")
        print(f"Contrast Words Identified: {contrast_words}")
        print(f"Negation Words Identified: {negation_words}")
        print(f"Scores - Positive: {pos_score:.1f}, Negative: {neg_score:.1f}")
        print(f"Words found: {tokenize(statement)}")  # Debug output to see tokenized words



Statement: The dialogue was sharp and witty, keeping me engaged, but the pacing was tedious, making the movie feel longer than it was.
Category: Balanced Praise and Criticism
Sentiment Ratio: 0.60
Contrast Info: Early contrast
Positive Words Identified: ['sharp', 'witty', 'engaged']
Negative Words Identified: ['tedious']
Contrast Words Identified: ['but']
Negation Words Identified: []
Scores - Positive: 3.0, Negative: 2.0
Words found: ['the', 'dialogue', 'was', 'sharp', 'and', 'witty', 'keeping', 'me', 'engaged', 'but', 'the', 'pacing', 'was', 'tedious', 'making', 'the', 'movie', 'feel', 'longer', 'than', 'it', 'was']

Statement: The film’s breathtaking visuals were overshadowed by its shallow and uninspired story.
Category: Balanced Praise and Criticism
Sentiment Ratio: 0.50
Contrast Info: Early contrast
Positive Words Identified: ['breathtaking']
Negative Words Identified: ['shallow', 'uninspired']
Contrast Words Identified: ['overshadowed']
Negation Words Identified: []
Scores - Po

# *code analysis:*



---


This is a sophisticated sentiment analysis system that goes beyond just "positive/negative/neutral" by looking at the nuances in how people express opinions. Let me explain why this is better and how it works.

---
# **Basically,**
Instead of just saying "this review is positive" or "this review is negative", *this system recognizes that most real opinions are mixed* - like when someone says "The movie was beautiful BUT the story was boring." It looks for these mixed opinions and classifies them into 5 more nuanced categories:

- "Predominantly Positive, with Criticisms"
- "Predominantly Negative, with Praise"
- "Positive with a Minor Caveat"
- "Negative with a Small Positive"
- "Balanced Praise and Criticism"

---

# **The Math (in simple terms):**

Word Weights: Instead of just counting positive/negative words, it gives them different weights:

- Strong positive words (like "breathtaking", "stellar") = 2 points
- Regular positive words (like "good", "nice") = 1 point
- Strong negative words (like "terrible", "awful") = 2 points
- Regular negative words (like "mediocre", "bland") = 1 point

---

# **Sentiment Ratio**
It calculates how positive or negative a statement is using:

`sentiment_ratio = positive_score / (positive_score + negative_score)`

This gives a number between 0 and 1 (it defaults to 0.5 when no sentiment words are found), where:

- Close to 1 = Very positive
- Close to 0 = Very negative
- Around 0.5 = Mixed/balanced

---

Example:
`"The film's breathtaking visuals were overshadowed by its shallow and uninspired story."`

---

How it scores this:


"breathtaking" = +2 (strong positive)
"shallow" = -1 (negative)
"uninspired" = -1 (negative)

---

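Total: 2 positive points and 2 negative points (negative words add to a separate negative score; they are not subtracted from the positive score)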
Sentiment ratio: **2/(2+2) = 0.5**

---

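It also finds the contrast word "overshadowed" and records its position in the sentence (just before the midpoint, so it is reported as an "Early contrast"). Because the sentiment ratio is 0.5, which is neither above 0.6 nor below 0.4, the review is classified as **"Balanced Praise and Criticism"**: the strong praise ("breathtaking") and the two milder criticisms ("shallow", "uninspired") carry equal weight.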
Why this is better than simple positive/negative/neutral:

---

It catches the strength of feelings ("good" vs "breathtaking")
It understands that "not bad" is actually positive (using negation detection)
It recognizes when someone is mostly positive but has a small criticism, versus when criticism is the main point
It can tell the difference between balanced criticism ("The acting was good **but the story was bad**") and minor caveats ("The movie was amazing, **though a bit long**")

---

This matches how humans actually talk about movies much better than just saying something is purely positive or negative.

---

# Movie Review Patterns
Movie reviews often follow specific patterns like "Good acting BUT bad story" or "Amazing visuals DESPITE weak plot". This method is specifically designed to catch these patterns.

---

# Important Context
In movie reviews, where the praise or criticism appears in the sentence matters. "The movie was amazing, though a bit long" is very different from "The movie was too long, though it had some amazing moments". This method catches this nuance.