<a href="https://colab.research.google.com/github/ianotiato/-AI_Tools_Assignment/blob/main/task3_nlp_spacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Task 3: NLP with spaCy - NER and Sentiment Analysis
import spacy
from spacy import displacy
import random

# Install spaCy model (run this once)
# !python -m spacy download en_core_web_sm

# Load the small English spaCy pipeline used by everything below.
# If the model is missing, spacy.load raises OSError; print the install hint
# and re-raise so execution stops here instead of failing later with a
# confusing NameError on the unbound `nlp`.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Please download the spaCy model first:")
    print("!python -m spacy download en_core_web_sm")
    raise

# Demo corpus: a handful of sample Amazon-style product reviews
reviews = [
    "I bought the Apple iPhone 13 from Amazon and it's amazing! The camera quality is superb.",
    "The Samsung Galaxy phone I received had battery issues. Very disappointed with this product.",
    "Microsoft Surface Pro is a great device for work, but the price is too high.",
    "I love my new Sony headphones! The sound quality is incredible and they're very comfortable.",
    "The Dell laptop stopped working after 2 months. Worst purchase ever.",
    "Google Pixel has the best camera I've ever used in a smartphone.",
    "My HP printer constantly has paper jams. Not recommended.",
    "The Lenovo ThinkPad is built like a tank and performs excellently for business use.",
]

print("NAMED ENTITY RECOGNITION AND SENTIMENT ANALYSIS")
print("=" * 60)

# Keyword lexicons for the rule-based sentiment scorer. Entries are
# lower-case because the text is lower-cased before matching.
positive_words = {
    "amazing",
    "great",
    "love",
    "incredible",
    "superb",
    "excellent",
    "best",
    "good",
    "awesome",
    "fantastic",
}
negative_words = {
    "disappointed",
    "worst",
    "bad",
    "terrible",
    "horrible",
    "awful",
    "issues",
    "jams",
}

def analyze_sentiment(text):
    """Classify text as Positive, Negative or Neutral by keyword counts.

    The text is lower-cased and tokenized; each token whose text appears in
    ``positive_words`` or ``negative_words`` counts once toward that polarity.

    Args:
        text: The review text to score.

    Returns:
        "Positive" when positive keywords outnumber negative ones,
        "Negative" in the opposite case, and "Neutral" on a tie
        (including when no keyword matches at all).
    """
    # Only the token texts are read, so run just the tokenizer rather than
    # the whole processing pipeline.
    doc = nlp.make_doc(text.lower())
    positive_count = sum(1 for token in doc if token.text in positive_words)
    negative_count = sum(1 for token in doc if token.text in negative_words)

    if positive_count > negative_count:
        return "Positive"
    if negative_count > positive_count:
        return "Negative"
    return "Neutral"

# Report entities, brand mentions and sentiment for every review (numbered from 1)
for i, review in enumerate(reviews, start=1):
    print(f"\nReview {i}:")
    print(f"Text: {review}")

    # Run the spaCy pipeline to get the entity annotations
    doc = nlp(review)

    # Every recognized entity as a (text, label) pair
    entities = [(span.text, span.label_) for span in doc.ents]
    print("Named Entities:", entities)

    # Brand / product mentions are the entities labelled ORG or PRODUCT
    products_brands = [
        ent_text
        for ent_text, ent_label in entities
        if ent_label in ('ORG', 'PRODUCT')
    ]
    print("Products/Brands:", products_brands)

    # Keyword-based sentiment for the same review
    sentiment = analyze_sentiment(review)
    print("Sentiment:", sentiment)
    print("-" * 50)

# Show displaCy's entity highlighting for the first review as an example
sample_review = reviews[0]
doc = nlp(sample_review)

print("\nVISUALIZATION FOR SAMPLE REVIEW:")
print(f"Review: {sample_review}")
# jupyter=True is passed so the markup is rendered in the notebook output
displacy.render(doc, style="ent", jupyter=True)

# Aggregate statistics: entity labels and brand mentions over the whole corpus
from collections import Counter

print("\nENTITY ANALYSIS ACROSS ALL REVIEWS:")
all_entities = []
for review in reviews:
    doc = nlp(review)
    # Keep each entity as a (text, label) pair
    all_entities.extend((ent.text, ent.label_) for ent in doc.ents)

# How often each entity label occurs, most frequent first
entity_counts = Counter(label for _, label in all_entities)
print("Entity Type Distribution:")
for entity_type, count in entity_counts.most_common():
    print(f"  {entity_type}: {count}")

# The five most frequently mentioned ORG / PRODUCT entities
org_products = [
    text for text, label in all_entities if label in ('ORG', 'PRODUCT')
]
brand_counts = Counter(org_products)
print("\nMost Mentioned Products/Brands:")
for brand, count in brand_counts.most_common(5):
    print(f"  {brand}: {count} times")

NAMED ENTITY RECOGNITION AND SENTIMENT ANALYSIS

Review 1:
Text: I bought the Apple iPhone 13 from Amazon and it's amazing! The camera quality is superb.
Named Entities: [('Apple', 'ORG'), ('13', 'CARDINAL'), ('Amazon', 'ORG')]
Products/Brands: ['Apple', 'Amazon']
Sentiment: Positive
--------------------------------------------------

Review 2:
Text: The Samsung Galaxy phone I received had battery issues. Very disappointed with this product.
Named Entities: []
Products/Brands: []
Sentiment: Negative
--------------------------------------------------

Review 3:
Text: Microsoft Surface Pro is a great device for work, but the price is too high.
Named Entities: [('Microsoft Surface Pro', 'ORG')]
Products/Brands: ['Microsoft Surface Pro']
Sentiment: Positive
--------------------------------------------------

Review 4:
Text: I love my new Sony headphones! The sound quality is incredible and they're very comfortable.
Named Entities: [('Sony', 'ORG')]
Products/Brands: ['Sony']
Sentiment: Positive


ENTITY ANALYSIS ACROSS ALL REVIEWS:
Entity Type Distribution:
  ORG: 6
  CARDINAL: 1
  DATE: 1
  PERSON: 1

Most Mentioned Products/Brands:
  Apple: 1 times
  Amazon: 1 times
  Microsoft Surface Pro: 1 times
  Sony: 1 times
  Dell: 1 times
