In [3]:
"""
Task 3: NLP with spaCy
Dataset: Amazon Product Reviews (Kaggle)

Goal:
1. Perform Named Entity Recognition (NER) to extract product names and brands.
2. Analyze sentiment (positive/negative/neutral) using a rule-based keyword approach.
3. Show sample outputs of entities and sentiment.
"""

# === Step 1: Import Libraries ===
import re

import kagglehub
import pandas as pd
import spacy

# Install spaCy and download its small English model (notebook magics; run once per environment)
%pip install spacy
!python -m spacy download en_core_web_sm

# === Step 2: Download Dataset from Kaggle ===
path = kagglehub.dataset_download("bittlingmayer/amazonreviews")
print("Path to dataset files:", path)

# Only a small sample is needed for the demo, so cap the number of rows parsed
# instead of loading the entire training file into memory.
SAMPLE_ROWS = 1000

# NOTE(review): the Kaggle archive may ship this file under a different name
# (e.g. compressed) -- confirm the actual file name inside `path`.
try:
    df = pd.read_csv(
        f"{path}/train.ft.txt",
        sep="\t",
        header=None,
        names=["review"],
        nrows=SAMPLE_ROWS,
    )
except (
    OSError,
    pd.errors.ParserError,
    pd.errors.EmptyDataError,
    UnicodeDecodeError,
) as err:
    # Fall back to hand-written reviews only for I/O or parse problems, and say
    # so, rather than silently swallowing every exception (including Ctrl-C).
    print(f"Could not load train.ft.txt ({err}); using built-in sample reviews instead.")
    df = pd.DataFrame({
        'review': [
            "I love my new Apple iPhone! The camera quality is excellent and battery lasts long.",
            "The Samsung Galaxy phone is terrible, slow, and not worth the price.",
            "Sony headphones deliver great sound quality, very comfortable to wear.",
            "The laptop by HP overheats quickly. Disappointed with the performance.",
            "I bought a Dell monitor, and it's absolutely perfect for my setup!"
        ]
    })

# === Step 3: Load spaCy English Model ===
# The "en_core_web_sm" pipeline must already be installed in this environment:
#   python -m spacy download en_core_web_sm
# `nlp` is the shared pipeline object used below to parse each review.
nlp = spacy.load("en_core_web_sm")

# === Step 4: Define Rule-Based Sentiment Function ===
# Lexicon entries are lowercase; an entry may be a multi-word phrase ("not worth").
positive_words = {"love", "excellent", "great", "perfect", "amazing", "good", "happy"}
negative_words = {"bad", "terrible", "poor", "disappointed", "slow", "not worth", "worst"}


def _count_lexicon_hits(text_lower, lexicon):
    """Return how many entries of *lexicon* occur in *text_lower* as whole words.

    Each entry is counted at most once, no matter how often it appears.
    """
    hits = 0
    for entry in lexicon:
        # \b anchors keep "love" from matching inside "glove"; \s+ lets the
        # words of a phrase be separated by any run of whitespace.
        pattern = r"\b" + r"\s+".join(re.escape(part) for part in entry.split()) + r"\b"
        if re.search(pattern, text_lower):
            hits += 1
    return hits


def analyze_sentiment(text):
    """Classify *text* as "Positive", "Negative" or "Neutral".

    The label is decided by comparing how many distinct positive lexicon
    entries and how many distinct negative lexicon entries appear in the text
    (case-insensitive, whole-word matching).

    Args:
        text: The review text. Non-string values (e.g. None or NaN coming from
            an empty DataFrame cell) are treated as carrying no sentiment.

    Returns:
        "Positive" if positive hits outnumber negative hits, "Negative" if the
        reverse holds, otherwise "Neutral".
    """
    if not isinstance(text, str):
        return "Neutral"

    text_lower = text.lower()
    pos = _count_lexicon_hits(text_lower, positive_words)
    neg = _count_lexicon_hits(text_lower, negative_words)
    if pos > neg:
        return "Positive"
    if neg > pos:
        return "Negative"
    return "Neutral"

# === Step 5: Perform NER and Sentiment Analysis ===
def _review_record(text):
    """Bundle one review with its spaCy entities and its rule-based sentiment."""
    parsed = nlp(text)
    return {
        "review": text,
        "entities": [(span.text, span.label_) for span in parsed.ents],
        "sentiment": analyze_sentiment(text),
    }


# Only the first 5 reviews are processed.
results = [_review_record(text) for text in df["review"].head(5)]

# === Step 6: Display Extracted Information ===
# One print call per record; the three lines are joined with newlines.
for record in results:
    print(
        f"\n🔹 Review: {record['review']}",
        f"   Named Entities: {record['entities']}",
        f"   Sentiment: {record['sentiment']}",
        sep="\n",
    )


Note: you may need to restart the kernel to use updated packages.
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
      --------------------------------------- 0.3/12.8 MB ? eta -:--:--
      --------------------------------------- 0.3/12.8 MB ? eta -:--:--
     - ------------------------------------- 0.5/12.8 MB 598.5 kB/s eta 0:00:21
     - ------------------------------------- 0.5/12.8 MB 598.5 kB/s eta 0:00:21
     --- ----------------------------------- 1.0/12.8 MB 868.0 kB/s eta 0:00:14
     --- ----------------------------------- 1.0/12.8 MB 868.0 kB/s eta 0:00:14
     ---- ---------------------------------- 1.6/12.8 MB 964.5 kB/s eta 0:00:12
     ----- ---------------------------------- 1.8/12.8 MB 1.1 MB/s eta