In [5]:
# Task 3: NLP with spaCy
# Text Data: User reviews from Amazon Product Reviews.
# Goal:Perform named entity recognition (NER) to extract product names and brands.Analyze sentiment (positive/negative) using a rule-based approach.
# Deliverable: Code snippet and output showing extracted entities and sentiment
# Import necessary libraries
import spacy
from spacy import displacy
# Load the English NLP model
nlp = spacy.load("en_core_web_sm")
# Sample text data (Amazon product reviews)
text_data = [
    "I love the new iPhone 13! The camera is amazing.",
    "The Samsung Galaxy S21 has a great display but poor battery life.",
    "Just bought a pair of Adidas running shoes. They are very comfortable.",
    "The quality of this Sony headphones is outstanding.",
    "I didn't like the design of the Google Pixel 6."
]
# Process the text data
processed_data_list = [] # Create a list to store the processed data for JSON serialization
for text in text_data:
    doc = nlp(text)
    # Extract named entities
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    print("Named Entities:", entities)
    # Visualize the entities
    displacy.render(doc, style="ent")
    # Analyze sentiment (simple rule-based approach)
    sentiment = "positive" if "love" in text or "great" in text or "comfortable" in text else "negative"
    print("Sentiment:", sentiment)
    # Append the processed data to the list
    processed_data_list.append({"text": text, "entities": entities, "sentiment": sentiment})

# Note: The sentiment analysis here is very basic and can be improved with more sophisticated methods.
# Save the processed text data for further analysis
import json
with open('processed_reviews.json', 'w') as f:
    # Dump the list of dictionaries which now contains serializable data
    json.dump(processed_data_list, f, indent=4) # Added indent for better readability

# Load the processed data
with open('processed_reviews.json', 'r') as f:
    processed_data = json.load(f)
print("Processed Data:", processed_data)
# Display the processed data
for item in processed_data:
    print(f"Text: {item['text']}")
    # Entities are now loaded as lists of tuples
    print(f"Named Entities: {item['entities']}")
    print(f"Sentiment: {item['sentiment']}")
# Clean up the generated file
import os
if os.path.exists("processed_reviews.json"):
    os.remove("processed_reviews.json")
# End of the script

Named Entities: [('13', 'CARDINAL')]


Sentiment: positive
Named Entities: [('The Samsung Galaxy S21', 'ORG')]


Sentiment: positive
Named Entities: [('Adidas', 'PERSON')]


Sentiment: positive
Named Entities: [('Sony', 'ORG')]


Sentiment: negative
Named Entities: [('6', 'CARDINAL')]


Sentiment: negative
Processed Data: [{'text': 'I love the new iPhone 13! The camera is amazing.', 'entities': [['13', 'CARDINAL']], 'sentiment': 'positive'}, {'text': 'The Samsung Galaxy S21 has a great display but poor battery life.', 'entities': [['The Samsung Galaxy S21', 'ORG']], 'sentiment': 'positive'}, {'text': 'Just bought a pair of Adidas running shoes. They are very comfortable.', 'entities': [['Adidas', 'PERSON']], 'sentiment': 'positive'}, {'text': 'The quality of this Sony headphones is outstanding.', 'entities': [['Sony', 'ORG']], 'sentiment': 'negative'}, {'text': "I didn't like the design of the Google Pixel 6.", 'entities': [['6', 'CARDINAL']], 'sentiment': 'negative'}]
Text: I love the new iPhone 13! The camera is amazing.
Named Entities: [['13', 'CARDINAL']]
Sentiment: positive
Text: The Samsung Galaxy S21 has a great display but poor battery life.
Named Entities: [['The Samsung Galaxy S21', 'ORG']]
Sentiment: positive
Text: Just bought a pair of Adidas running shoes