In [4]:
import spacy
import warnings
import pandas as pd
import spacy.displacy 
warnings.filterwarnings('ignore')

# Load the NER model stored under ner_amazon_embeddings_1. A forward slash is
# used as the separator: in a normal string literal '\m' is an invalid escape
# sequence, and a backslash separator only resolves on Windows.
nlp = spacy.load('ner_amazon_embeddings_1/model-best')

# Sample inputs: a headline followed by the body paragraph.
texts = [
    "Dell laptops named TRA's 2024 Most Desired Brand, iPhone & Titan follow",
    "The list of the top five most desired brands is headed by Dell Laptops, with Apple iPhone making an impressive rise of three spots to claim second place. Titan Watches holds the third position, while Sony Televisions saw a substantial jump, moving from eleventh to fourth. Life Insurance Corporation of India (LIC) completes the top five, advancing by eight ranks within the banking, financial services, and insurance (BFSI) sector. Significant advancements were also made by Honda Motorcycles and Maruti Suzuki, which climbed 32 and 26 ranks, respectively, to secure spots in the top ten.",
]
brands = []

for text in texts:
    doc = nlp(text)
    # Show the highlighted entities inline in the notebook output.
    spacy.displacy.render(doc, style="ent", jupyter=True)
    # Collect the text of every span labelled BRAND (duplicates are kept).
    for ent in doc.ents:
        if ent.label_ == 'BRAND':
            brands.append(ent.text)

print("Detected Brands: ", brands)


Detected Brands:  ['Titan', 'Dell Laptops', 'Titan', 'eight']


In [2]:
from transformers import pipeline
import torch
import json
import re

In [3]:
# Build the Hugging Face "ner" pipeline from the pre-trained checkpoint named
# below; the resulting callable is used by the entity-extraction helpers.
ner_pipeline = pipeline(
    task="ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
)

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
def extract_entities(paragraph, ner=None):
    """Run the transformer NER pipeline and bucket the entities it finds.

    Both ``B-<LABEL>`` and ``I-<LABEL>`` tags are accepted, as are already
    aggregated results that carry an ``entity_group`` key. Consecutive tokens
    (adjacent ``index`` values) that share a label are merged back into one
    entity string; word-piece continuations starting with ``##`` are glued
    to the previous piece without a space.

    NOTE(review): the CoNLL-03 checkpoint loaded in this notebook is
    documented to tag most entity tokens ``I-...`` and has no PRODUCT or DATE
    label, so matching only ``B-...`` tags finds almost nothing — confirm
    against the model card.

    Args:
        paragraph: Text to analyse.
        ner: Optional callable taking the text and returning a list of token
            dicts (``word`` plus ``entity`` or ``entity_group``, optionally
            ``index``). Defaults to the module-level ``ner_pipeline``.

    Returns:
        dict with the keys ``"products"``, ``"locations"``, ``"events"`` and
        ``"dates"``, each mapping to a list of entity strings in order of
        appearance.
    """
    tagger = ner_pipeline if ner is None else ner

    # Label (without B-/I- prefix) -> key in the returned dictionary.
    label_to_key = {
        "PRODUCT": "products",  # custom label; absent from stock CoNLL models
        "LOC": "locations",
        "MISC": "events",       # MISC is used as a stand-in for events
        "DATE": "dates",
    }
    extracted = {key: [] for key in label_to_key.values()}

    last_label = None  # label of the previous token
    last_index = None  # position of the previous token, when reported

    for token in tagger(paragraph):
        tag = token.get("entity_group") or token.get("entity") or ""
        prefix, _, label = tag.rpartition("-")
        word = token["word"]
        index = token.get("index")

        # A token extends the previous entity only if it directly follows it,
        # carries the same label, and is not explicitly marked as a new
        # beginning. Aggregated results have no index and are never merged.
        continues_previous = (
            label == last_label
            and prefix != "B"
            and index is not None
            and last_index is not None
            and index == last_index + 1
        )
        last_label, last_index = label, index

        key = label_to_key.get(label)
        if key is None:
            continue

        if continues_previous:
            if word.startswith("##"):
                extracted[key][-1] += word[2:]
            else:
                extracted[key][-1] += " " + word
        else:
            extracted[key].append(word)

    return extracted


In [None]:
import spacy

# Loaded spaCy pipelines keyed by model name, so the model is read from disk
# once instead of on every call.
_SPACY_MODELS = {}


def _get_spacy_model(name="en_core_web_lg"):
    """Return the spaCy pipeline called ``name``, loading it on first use."""
    if name not in _SPACY_MODELS:
        _SPACY_MODELS[name] = spacy.load(name)
    return _SPACY_MODELS[name]


def extract_entities_spacy(paragraph, nlp=None):
    """Extract brands, products, locations, events and dates with spaCy.

    Entity labels map to result keys as follows: ``ORG`` -> brands,
    ``PRODUCT`` -> products, ``GPE`` -> locations, ``EVENT`` -> events,
    ``DATE`` -> dates. Every other label is ignored.

    Each entity text is normalised by replacing runs of non-word characters
    with a single space and stripping the ends (so ``"sub-4m"`` becomes
    ``"sub 4m"``). Values are de-duplicated in first-seen order, and strings
    that are empty after normalisation are dropped.

    Args:
        paragraph: Text to analyse.
        nlp: Optional callable that turns text into an object exposing
            ``.ents`` (items with ``.label_`` and ``.text``). Defaults to the
            cached ``en_core_web_lg`` pipeline.

    Returns:
        dict with the keys ``"brands"``, ``"products"``, ``"locations"``,
        ``"events"`` and ``"dates"``, each mapping to a list of strings.
    """
    if nlp is None:
        nlp = _get_spacy_model()

    label_to_key = {
        "PRODUCT": "products",
        "ORG": "brands",
        "GPE": "locations",
        "EVENT": "events",
        "DATE": "dates",
    }
    entities = {
        "brands": [],
        "products": [],
        "locations": [],
        "events": [],
        "dates": []
    }

    for ent in nlp(paragraph).ents:
        key = label_to_key.get(ent.label_)
        if key is not None:
            entities[key].append(ent.text)

    # Normalise, then de-duplicate; dict.fromkeys keeps first-seen order,
    # unlike set(), whose ordering is arbitrary.
    for key, values in entities.items():
        normalised = (re.sub(r'\W+', ' ', item).strip() for item in values)
        entities[key] = list(dict.fromkeys(text for text in normalised if text))

    return entities

In [16]:
# Sample news text passed to the spaCy-based extractor; as the last expression
# in the cell, the returned dictionary is displayed as the cell output.
paragraph = "The Dzire will continue to be Maruti’s entry-level sedan offering.It gets a fresh face, all-LED lighting, and 15-inch dual-tone alloy wheels.Cabin highlights include dual-tone theme and a flat-bottom steering wheel.New features on board include a 9-inch touchscreen, sunroof, and a 360-degree camera.To be powered by the new Swift’s 1.2-litre, 3-cylinder petrol engine; will be offered with a CNG option as well.Expected to be priced from Rs 6.70 lakh (ex-showroom).One of the most popular sub-4m sedans in our market, the Maruti Dzire, is set to receive a generation update. It was spotted earlier completely undisguised, showing a distinct styling from the fourth-gen Swift. The carmaker has also opened its bookings, ahead of its scheduled launch tomorrow. If you are planning to buy it, here’s a recap of everything we know:"
extract_entities_spacy(paragraph)

{'brands': ['Maruti', 'sub 4m'],
 'products': ['Dzire', 'Swift'],
 'locations': [],
 'events': [],
 'dates': ['tomorrow', 'fourth gen']}

In [None]:
def recommend_products_from_news(paragraph, min_score=4):
    """Recommend the products mentioned in a news paragraph if it reads positive.

    The paragraph is scored with ``get_sentiment_score``, which must return a
    ``(sentiment_score, confidence)`` pair. NOTE(review): that helper is not
    defined in the visible part of this notebook — make sure its cell runs
    before this one. Entities are extracted with ``extract_entities`` only
    when the score reaches ``min_score``, so the NER model is not run for
    text that will be rejected anyway.

    Args:
        paragraph: News text to analyse.
        min_score: Lowest sentiment score treated as positive. Defaults to 4,
            the previously hard-coded threshold.

    Returns:
        A JSON string (4-space indent) with the keys ``text``,
        ``recommended_products``, ``locations``, ``sentiment_score`` and
        ``confidence`` when the sentiment is positive; otherwise the plain
        message ``"No recommendations as the sentiment is not positive."``.
    """
    sentiment_score, confidence = get_sentiment_score(paragraph)

    # Guard clause: nothing to recommend for non-positive text.
    if sentiment_score < min_score:
        return "No recommendations as the sentiment is not positive."

    entities = extract_entities(paragraph)
    recommendation_data = {
        "text": paragraph,
        "recommended_products": entities["products"],
        "locations": entities["locations"],
        "sentiment_score": sentiment_score,
        "confidence": confidence
    }

    # Serialise to JSON for easy storage or display.
    return json.dumps(recommendation_data, indent=4)

# Sample usage: a short news-style paragraph. This rebinds the `paragraph`
# name that an earlier cell also assigns.
paragraph = """
The new smartphone by XYZ Corp has taken the market by storm. With advanced AI capabilities and a sleek design, it's popular in New York and Los Angeles. 
Many users are thrilled with the product's performance.
"""

# Run the recommender and print what it returns: either a JSON string or the
# "No recommendations..." message.
recommendation_json = recommend_products_from_news(paragraph)
print(recommendation_json)
