In [1]:
pip install nltk 


Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.5 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.5 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.5 MB ? eta -:--:--
   ------------- -------------------------- 0.5/1.5 MB 508.0 kB/s eta 0:00:02
   ------------- -------------------------- 0.5/1.5 MB 508.0 kB/s eta 0:00:02
   -------------------- ------------------- 0.8/1.5 MB 558.9 kB/s eta 0:00:02
   -------------------- ------------------- 0.8/1.5 MB 558.9 kB/s eta 0:00:02
   -------------------- ------------------- 0.8/1.5 MB 558.9 kB/s eta 0:00:02
   -------------------- ------------------- 0.8/1.5 MB 558.9 kB/s eta 0



In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import nltk
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Download NLTK data (if not already downloaded)
nltk.download('stopwords')
# NOTE(review): `stopwords` is not referenced by the cells visible here (the
# TfidfVectorizer below uses its built-in 'english' list) -- confirm it is needed.
from nltk.corpus import stopwords

# Load the CSV file
# The path is relative to the notebook's working directory; pd.read_csv raises
# FileNotFoundError when the file is not there.
DATA_PATH = 'output_products_with_keywords.csv'
df = pd.read_csv(DATA_PATH)

# Display the first few rows
print("Initial Data:")
display(df.head())

# Data Preprocessing
## Fill missing values
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# df[col]: the in-place call acts on an intermediate object (chained assignment),
# which triggers a FutureWarning in pandas 2.x and leaves `df` unchanged once
# Copy-on-Write is enabled.
for text_column in ('About Product', 'Assigned Keywords'):
    df[text_column] = df[text_column].fillna('')

# Feature Engineering
## Sentiment Analysis using a Pre-trained Transformer Model
# The tokenizer and the classifier are both loaded from the same checkpoint name.
SENTIMENT_CHECKPOINT = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)

def get_sentiment_score(text):
    """Return a probability-weighted sentiment score for ``text``.

    The text is tokenized with the module-level ``tokenizer`` (with truncation
    enabled), run through the module-level ``model``, and the logits of the first
    (only) sequence are converted to probabilities with a softmax. The score is
    the expectation ``sum(p_k * k)`` over the class weights ``k = 1..5``, so it
    lies between 1 and 5.

    Presumably class 1 is the most negative and class 5 the most positive --
    confirm against the checkpoint's label mapping.

    Args:
        text: The string to score.

    Returns:
        float: The weighted score.
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True)
    # Pure inference: skip autograd bookkeeping so no graph is built per call.
    with torch.no_grad():
        logits = model(**inputs).logits[0]
    probabilities = torch.nn.functional.softmax(logits, dim=0)
    class_weights = torch.arange(1, 6, dtype=probabilities.dtype)
    return torch.sum(probabilities * class_weights).item()

# Score every product description with the sentiment model.
print("Calculating sentiment scores...")

about_product_text = df['About Product']
df['Sentiment Score'] = about_product_text.apply(get_sentiment_score)

## Text Vectorization for Content-Based Filtering
print("Vectorizing text data...")
# Description and keywords are joined with one space so both feed the same
# TF-IDF vocabulary.
df['Combined Text'] = about_product_text + ' ' + df['Assigned Keywords']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['Combined Text'])

# Model Selection
## Content-Based Recommendation System
def recommend_ads(product_index, top_n=5):
    """Return the ads most similar to the product at ``product_index``.

    Similarity is the linear kernel between the product's TF-IDF row and every
    row of the module-level ``tfidf_matrix``. The queried product itself is
    excluded from the result, so at most ``len(df) - 1`` rows can be returned.

    Args:
        product_index: Non-negative positional row index of the query product.
        top_n: Maximum number of recommendations; values <= 0 give an empty frame.

    Returns:
        pandas.DataFrame: Rows of ``df`` ordered from most to least similar,
        restricted to the 'ad_copy', 'Brand Name', 'Assigned Keywords' and
        'Sentiment Score' columns.
    """
    query_vector = tfidf_matrix[product_index:product_index + 1]
    similarities = linear_kernel(query_vector, tfidf_matrix).flatten()
    ranked = similarities.argsort()[::-1]
    # A product always matches itself best; drop it so it is not recommended
    # back to the caller.
    ranked = ranked[ranked != product_index][:max(top_n, 0)]
    return df.iloc[ranked][['ad_copy', 'Brand Name', 'Assigned Keywords', 'Sentiment Score']]

# Simulate User Interface for Publishers
def _suggestions_for_sentiment(sentiment_score):
    """Map a sentiment score to an ``(ad_suggestion, price_suggestion)`` pair."""
    if sentiment_score >= 4:
        return (
            'Focus on premium features and positive testimonials.',
            'You can price your product above the market average.',
        )
    if sentiment_score >= 3:
        return (
            'Highlight unique selling points and competitive advantages.',
            'Consider competitive pricing.',
        )
    return (
        'Emphasize discounts and improvements over competitors.',
        'Pricing below market average may attract more customers.',
    )


def publisher_interface(top_n=5):
    """Prompt for a product description and print matching ads plus suggestions.

    Reads a description and a keyword string with ``input()``, vectorizes their
    concatenation with the already-fitted module-level ``tfidf``, and displays
    the rows of ``df`` most similar to it. Rows whose similarity is zero are not
    shown, since they share no TF-IDF term with the query. An ad suggestion and
    a price suggestion derived from the description's sentiment score are
    printed afterwards.

    If both answers are blank, a notice is printed and nothing else is done.

    Args:
        top_n: Maximum number of recommended rows to display (default 5).

    Returns:
        None. All results are printed or displayed.
    """
    print("\n--- Ad Recommendation System ---\n")
    product_description = input("Enter your product description: ").strip()
    assigned_keywords = input("Enter assigned keywords (comma-separated): ").strip()

    combined_text = f"{product_description} {assigned_keywords}".strip()
    if not combined_text:
        print("No product description or keywords were entered; nothing to recommend.")
        return

    # Calculate similarity
    query_vector = tfidf.transform([combined_text])
    similarities = linear_kernel(query_vector, tfidf_matrix).flatten()
    ranked = similarities.argsort()[::-1]
    # Keep only rows that actually share vocabulary with the query.
    matches = ranked[similarities[ranked] > 0][:max(top_n, 0)]

    print("\nTop Ad Recommendations for You:")
    if len(matches) == 0:
        print("No similar ads were found for this description.")
    else:
        display(df.iloc[matches][['ad_copy', 'Brand Name', 'Assigned Keywords', 'Sentiment Score']])

    # Provide suggestions based on sentiment
    sentiment_score = get_sentiment_score(product_description)
    ad_suggestion, price_suggestion = _suggestions_for_sentiment(sentiment_score)

    print("\nAd Suggestion: ", ad_suggestion)
    print("Price Suggestion: ", price_suggestion)

# Run the publisher interface
# (the function prompts with input(), so this cell waits for the user to type a
# product description and keywords before it finishes)
publisher_interface()

# Monitoring and Maintenance (Placeholders)
def monitor_performance():
    """Placeholder for performance tracking (metrics like CTR, conversion rates, etc.).

    Not implemented yet: the function does nothing and returns None.
    """
    return None

def retrain_model():
    """Placeholder for model retraining logic.

    Not implemented yet: the function does nothing and returns None.
    """
    return None


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\amber\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


FileNotFoundError: [Errno 2] No such file or directory: 'output_products_with_keywords.csv'