In [36]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import Image, display

# Load and clean the dataset
df = pd.read_csv('amazon-beauty-recommendation.csv')
df_clean = df.dropna(subset=['ProductId', 'ProductType', 'Rating', 'Timestamp', 'URL'])
df_clean = df_clean.drop_duplicates(subset=['ProductId'])

# Reset the index to ensure it's sequential (to avoid mismatch)
df_clean.reset_index(drop=True, inplace=True)

# TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words='english', max_features=1000)
tfidf_matrix = tfidf.fit_transform(df_clean['ProductType'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def fetch_image(product_url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }
    try:
        response = requests.get(product_url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        image_tag = soup.find("img", {"id": "landingImage"})
        if image_tag:
            return image_tag.get("src")
    except Exception as e:
        print(f"Error fetching image for URL {product_url}: {e}")
    return "https://via.placeholder.com/150"

def get_recommendations_with_images(product_id, cosine_sim=cosine_sim, top_n=10):
    # Check if product exists in the DataFrame
    if product_id not in df_clean['ProductId'].values:
        print(f"ProductId {product_id} does not exist in the dataset!")
        return []

    # Get index for the product
    try:
        # Get the index in the cleaned dataframe
        idx = df_clean.index[df_clean['ProductId'] == product_id].tolist()[0]
    except IndexError:
        print(f"ProductId {product_id} not found!")
        return []

    # Get similarity scores
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Ensure we don't try to get more than available recommendations
    sim_scores = sim_scores[1:top_n+1]  # Get top_n recommendations

    product_indices = [i[0] for i in sim_scores]

    recommendations = []
    for i in product_indices:
        # Ensure the index is within bounds
        if i < len(df_clean):
            try:
                product_row = df_clean.iloc[i]
                # Combine similarity score with rating (higher rating is more important)
                combined_score = sim_scores[product_indices.index(i)][1] * product_row['Rating']
                image_url = fetch_image(product_row['URL'])
                recommendations.append({
                    "ProductId": product_row['ProductId'],
                    "ProductType": product_row['ProductType'],
                    "Rating": product_row['Rating'],
                    "CombinedScore": combined_score,
                    "ImageURL": image_url
                })
            except IndexError:
                continue  # Skip invalid index or handle as necessary

    # Sort recommendations by combined score (higher is better)
    recommendations = sorted(recommendations, key=lambda x: x['CombinedScore'], reverse=True)

    return recommendations

# Test the function with a specific product ID (e.g., 'B00KWFDBKE' for mascara)
test_product_id = 'B00FYKVM0S'  # Replace with the ProductId you're testing
recommended_products = get_recommendations_with_images(test_product_id)

# Display recommendations with images
if recommended_products:
    for product in recommended_products:
        print(f"Product ID: {product['ProductId']}")
        print(f"Product Type: {product['ProductType']}")
        print(f"Rating: {product['Rating']}")
        display(Image(url=product['ImageURL']))


Product ID: B00FZH4126
Product Type: Lipstick
Rating: 5


Product ID: B00FZESMKG
Product Type: Lipstick
Rating: 5


Product ID: B00FZ84P7Q
Product Type: Lipstick
Rating: 5


Product ID: B00FZ1CGME
Product Type: Lipstick
Rating: 5


Product ID: B00FYPL3N4
Product Type: Lipstick
Rating: 5


Product ID: B00FYKVM0S
Product Type: Lipstick
Rating: 5


Product ID: B00FY435R2
Product Type: Lipstick
Rating: 5


Product ID: B00FXW4GFU
Product Type: Lipstick
Rating: 5


Product ID: B00FY8Y14E
Product Type: Lipstick
Rating: 4


Product ID: B00FYSZDQ4
Product Type: Lipstick
Rating: 1
