In [25]:
!pip install requests
!pip install beautifulsoup4
!pip install pandas
!pip install scikit-learn



In [9]:
!pip install tqdm



In [1]:
from tqdm.notebook import tqdm

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import time
import random

def scrape_amazon_product_data(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    # Request the webpage
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    products = []
    
    # Select product listings
    for product in soup.select('.s-main-slot .s-result-item'):
        try:
            # Extract product name
            name_elem = product.select_one('h2 a span')
            name = name_elem.text.strip() if name_elem else None
            
            # Extract product price
            price_elem = product.select_one('.a-price .a-offscreen')
            price = float(price_elem.text.replace('$', '').replace(',', '').strip()) if price_elem else None
            
            # Extract product rating
            rating_elem = product.select_one('.a-icon-alt')
            rating = float(rating_elem.text.split()[0]) if rating_elem else None
            
            # Only add products with all details available
            if name and price is not None and rating is not None:
                products.append({
                    'name': name,
                    'price': price,
                    'rating': rating
                })
        except (AttributeError, ValueError) as e:
            continue
    
    return products

In [30]:
# Scrape data from Amazon
product= input("Enter the product you want to recommmend for : ")
base_url = 'https://www.amazon.com/s?k='+ product  # Example search for laptops
scraped_data = []
for i in tqdm(range(1, 100)):  # The website has 1000 pages
    url = base_url.format(i)
    scraped_data.extend(scrape_amazon_product_data(url))

# Create a DataFrame from the scraped data
df = pd.DataFrame(scraped_data)
df.head()

Enter the product you want to recommmend :  laptops


  0%|          | 0/99 [00:00<?, ?it/s]

Unnamed: 0,name,price,rating
0,"HP Newest 14"" Ultral Light Laptop for Students...",249.99,4.1
1,"HP 17 Laptop, 17.3” HD+ Display, 11th Gen Inte...",499.0,4.3
2,Acer Aspire 3 A315-24P-R7VH Slim Laptop | 15.6...,299.99,4.4
3,"SGIN Laptop Computer, 15.6 Inch Laptops with 1...",259.99,4.3
4,"Lenovo IdeaPad 1 Laptop, 15.6” FHD Display, AM...",329.99,4.3


In [31]:
# Simulate user ratings for a realistic scenario
user_ids = [1, 2, 3, 4, 5]  # Example user IDs
num_ratings = 20  # Total number of ratings to simulate

user_ratings = []
for _ in range(num_ratings):
    user_id = random.choice(user_ids)  # Randomly select a user ID
    product_name = random.choice(df['name'].tolist())  # Randomly select a product name
    rating = random.randint(1, 5)  # Randomly assign a rating between 1 and 5
    user_ratings.append({'user_id': user_id, 'product_name': product_name, 'rating': rating})

user_ratings_df = pd.DataFrame(user_ratings)
print(user_ratings_df)

    user_id                                       product_name  rating
0         4  15.6" Laptop Computer, Gaming Laptop,16GB RAM ...       4
1         1  ACEMAGIC Laptop Computer AMD Ryzen 7 5700U 16....       2
2         3  HP Notebook Laptop, 15.6" HD Touchscreen, Inte...       1
3         5  HP Portable Laptop, Student and Business, 14" ...       4
4         2  Lenovo IdeaPad 1 Laptop, 15.6” FHD Display, AM...       1
5         5  15.6" Laptop Computer, Gaming Laptop,16GB RAM ...       4
6         2  HP Notebook Laptop, 15.6" HD Touchscreen, Inte...       4
7         1  ASUS 15.6” Vivobook Go Laptop, Intel Celeron N...       3
8         2  HP 15.6" Portable Laptop (Include 1 Year Micro...       3
9         1  HP Newest 14" Ultral Light Laptop for Students...       3
10        4  300W AC Adapter Fit for Lenovo ThinkPad Y9000p...       5
11        3  HP 17 Laptop, 17.3” HD+ Display, 11th Gen Inte...       2
12        2  Laptop Computer, 16GB LPDDR5 RAM Laptop 512GB ...       2
13    

In [32]:
# Aggregate ratings by taking the mean of duplicates
user_ratings_agg = user_ratings_df.groupby(['user_id', 'product_name'])['rating'].mean().reset_index()

# Create a user-item matrix
user_item_matrix = user_ratings_agg.pivot(index='user_id', columns='product_name', values='rating').fillna(0)

# Calculate cosine similarity between users
user_similarity = cosine_similarity(user_item_matrix)

# The rest of your code remains the same
def get_recommendations(user_id, n=5):
    user_row = user_item_matrix.loc[user_id]
    similar_users = user_similarity[user_id - 1]  # Adjust for 0-based indexing
    
    # Get products the user hasn't rated
    unrated_products = user_row[user_row == 0].index
    
    # Calculate predicted ratings
    predicted_ratings = {}
    for product in unrated_products:
        product_ratings = user_item_matrix[product]
        weighted_sum = sum(similar_users * product_ratings)
        similarity_sum = sum(similar_users)
        predicted_ratings[product] = weighted_sum / similarity_sum if similarity_sum != 0 else 0
    
    # Sort and return top N recommendations
    sorted_recommendations = sorted(predicted_ratings.items(), key=lambda x: x[1], reverse=True)
    return sorted_recommendations[:n]

# Example usage
user_id = 4
recommendations = get_recommendations(user_id)
print(f"Top 5 product recommendations for User {user_id}:")
for product, predicted_rating in recommendations:
    print(f"\n {product}: Predicted rating = {predicted_rating:.2f}")

Top 5 product recommendations for User 4:

 Genuine 230W 19.5V 11.8A AC Charger fit for HP Omen X 2S 15, HP Z2 Mini G4, Zbook 15 G1 G2 17 G2, HP Thunderbolt Dock 230W G2 2UK38AA 924942-001 925141-850 Gaming Laptop Power Supply Adapter: Predicted rating = 1.10

 HP Portable Laptop, Student and Business, 14" HD Display, Intel Quad-Core N4120, 16GB DDR4 RAM, 64GB eMMC, 1 Year Office 365, Webcam, RJ-45, HDMI, Wi-Fi, Windows 11 Home, Silver: Predicted rating = 1.10

 SGIN Laptop Computer, 15.6 Inch Laptops with 12GB RAM 512GB SSD, Celeron N5095 Quad-Core(Up to 2.8GHz), 1920 * 1080P Notebook, 38Wh Battery, Mini HDMI, Webcam, USB 3.0, 5G WiFi, Type-C: Predicted rating = 1.10

 HP Notebook Laptop, 15.6" HD Touchscreen, Intel Core i3-1115G4 Processor, 32GB RAM, 1TB PCIe SSD, Webcam, Type-C, HDMI, SD Card Reader, Wi-Fi, Windows 11 Home, Silver: Predicted rating = 0.62

 Laptop Computer, 16GB LPDDR5 RAM Laptop 512GB NVMe SSD, Intel Quad-Core N100, 15.6 Inch 1080P IPS Display Windows 11 Pro Laptop

In [33]:
df.to_csv('commerse_data.csv', index=False)
user_ratings_df.to_csv('user_ratings.csv' , index=False)