# Web scraping from Amazon

## Importing Libraries

In [24]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

## Creating a function to scrape data from Amazon

In [28]:
def amazon(search_query, timeout=10):
    """Scrape an Amazon.in search results page and save the products to Excel.

    Requests ``https://www.amazon.in/s`` for ``search_query``, collects the
    name, the ``a-price-whole`` price text and the first number in the
    ``a-icon-alt`` rating text of every search-result card, writes them to
    ``Amazon_<query>.xlsx`` in the current working directory (spaces in the
    query become ``+`` in the file name) and prints the first ten rows.

    Args:
        search_query: Free-text product search, e.g. ``"Lenovo legion 5"``.
        timeout: Seconds to wait for Amazon to respond before giving up.

    Returns:
        The scraped ``pandas.DataFrame`` with the columns ``Product Name``,
        ``Price (INR)`` and ``Rating`` (string values, ``None`` where a field
        is missing), or ``None`` when the response status is not 200.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # The output file keeps the '+'-joined form of the query in its name.
    file_query = search_query.replace(' ', '+')

    # Headers to mimic a browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9",
    }

    # Let requests build the query string: spaces still become '+', while
    # characters such as '&', '#' or '+' in the search text are percent-encoded
    # instead of corrupting the URL. The timeout prevents an indefinite hang.
    response = requests.get(
        "https://www.amazon.in/s",
        params={"k": search_query},
        headers=headers,
        timeout=timeout,
    )

    if response.status_code != 200:
        print(f"Failed to fetch data from Amazon. Status Code: {response.status_code}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.content, "html.parser")

    # One record per search-result card.
    records = []
    for product in soup.find_all("div", {"data-component-type": "s-search-result"}):
        # Product name
        name = product.h2.text.strip() if product.h2 else None

        # Product price (text of the "a-price-whole" span)
        price_tag = product.find("span", class_="a-price-whole")
        price = price_tag.text.strip() if price_tag else None

        # Product rating: first number in the "a-icon-alt" text. The match is
        # checked before use so that a label without any number yields None
        # instead of raising AttributeError.
        rating = None
        rating_tag = product.find("span", class_="a-icon-alt")
        if rating_tag:
            rating_match = re.search(r"\d+(\.\d+)?", rating_tag.text)
            if rating_match:
                rating = rating_match.group()

        records.append(
            {
                "Product Name": name,
                "Price (INR)": price,
                "Rating": rating,
            }
        )

    # Explicit columns keep the sheet layout stable even with zero records.
    df = pd.DataFrame(records, columns=["Product Name", "Price (INR)", "Rating"])

    if df.empty:
        # A 200 response without any result cards produces an empty sheet;
        # say so, so that it is not mistaken for a successful scrape.
        print("Warning: no products were found in the response.")

    # Save data to Excel
    file_name = f"Amazon_{file_query}.xlsx"
    df.to_excel(file_name, index=False)

    print(f"Data scraped and saved to {file_name}")

    print(df.head(10))

    return df

In [29]:
# User input for search query
if __name__ == "__main__":
    # Strip surrounding whitespace so an accidental blank entry is detected
    # and stray spaces do not end up in the search or the output file name.
    query = input("Enter the product to search on Amazon: ").strip()
    if query:
        amazon(query)
    else:
        # Nothing to search for: skip the request and the Excel export.
        print("No search term entered; nothing was scraped.")


Enter the product to search on Amazon:  Lenovo legion 5


Data scraped and saved to Amazon_Lenovo+legion+5.xlsx
                                        Product Name Price (INR) Rating
0  Lenovo Legion Pro 7 Intel Core i9-14900HX 16" ...    2,89,990    3.5
1  Lenovo [Smartchoice LOQ 12th Gen Intel Core i5...      68,990    3.9
2  Lenovo Legion 5 Intel Core i7-14650HX 16" (40....    1,29,690    3.4
3  Lenovo Legion 5 Intel Core i7-14650HX 16" (40....    1,40,000    3.8
4  Lenovo Legion 5 AMD Ryzen 7 5800H 15.6" (39.62...      97,500    4.4
5  (Refurbished) Lenovo Legion 5 AMD Ryzen 7 5800...      76,000    3.1
6  (Refurbished) Lenovo Legion 5 Intel Core i7-14...      97,990   None
7  Lenovo Legion 5 Pro AMD Ryzen 7 5800H 16" (40....    1,03,190    4.2
8  Lenovo Legion Slim 5 AI Powered Ryzen 7 7840HS...      99,990    3.5
9  Lenovo Legion 5 Intel Core i7-14650HX 16" (40....    1,67,990    4.6
