In [3]:
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [6]:
class AmazonLaptopScraper:
    """Scrape laptop listings from amazon.in and save them to a CSV file.

    The scraper downloads the search-results page for "laptop", follows the
    first ``MAX_PRODUCTS`` product links found on it and extracts a fixed set
    of fields from every product page.  Products whose title could not be
    extracted are left out of the CSV.
    """

    # Base against which the product links from the search page are resolved.
    BASE_URL = "https://www.amazon.in"
    # Upper bound on the number of product pages fetched by one scrape() call.
    MAX_PRODUCTS = 25
    # Seconds to wait for a response; without a timeout requests.get() can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 30
    # CSV columns, in output order.
    COLUMNS = (
        "Product name",
        "Description",
        "Category",
        "MRP",
        "Selling price",
        "Discount",
        "Brand name",
        "Image url",
        "Laptop specification",
    )

    def __init__(self, pincode,city):
        """
        Initialize the AmazonLaptopScraper class with pincode and city.

        Args:
            pincode (str): Pincode of the city; appended to the search URL
                as the ``pincode`` query parameter.
            city (str): Name of the city; only used in the CSV file name.
        """
        self.city = city
        self.pincode = pincode
        self.url='https://www.amazon.in/s?k=laptop'
        self.link=f'{self.url}&pincode={self.pincode}'
        self.HEADERS = {
            # User-Agent and other headers to mimic a web browser request
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
            "Accept-Encoding": "gzip, deflate",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "DNT": "1",
            "Connection": "close",
            "Upgrade-Insecure-Requests": "1",
            "Accept-Language": "da, en-gb, en",
            "referer": "https://prerender.io/"
        }

    def _fetch_soup(self, url):
        """
        Download ``url`` with the scraper's headers and parse it as HTML.

        Args:
            url (str): Absolute URL of the page to download.

        Returns:
            BeautifulSoup: Parsed document.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                does not answer within ``REQUEST_TIMEOUT`` seconds.
        """
        response = requests.get(url, headers=self.HEADERS, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(response.content, "html.parser")

    def req_soup(self):
        """
        Request and parse the HTML content of the search-results page.

        Returns:
            bs4.element.ResultSet: The ``<a>`` tags on the page that carry the
            product-link CSS classes.
        """
        soup = self._fetch_soup(self.link)
        return soup.find_all("a",class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal")

    def get_product_name(self,soup):
        """
        Extract the product name from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted product name or an empty string if not found.
        """
        try:
            # find() returns None when the element is missing, so `.text`
            # raises AttributeError, which is mapped to "".
            return soup.find("span",id="productTitle").text.strip()
        except AttributeError:
            return ""

    def get_description(self,soup):
        """
        Extract the description from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted description or an empty string if not found.
        """
        try:
            return soup.find("div",id="feature-bullets").text.strip()
        except AttributeError:
            return ""

    def get_category(self,soup):
        """
        Extract the category from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: Text of the first ``span.a-list-item`` on the page, or an
            empty string if there is none.
        """
        try:
            return soup.find("span",class_="a-list-item").text.strip()
        except AttributeError:
            return ""

    def get_mrp(self,soup):
        """
        Extract the mrp price from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted mrp price without the rupee sign, or an empty
            string if not found.
        """
        try:
            mrp = soup.find("span",class_="a-price a-text-price").find("span",class_="a-offscreen").text.strip()
        except AttributeError:
            mrp = ""
        return mrp.replace('₹','')

    def get_selling_price(self,soup):
        """
        Extract the selling price from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted selling price or an empty string if not found.
        """
        try:
            return soup.find("span",class_="a-price-whole").text
        except AttributeError:
            return ""

    def get_discount(self,soup):
        """
        Extract the discount from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted discount with any '-' removed, or an empty
            string if not found.
        """
        try:
            discount = soup.find("span",class_="a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage").text
        except AttributeError:
            discount = ""
        return discount.replace('-','')

    def get_brand_name(self,soup):
        """
        Extract the brand name from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted brand name or an empty string if not found.
        """
        try:
            return soup.find("span",class_="a-size-base po-break-word").text
        except AttributeError:
            return ""

    def get_image_url(self,soup):
        """
        Extract the image address from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted image address or an empty string if not found.
        """
        try:
            # `.get("src")` yields None for an <img> without a src attribute;
            # fall back to "" so the return type is always str.
            return soup.find("div",id="imgTagWrapperId").find("img").get("src") or ""
        except AttributeError:
            return ""

    def get_laptop_specification(self,soup):
        """
        Extract the laptop specs from the BeautifulSoup object.

        Args:
            soup (BeautifulSoup): BeautifulSoup object of the webpage.

        Returns:
            str: The extracted laptop specs or an empty string if not found.
        """
        try:
            return soup.find("table",class_="a-normal a-spacing-micro").text.strip()
        except AttributeError:
            return ""

    def scrape(self):
        """
        Scrape laptop data from Amazon and save it to a CSV file.

        The file is written to the current working directory as
        ``laptop_list (<city>).csv``.

        Returns:
            None

        Raises:
            requests.exceptions.RequestException: If the search page or a
                product page cannot be downloaded.
        """
        # Anchors without an href cannot be followed; drop them before
        # applying the page limit.
        hrefs = [anchor.get('href') for anchor in self.req_soup()]
        hrefs = [href for href in hrefs if href][:self.MAX_PRODUCTS]

        records = []
        for href in hrefs:
            # urljoin resolves relative links against BASE_URL and leaves
            # already-absolute links untouched.
            page = self._fetch_soup(urljoin(self.BASE_URL, href))
            records.append({
                "Product name": self.get_product_name(page),
                "Description": self.get_description(page),
                "Category": self.get_category(page),
                "MRP": self.get_mrp(page),
                "Selling price": self.get_selling_price(page),
                "Discount": self.get_discount(page),
                "Brand name": self.get_brand_name(page),
                "Image url": self.get_image_url(page),
                "Laptop specification": self.get_laptop_specification(page),
            })

        # Passing `columns` keeps the header stable even when nothing was scraped.
        amazon_data = pd.DataFrame(records, columns=list(self.COLUMNS))
        # Drop products whose title could not be extracted.  A boolean filter
        # is used instead of an in-place replace on a column selection, which
        # does not reliably write back to the frame.
        amazon_data = amazon_data[amazon_data["Product name"].ne("")]
        # Save DataFrame to CSV file
        amazon_data.to_csv(f"laptop_list ({self.city}).csv", header=True, index=False)

            
            

In [5]:
# Run the scraper for Bangalore; scrape() writes "laptop_list (bangalore).csv"
# into the current working directory.
bangalore = AmazonLaptopScraper(city="bangalore", pincode="560001")
bangalore.scrape()

# Delhi run, currently disabled:
# delhi = AmazonLaptopScraper(pincode="110001", city="delhi")
# delhi.scrape()


NameError: name 'json' is not defined