# Flipkart Scraping

### Checking URL Accessibility

In [4]:
import requests
from bs4 import BeautifulSoup

def check_amazon_page_access(url, timeout=10):
    """Request ``url`` and report whether it answered with HTTP 200.

    The function itself is not tied to any particular site; it performs a
    plain GET on whatever address it is given.

    Parameters
    ----------
    url : str
        Address to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout ``requests.get`` may block indefinitely on an
        unresponsive host.

    Returns
    -------
    tuple
        ``(True, soup)`` where ``soup`` is the response body parsed with
        ``BeautifulSoup(..., 'html.parser')`` when the status code is 200;
        ``(False, None)`` for any other status code, or when the request or
        the parsing raises (the error is printed, not re-raised).
    """
    try:
        response = requests.get(url, timeout=timeout)

        # Anything other than a plain 200 is reported as "not accessible"
        if response.status_code != 200:
            return False, None

        # The parsed page is handed back so the caller can inspect its structure
        return True, BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        # Best-effort probe: report the problem and signal failure to the caller
        print("Error occurred:", e)
        return False, None

# Example usage: probe one concrete Flipkart search page.
# The URL template needs a real search term; left unformatted, the literal
# text "{query}" would be sent to Flipkart as the search string.
amazon_url = "https://www.flipkart.com/search?q={query}".format(query="laptop")
is_accessible, page_soup = check_amazon_page_access(amazon_url)

if is_accessible:
    print("Flipkart page is accessible for scraping!")
    # page_soup holds the parsed HTML and can be processed further here
else:
    print("Flipkart page is not accessible for scraping.")


Flipkart page is accessible for scraping!


### Scraping Flipkart Data (Product name, Product rating, Product price)

In [25]:
# pandas is imported in this cell because it runs before the notebook's main
# import cell; without it a fresh "Restart & Run All" stops here with a
# NameError on `pd`.
import pandas as pd

# Set display options to show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

#### Import Libraries

In [53]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

#### Custom scraping function (with pagination across result pages and error handling)

In [54]:
def _parse_flipkart_products(soup):
    """Build one record per product card found in a parsed results page."""
    # NOTE(review): the selectors below are hard-coded CSS class names taken
    # from the page markup; confirm they still match what Flipkart serves.
    records = []
    for card in soup.find_all('div', class_='_1AtVbE'):
        name_tag = card.find('div', class_='_4rR01T')
        price_tag = card.find('div', class_='_30jeq3')
        rating_tag = card.find('div', class_='_3LWZlK')
        image_tag = card.find('img', class_='_396cs4')

        # Name, price and image are mandatory; cards lacking any are skipped
        if not (name_tag and price_tag and image_tag):
            continue

        records.append({
            # .get() keeps a single <img> without a src from aborting the scrape
            'Image': image_tag.get('src', ''),
            'Name': name_tag.text.strip(),
            'Price': price_tag.text.strip(),
            # The rating is optional and falls back to an empty string
            'Rating': rating_tag.text.strip() if rating_tag else "",
        })
    return records


def _has_next_page(soup):
    """Return True when the page has a pagination link that is not disabled."""
    next_button = soup.find('a', class_='_1LKTO3')
    if not next_button:
        return False
    return 'disabled' not in next_button.get('class', [])


def scrape_flipkart_search(query, max_pages=None, timeout=10):
    """Collect product records from Flipkart search results for ``query``.

    Result pages are requested one after another (``page=1, 2, ...``) until a
    page shows no enabled pagination link or ``max_pages`` is reached.

    Parameters
    ----------
    query : str
        Search text. It is sent through ``params`` so that spaces and
        characters such as ``&`` or ``#`` are URL-encoded instead of
        corrupting the query string.
    max_pages : int or None, optional
        Upper bound on the number of result pages to fetch. ``None``
        (default) keeps following pagination with no limit.
    timeout : float, optional
        Seconds to wait for each response (default 10).

    Returns
    -------
    list of dict or None
        Records with the keys ``'Image'``, ``'Name'``, ``'Price'`` and
        ``'Rating'``. If an error interrupts the scrape, the records gathered
        so far are returned; ``None`` is returned only when the error struck
        before anything was collected. Errors are printed, not raised.
    """
    search_url = "https://www.flipkart.com/search"
    max_retries = 3   # retries allowed for each individual page
    retry_delay = 2   # seconds to wait before repeating a failed request

    all_product_info = []
    page_number = 1
    retries = 0
    try:
        while max_pages is None or page_number <= max_pages:
            response = requests.get(
                search_url,
                params={'q': query, 'page': page_number},
                timeout=timeout,
            )

            # Server-side (5xx) failures are retried a limited number of times
            if response.status_code >= 500 and retries < max_retries:
                retries += 1
                print(f"Retrying request for page {page_number}...")
                time.sleep(retry_delay)
                continue

            response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)
            retries = 0  # the page succeeded, so the next page gets a fresh retry budget

            soup = BeautifulSoup(response.content, 'html.parser')
            all_product_info.extend(_parse_flipkart_products(soup))

            if not _has_next_page(soup):
                break  # No more pages to scrape
            page_number += 1
    except Exception as e:
        print("Error occurred:", e)
        if not all_product_info:
            return None
        # Keep what was already scraped instead of discarding earlier pages
        print(f"Returning the {len(all_product_info)} products collected before the error.")

    return all_product_info


#### Get user input, then display the results / store the data as CSV

In [55]:
# Take user input for the product search query (surrounding whitespace removed)
query = input("Enter the product you want to search for on Flipkart: ").strip()

# Scrape Flipkart for the given query; an empty query is not sent at all
product_info = scrape_flipkart_search(query) if query else None

if product_info:
    # Store the results in a DataFrame
    df = pd.DataFrame(product_info)

    # The query is user input, so characters that are unsafe in a file name
    # (path separators, colons, ...) are replaced before building the path.
    # Letters, digits, spaces, '-' and '_' are kept as typed.
    safe_query = "".join(ch if ch.isalnum() or ch in " -_" else "_" for ch in query)

    # Save the DataFrame to a CSV file
    csv_filename = f"product_details_{safe_query}.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Product details saved to {csv_filename}")
else:
    print("No product details retrieved from Flipkart.")

Enter the product you want to search for on Flipkart: washing machine
Product details saved to product_details_washing machine.csv
