# 1. Web Scraping - Flipkart Laptop Data

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import os

print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
# Configuration
# Search endpoint for the "laptops" query; the scraper appends "&page=<n>" to it.
BASE_URL = "https://www.flipkart.com/search?q=laptops&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"

# Browser-like request headers sent with every page request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    # 'br' is deliberately not advertised: requests only decodes Brotli bodies
    # when the optional brotli/brotlicffi package is installed. Without it a
    # Brotli-compressed response would reach the HTML parser undecoded and the
    # scraper would find no products.
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}
print("Configuration set.")

Configuration set.


In [3]:
def _find_first(node, tag, class_names):
    """Return the first ``tag`` under ``node`` matching one of ``class_names``.

    The class names are tried in the given order; ``None`` is returned when
    none of them matches.
    """
    for class_name in class_names:
        match = node.find(tag, class_=class_name)
        if match is not None:
            return match
    return None


def _text_or_default(tag, default="N/A"):
    """Return the stripped text of ``tag``, or ``default`` when ``tag`` is ``None``."""
    return tag.text.strip() if tag is not None else default


def _parse_counts(product):
    """Return ``(ratings_count, reviews_count)`` strings for one product card.

    Both values default to "0" when the stats container or the matching
    "Ratings"/"Reviews" span is absent.
    """
    ratings_count = "0"
    reviews_count = "0"

    stats_container = product.find('span', class_='PvbNMB')
    if stats_container is None:
        return ratings_count, reviews_count

    # Every span under the container is visited in order and a later match
    # overwrites an earlier one, so the last matching span decides the value.
    for span in stats_container.find_all('span'):
        txt = span.text.strip()
        if 'Ratings' in txt:
            ratings_count = txt.replace('Ratings', '').strip()
        elif 'Reviews' in txt:
            reviews_count = txt.replace('Reviews', '').strip()

    return ratings_count, reviews_count


def _parse_features(product):
    """Return the product's feature bullets joined with " | " ("" when absent)."""
    feature_list = product.find('ul', class_='HwRTzP')
    if feature_list is None:
        return ""
    items = feature_list.find_all('li', class_='DTBslk')
    return " | ".join(item.text.strip() for item in items)


def _parse_product(product):
    """Build one result row (a dict of strings) from a single product card."""
    ratings_count, reviews_count = _parse_counts(product)
    return {
        'Product Name': _text_or_default(_find_first(product, 'div', ('RG5Slk', 'KzDlHZ'))),
        'Price': _text_or_default(_find_first(product, 'div', ('hZ3P6w', 'Nx9bqj'))),
        'Rating': _text_or_default(_find_first(product, 'div', ('MKiFS6', 'XQDdHH'))),
        'Ratings Count': ratings_count,
        'Reviews Count': reviews_count,
        'Features': _parse_features(product),
    }


def get_product_data(page_num, timeout=15):
    """Fetch one search-results page and return its products as a list of dicts.

    Args:
        page_num: Page number appended to ``BASE_URL`` as ``&page=<page_num>``.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook cell indefinitely.

    Returns:
        A list with one dict per product card, with the keys 'Product Name',
        'Price', 'Rating', 'Ratings Count', 'Reviews Count' and 'Features'.
        Missing name/price/rating become "N/A", missing counts become "0" and
        missing features become "". An empty list is returned when the request
        fails, the status code is not 200, or no product container matches.

    The function is best-effort: errors are printed rather than raised.
    """
    url = f"{BASE_URL}&page={page_num}"
    print(f"Scraping Page {page_num}...")

    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve page {page_num}. Status: {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        # Product containers: try each known (tag, class) pair in order and
        # keep the first one that yields any match.
        products = []
        for tag, class_name in (('div', 'jIjQ8S'), ('div', 'tUxRFH'), ('a', 'k7wcnx')):
            products = soup.find_all(tag, class_=class_name)
            if products:
                break

        page_data = []
        for product in products:
            try:
                page_data.append(_parse_product(product))
            except Exception as e:
                # One malformed card must not discard the rest of the page.
                print(f"Error parsing product: {e}")
                continue

        return page_data

    except Exception as e:
        # Deliberately broad: network errors, timeouts and parse failures all
        # degrade to "no data for this page" so the calling loop can stop.
        print(f"Error retrieving page {page_num}: {e}")
        return []

print("Scraping function defined.")

Scraping function defined.


In [None]:
# Main execution
# Demo run: collect at least `target_count` rows, visiting at most `max_pages`
# result pages and stopping early on the first page that yields nothing.
all_data = []
target_count = 10  # Reduced for notebook playability
max_pages = 3      # Hard cap on pages requested in the demo
current_page = 1

print("Starting scraping demo...")

while len(all_data) < target_count and current_page <= max_pages:
    data = get_product_data(current_page)

    if not data:
        print("No data found on this page. Stopping.")
        break

    all_data.extend(data)
    print(f"Collected {len(data)} items. Total: {len(all_data)}")

    current_page += 1

    # Delay to avoid IP ban; skipped when no further request will be made.
    if len(all_data) < target_count and current_page <= max_pages:
        time.sleep(1)

print(f"Scraping complete. Collected {len(all_data)} items for demo.")

Starting scraping demo...
Scraping Page 1...
Collected 24 items. Total: 24
Scraping complete. Collected 24 items for demo.


In [None]:
# Save demo data (optional)
# Saving is off by default; set SAVE_DEMO_CSV = True to write the rows
# collected by the demo run to DEMO_CSV_PATH.
SAVE_DEMO_CSV = False
DEMO_CSV_PATH = 'laptops_demo.csv'

if SAVE_DEMO_CSV:
    pd.DataFrame(all_data).to_csv(DEMO_CSV_PATH, index=False)
    print(f"Saved {len(all_data)} rows to {DEMO_CSV_PATH}.")
else:
    print("Skipping save (to preserve full 'laptops.csv' dataset).")

Skipping save (to preserve full 'laptops.csv' dataset).


In [7]:
# Data Preview
# Show the first rows, the shape and the column names of the saved dataset,
# or report that the file is missing.
csv_path = 'laptops.csv'

if not os.path.exists(csv_path):
    print("laptops.csv not found.")
else:
    df_preview = pd.read_csv(csv_path)
    column_names = list(df_preview.columns)
    print("\nFull Dataset Preview (from laptops.csv):")
    print(df_preview.head())
    print(f"\nDataset shape: {df_preview.shape}")
    print(f"Columns: {column_names}")


Full Dataset Preview (from laptops.csv):
                                        Product Name    Price  Rating  \
0  ASUS Vivobook Go 15 AMD Ryzen 3 Quad Core 7320...  ₹30,990     4.3   
1  Samsung Galaxy Book4 Edge Series Copilot AI-PC...  ₹59,550     4.4   
2  Acer Aspire 3 Intel Celeron Dual Core - (8 GB/...  ₹24,699     3.8   
3  Samsung Galaxy Book4 Metal Intel Core i7 13th ...  ₹55,570     4.4   
4  Samsung Galaxy Book5 AI Metal Intel Core Ultra...  ₹68,390     4.6   

  Ratings Count Reviews Count  \
0         1,722           115   
1         2,345           207   
2         7,961           696   
3         4,754           308   
4           434            46   

                                            Features  
0  AMD Ryzen 3 Quad Core Processor | 8 GB LPDDR5 ...  
1  Qualcomm Snapdragon X Processor | 16 GB LPDDR5...  
2  Intel Celeron Dual Core Processor | 8 GB DDR4 ...  
3  Intel Core i7 Processor (13th Gen) | 16 GB LPD...  
4  Intel Core Ultra 5 Processor | 16 GB LPDDR