In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import time
import os

In [2]:
def get_soup(url, timeout=30, retry_delay=2, max_retries=None):
    """Download ``url`` and return its body parsed with BeautifulSoup.

    Any ``requests`` failure (connection error, timeout, or an HTTP error
    status surfaced by ``raise_for_status``) is retried after a short pause.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before treating the attempt
            as failed. Without a timeout a stalled connection would block
            the whole scrape forever.
        retry_delay: Seconds to sleep between attempts.
        max_retries: Number of retries allowed after the first failure.
            ``None`` (the default) keeps retrying indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: The last failure, once
            ``max_retries`` retries have been used up.
    """
    failures = 0
    while True:
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return BeautifulSoup(response.content, 'html.parser')
        except requests.exceptions.RequestException as e:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            # Surface the failure so an endlessly failing URL is visible
            # instead of looking like a hung cell.
            print(f'Request failed ({e}); retrying {url} in {retry_delay}s')
            time.sleep(retry_delay)

In [3]:
def _text_or_none(node):
    """Return the stripped text of ``node``, or ``None`` when ``node`` is ``None``."""
    return node.text.strip() if node is not None else None


def scrape_phone_details(phone_url):
    """Scrape a single product page into a flat dict.

    Args:
        phone_url: Absolute URL of the product page.

    Returns:
        A dict with the keys ``'Name'``, ``'Brand'``, ``'Price'``,
        ``'Discounted Price'`` and ``'Rating'``, plus one extra key per row
        found in the page's specification tables. A field whose element is
        absent from the page is ``None`` rather than raising.
    """
    phone_soup = get_soup(phone_url)

    title_box = phone_soup.find('div', class_='aMaAEs')
    name = None
    if title_box is not None:
        name = _text_or_none(title_box.find('span', class_='B_NuCI'))

    disc_price = _text_or_none(phone_soup.find('div', class_='_30jeq3'))

    # 'Price' falls back to the discounted price when the separate price
    # element is absent (presumably the pre-discount price -- TODO confirm).
    list_price_tag = phone_soup.find('div', class_='_3I9_wc')
    price = disc_price if list_price_tag is None else list_price_tag.text.strip()

    # The brand is read from the fourth link of this container; guard the
    # index so pages with fewer links yield None instead of an IndexError.
    brand = None
    link_box = phone_soup.find('div', class_='_1MR4o5')
    if link_box is not None:
        links = link_box.find_all('a')
        if len(links) > 3:
            brand = links[3].text.strip()

    rating = _text_or_none(phone_soup.find('div', class_='_3LWZlK'))

    phone = {
        'Name': name,
        'Brand': brand,
        'Price': price,
        'Discounted Price': disc_price,
        'Rating': rating
    }

    # Each specification row contributes one key/value pair taken from its
    # first two child nodes; rows with fewer than two children are skipped.
    for spec in phone_soup.find_all('table', class_='_14cfVK'):
        for tr in spec.find_all('tr'):
            cells = tr.contents
            if len(cells) > 1:
                phone[cells[0].text.strip()] = cells[1].text.strip()

    return phone

In [4]:
def scrape_flipkart_data(base_url, brand_urls):
    """Scrape every product listed under each brand's search URL.

    Args:
        base_url: Site root, prefixed to the relative product links found
            on the result pages.
        brand_urls: Mapping of brand key to the search URL for that brand.

    Returns:
        A dict mapping each brand key to a list of product dicts as
        produced by ``scrape_phone_details``.
    """
    phones = {}
    for brand, url in brand_urls.items():
        brand_soup = get_soup(url)

        # Without a pagination element there is exactly one page of results.
        # Otherwise the last token of the pager text is used as the total
        # (presumably text of the form "Page 1 of N" -- TODO confirm).
        page_count = 1
        pager = brand_soup.find('div', class_='_2MImiq')
        if pager is not None and pager.span is not None:
            tokens = pager.span.text.split()
            if tokens:
                # Strip thousands separators so e.g. "1,234" parses.
                page_count = max(1, int(tokens[-1].replace(',', '')))

        brand_phones = []
        # Pages are 1-based: request exactly 1..page_count. The previous
        # range(0, page_count + 1) asked for one page past the end.
        for page in range(1, page_count + 1):
            page_soup = get_soup(f'{url}&page={page}')

            for card in page_soup.find_all('div', class_='_13oc-S'):
                link = card.find('a')
                if link is None or not link.get('href'):
                    # A card without a product link cannot be followed.
                    continue
                phone_url = f"{base_url}{link['href']}"
                brand_phones.append(scrape_phone_details(phone_url))

        phones[brand] = brand_phones

    return phones

In [5]:
# Site root; product links found on result pages are relative to it.
base_url = 'https://www.flipkart.com'

# Shared search query carrying the price-range facets (from 10000 to Max).
search = base_url + '/search?sid=tyy%2C4io&otracker=CLP_Filters&p%5B%5D=facets.price_range.from%3D10000&p%5B%5D=facets.price_range.to%3DMax'


def _brand_search_url(facet_value):
    """Return the shared search URL with a brand facet for ``facet_value`` appended."""
    return search + '&p%5B%5D=facets.brand%255B%255D%3D' + facet_value


# One search URL per brand; the argument is the facet value as it appears in the URL.
apple = _brand_search_url('APPLE')
samsung = _brand_search_url('SAMSUNG')
google = _brand_search_url('Google')
nothing = _brand_search_url('Nothing')
asus = _brand_search_url('ASUS')
oneplus = _brand_search_url('OnePlus')
oppo = _brand_search_url('OPPO')
vivo = _brand_search_url('vivo')
mi = _brand_search_url('Mi')
redmi = _brand_search_url('REDMI')
realme = _brand_search_url('realme')
poco = _brand_search_url('POCO')
iqoo = _brand_search_url('IQOO')
motorola = _brand_search_url('MOTOROLA')

# Brand key -> search URL, in the order the brands are scraped.
brand_urls = dict(
    apple=apple,
    samsung=samsung,
    google=google,
    nothing=nothing,
    asus=asus,
    oneplus=oneplus,
    oppo=oppo,
    vivo=vivo,
    mi=mi,
    redmi=redmi,
    realme=realme,
    poco=poco,
    iqoo=iqoo,
    motorola=motorola,
)

In [6]:
# Create the output directory before the (long) scrape starts: to_csv does
# not create missing parent directories, and failing only after all pages
# were fetched would throw the results away.
os.makedirs('data', exist_ok=True)

# Scrape every configured brand, then write one CSV per brand.
phones = scrape_flipkart_data(base_url, brand_urls)
for brand in brand_urls.keys():
    df = pd.DataFrame(phones[brand])
    df.to_csv(f'data/{brand}.csv', index=False)

In [7]:
# Read the per-brand CSV files back from disk, in a stable (sorted) order.
all_df = []
for file in sorted(os.listdir('data/')):
    # Skip anything that is not a CSV, and skip the combined 'phones.csv'
    # that this notebook writes into the same directory -- otherwise a
    # second run would load its own output and duplicate every row.
    if not file.endswith('.csv') or file == 'phones.csv':
        continue
    df = pd.read_csv(f'data/{file}')
    all_df.append(df)

In [8]:
# Stack the per-brand frames into one table. ignore_index=True gives the
# combined frame a fresh 0..n-1 index instead of repeating each file's own
# row labels; the written CSV is unaffected because index=False is used.
phones_df = pd.concat(all_df, ignore_index=True)
phones_df.to_csv('data/phones.csv', index=False)