<a href="https://colab.research.google.com/github/ashi-agrawal-06/Python-bot-for-Flipkart-data-scraping/blob/main/flipkart_scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm


def get_page_soup(url, timeout=10):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).
        Without a timeout ``requests.get`` may wait forever on a stalled
        connection.

    Returns
    -------
    BeautifulSoup or None
        The response text parsed with the ``lxml`` parser when the server
        answers with HTTP 200.  ``None`` (after printing a short message)
        for any other status code or when the request itself fails.

    Notes
    -----
    Only ``requests.RequestException`` is treated as "no page".  Other
    exceptions (a missing parser, a programming error, Ctrl-C) are allowed
    to propagate instead of being reported as a network problem.
    """
    try:
        page = requests.get(url, timeout=timeout)
    except requests.RequestException:
        print("internet/domain error")
        return None

    if page.status_code != 200:
        print('\n error', page.status_code)
        return None
    return BeautifulSoup(page.text, 'lxml')


def extract_products(soup):
    """Collect the product cards present in a parsed page.

    Every ``<div>`` carrying the class ``_4ddWXP`` is looked up in ``soup``
    and the outcome is reported on stdout.

    Returns the collection of matching elements, or ``None`` when the page
    contains none.
    """
    product_cards = soup.find_all('div', {'class': '_4ddWXP'})
    found = len(product_cards)

    # Guard clause: nothing matched, so tell the caller there is no data.
    if found == 0:
        print("products not found")
        return None

    print("\n", found, "products found")
    return product_cards


def parse_rev_link(link1, timeout=10):
    """Find a product's review-listing URL and scrape the reviews behind it.

    The product page at ``link1`` is downloaded, the last ``<div>`` with
    class ``_2c2kV-`` is located and the ``href`` of the element that
    immediately follows it is appended to the Flipkart host name.  The
    resulting URL is handed to ``extract_reviews``.

    NOTE(review): the ``_2c2kV-`` blocks are presumably the review previews
    on the product page and the following link the "all reviews" anchor -
    confirm against the live markup.

    Parameters
    ----------
    link1 : str
        Absolute URL of the product page.
    timeout : float, optional
        Seconds to wait for the product page (default 10) so that a stalled
        connection cannot block the scraper forever.

    Returns
    -------
    Whatever ``extract_reviews`` returns for the review-listing URL.

    Raises
    ------
    requests.RequestException
        If the product page cannot be downloaded.
    IndexError
        If the page contains no ``_2c2kV-`` block.
    TypeError, KeyError
        If the element after the last block is missing or has no ``href``.
    """
    response = requests.get(link1, timeout=timeout)
    product_page = BeautifulSoup(response.text, 'lxml')

    markers = product_page.find_all('div', class_='_2c2kV-')
    if not markers:
        # Same exception type as indexing the empty result, but with a
        # message that explains what was missing.
        raise IndexError("no '_2c2kV-' block found on product page")

    link2 = "https://www.flipkart.com" + markers[-1].next_sibling['href']
    return extract_reviews(link2)

def extract_reviews(link2, max_pages=4, timeout=10):
    """Scrape review texts from the first pages of a review listing.

    Pages ``1 .. max_pages`` of ``link2`` are requested one after another by
    adding a ``page`` query parameter.  On each page every ``<div>`` with
    class ``col _2wzgFH K0kLPL`` is treated as one review and the text inside
    its ``t-ZTKy`` child is collected.

    Parameters
    ----------
    link2 : str
        URL of the review listing, with or without an existing query string.
    max_pages : int, optional
        Number of listing pages to visit (default 4).
    timeout : float, optional
        Seconds to wait for each page (default 10).

    Returns
    -------
    list of str
        One entry per review found, in page order.

    Raises
    ------
    requests.RequestException
        If any page cannot be downloaded.
    """
    # Use '&' only when the link already carries a query string; otherwise
    # the page parameter has to start one.
    separator = '&' if '?' in link2 else '?'

    reviews = []
    for page_no in range(1, max_pages + 1):
        response = requests.get(f'{link2}{separator}page={page_no}', timeout=timeout)
        listing = BeautifulSoup(response.text, 'lxml')
        for block in listing.find_all('div', class_="col _2wzgFH K0kLPL"):
            body = block.find('div', class_="t-ZTKy")
            if body is None:
                # A block without a text body must not discard the reviews
                # already gathered; just skip it.
                continue
            # The final string is dropped - presumably the trailing
            # "READ MORE" label; verify against the live markup.
            reviews.append(" ".join(list(body.stripped_strings)[:-1]))
    return reviews


def all_specs(link1, timeout=10):
    """Scrape the specification table of a product page into a dict.

    The page at ``link1`` is downloaded and the ``<div>`` with class
    ``_1UhVsV _3AsE0T`` is searched for rows (``<tr class="_1s_Smc row">``).
    For each row the text of its first ``<td>`` becomes the key and the text
    of its first ``<li>`` the value.

    Parameters
    ----------
    link1 : str
        Absolute URL of the product page.
    timeout : float, optional
        Seconds to wait for the page (default 10).

    Returns
    -------
    dict
        Mapping of specification name to value.  Empty when the page has no
        specification container.  Rows lacking either cell are skipped, so
        one malformed row does not lose the rest of the table.

    Raises
    ------
    requests.RequestException
        If the page cannot be downloaded.
    """
    response = requests.get(link1, timeout=timeout)
    product_page = BeautifulSoup(response.text, 'lxml')

    container = product_page.find('div', class_='_1UhVsV _3AsE0T')
    if container is None:
        return {}

    specification = {}
    for row in container.find_all('tr', class_='_1s_Smc row'):
        label = row.find('td')
        value = row.find('li')
        if label is None or value is None:
            continue
        specification[label.text] = value.text
    return specification
    

def parse_product(product):
    """Turn one search-result card into a flat record.

    The card supplies the title, price, star rating and image URL.  The
    product page linked from the card is then visited through
    ``parse_rev_link`` and ``all_specs`` to add reviews and specifications.

    Parameters
    ----------
    product : bs4 element
        A product card as returned by ``extract_products``.

    Returns
    -------
    dict
        The specifications of the product (possibly none) plus the keys
        ``title``, ``price``, ``stars``, ``reviews`` and ``imglink``.  Any of
        the last four is ``None`` when it could not be obtained.

    Raises
    ------
    AttributeError
        If the card has no ``s1Q9rs`` title link.
    """
    anchor = product.find('a', {'class': 's1Q9rs'})
    title = anchor.text.strip().replace(",", '')

    price_tag = product.find('div', {'class': '_30jeq3'})
    price = price_tag.text if price_tag is not None else None

    stars_tag = product.find('div', {'class': '_3LWZlK'})
    stars = stars_tag.text if stars_tag is not None else None

    # Build the product-page URL once; both detail scrapers need it.
    try:
        link1 = "https://www.flipkart.com" + anchor['href']
    except (KeyError, TypeError):
        link1 = None

    reviews = None
    specifications = {}
    if link1 is not None:
        # Detail scraping is best effort, but only ordinary errors are
        # absorbed: `except Exception` lets Ctrl-C stop a long scrape.
        try:
            reviews = parse_rev_link(link1)
        except Exception:
            reviews = None
        try:
            specifications = all_specs(link1)
        except Exception:
            specifications = {}

    # The previous lookup filtered <img> by '_3LWZlK', the class used for the
    # star-rating <div> above, which looks like a copy-paste slip.  Take the
    # card's first <img> instead - presumably the product photo; confirm
    # against the live markup.
    image_tag = product.find('img')
    imglink = image_tag.attrs.get('src') if image_tag is not None else None

    specifications.update({'title': title,
                           'price': price,
                           'stars': stars,
                           'reviews': reviews,
                           'imglink': imglink})

    return specifications

In [None]:
# Crawl the search-result pages and gather one record (dict) per product card.
dataset = []
for num in tqdm(range(1,2)):
    url = f'https://www.flipkart.com/search?q=mobile&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page={num}'
    soup = get_page_soup(url)

    # Guard clauses: despite the wording of the messages, a failing page is
    # only skipped - the loop carries on with the next page number.
    if not soup:
        print("no page data | closing scraper")
        continue

    product_list = extract_products(soup)
    if not product_list:
        print("no products | closing scraper")
        continue

    for product in tqdm(product_list):
        data = parse_product(product)
        dataset.append(data)

# The loop never breaks, so this is printed once every page has been handled.
print("scraper closed")

# Build the frame from the collected records and write it to disk as CSV.
df = pd.DataFrame(dataset)
df.to_csv('flipcart_mobiles.csv')
print("saved successfully")

  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/40 [00:00<?, ?it/s][A


 40 products found



  2%|▎         | 1/40 [00:22<14:41, 22.62s/it][A
  5%|▌         | 2/40 [00:32<11:52, 18.75s/it][A
  8%|▊         | 3/40 [00:42<09:58, 16.18s/it][A
 10%|█         | 4/40 [00:52<08:31, 14.20s/it][A
 12%|█▎        | 5/40 [01:02<07:33, 12.96s/it][A
 15%|█▌        | 6/40 [01:12<06:49, 12.04s/it][A
 18%|█▊        | 7/40 [01:21<06:14, 11.35s/it][A
 20%|██        | 8/40 [01:31<05:49, 10.92s/it][A
 22%|██▎       | 9/40 [01:41<05:29, 10.62s/it][A
 25%|██▌       | 10/40 [01:51<05:10, 10.34s/it][A
 28%|██▊       | 11/40 [02:01<04:55, 10.19s/it][A
 30%|███       | 12/40 [02:11<04:46, 10.23s/it][A
 32%|███▎      | 13/40 [02:21<04:36, 10.24s/it][A
 35%|███▌      | 14/40 [02:31<04:22, 10.10s/it][A
 38%|███▊      | 15/40 [02:41<04:07,  9.91s/it][A
 40%|████      | 16/40 [02:50<03:58,  9.92s/it][A
 42%|████▎     | 17/40 [03:01<03:50, 10.04s/it][A
 45%|████▌     | 18/40 [03:10<03:38,  9.94s/it][A
 48%|████▊     | 19/40 [03:21<03:32, 10.10s/it][A
 50%|█████     | 20/40 [03:31<03:20, 10

scraper closed
saved successfully





In [None]:
# Display the DataFrame built from the scraped product records.
df

Unnamed: 0,In The Box,Model Number,Model Name,Color,Browse Type,SIM Type,Hybrid Sim Slot,Touchscreen,OTG Compatible,Sound Enhancements,SAR Value,Display Size,Resolution,Resolution Type,GPU,Display Type,Display Colors,Other Display Features,Operating System,Processor Type,Processor Core,Primary Clock Speed,Secondary Clock Speed,Operating Frequency,Internal Storage,RAM,Expandable Storage,Supported Memory Card Type,Memory Card Slot Type,Primary Camera Available,Primary Camera,Primary Camera Features,Secondary Camera Available,Secondary Camera,Secondary Camera Features,Flash,HD Recording,Full HD Recording,Video Recording,Video Recording Resolution,...,Browser,Quick Charging,Call Log Memory,Optical Zoom,Image Editor,Call Wait/Hold,Hands Free,Video Call Support,Phone Book,Call Timer,Speaker Phone,Speed Dialing,Logs,NFC,Social Networking Phone,Instant Message,Business Phone,Removable Battery,MMS,Voice Input,Predictive Text Input,Series,FM Radio Recording,3G,Conference Call,Call Divert,USB Tethering,Keypad,Talk Time,Digital Zoom,DLNA Support,Mini HDMI Port,Dual Battery,Supported Languages,Phone Book Memory,Mobile Tracker,Keypad Type,Games,Music Player,Battery Type
0,"Handset, Adapter, Micro USB Cable, SIM Card To...",RMX2185,C11,Rich Green,Smartphones,Dual Sim,No,Yes,Yes,Dirac Sound Effect,"Head: 1.007 W/kg, Body: 0.65 W/kg",16.51 cm (6.5 inch),1600 x 720 Pixels,HD+,GE8320,HD+ In-cell LCD Display,16.7M,"20:9 Screen Ratio, 88.7% Screen-to-body Ratio,...",Android 10,Mediatek Helio G35,Octa Core,2.3 GHz,1.8 GHz,"GSM Bands: 850/900/1800/1900, WCDMA Bands: 1/5...",32 GB,2 GB,256 GB,microSD,Dedicated Slot,Yes,13MP + 2MP,"13MP + 2MP Rear Camera Setup, Sensor Sizes/Pix...",Yes,5MP Front Camera,"5MP Front Camera, Sensor Sizes/Pixel Data: 1/5...",Rear Single LED Flash,Yes,Yes,Yes,"720P (at 30 fps), 1080P (at 30fps)",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,"Handset, Adapter, Micro USB Cable, SIM Card To...",RMX2185,C11,Rich Grey,Smartphones,Dual Sim,No,Yes,Yes,Dirac Sound Effect,"Head: 1.007 W/kg, Body: 0.65 W/kg",16.51 cm (6.5 inch),1600 x 720 Pixels,HD+,GE8320,HD+ In-cell LCD Display,16.7M,"20:9 Screen Ratio, 88.7% Screen-to-body Ratio,...",Android 10,Mediatek Helio G35,Octa Core,2.3 GHz,1.8 GHz,"GSM Bands: 850/900/1800/1900, WCDMA Bands: 1/5...",32 GB,2 GB,256 GB,microSD,Dedicated Slot,Yes,13MP + 2MP,"13MP + 2MP Rear Camera Setup, Sensor Sizes/Pix...",Yes,5MP Front Camera,"5MP Front Camera, Sensor Sizes/Pixel Data: 1/5...",Rear Single LED Flash,Yes,Yes,Yes,"720P (at 30 fps), 1080P (at 30fps)",...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,"Handset, Power Adapter, Micro USB Cable, SIM E...",MZB07RIIN,C3,Arctic Blue,Smartphones,Dual Sim,No,Yes,Yes,,"Head: 0.506 W/kg, Body: 0.833 W/kg",16.59 cm (6.53 inch),1600 x 720 Pixels,HD+,IMG PowerVR GE8320,HD+ In-cell LCD Display,,"20:9 Aspect Ratio, Panda Glass Screen Protecti...",Android 10,Mediatek Helio G35,Octa Core,2.3 GHz,1.8 GHz,"GSM: B2/B3/B5/B8, WCDMA: B1/B5/B8, 4G FDD LTE:...",32 GB,3 GB,512 GB,microSD,Dedicated Slot,Yes,13MP + 2MP + 2MP,"13MP + 2MP + 2MP Rear Camera Setup, Primary (1...",Yes,5MP Front Camera,"5MP Front Camera, f/2.2 Aperture, 1.12μm Pixel...",Rear LED Flash,Yes,Yes,Yes,1080P (at 30 fps),...,Google Chrome,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,"Handset, Travel Adaptor (25W), Data Cable (USB...",SM-E625FZKDINS,Galaxy F62,Laser Grey,Smartphones,Dual Sim,No,Yes,Yes,,,17.02 cm (6.7 inch),2400 x 1080 Pixels,Full HD+,ARM Mali G76 MP12,Full HD+ Super AMOLED Display,16M,,Android 11,Exynos 9825,Octa Core,2.73 GHz,1.95 GHz,,128 GB,6 GB,1 TB,microSD,Dedicated Slot,Yes,64MP + 12MP + 5MP + 5MP,"Quad Rear Camera, Photography Features: Single...",Yes,32MP Front Camera,Smart Selfie Angle,Rear Flash,,Yes,Yes,4K (3840 x 2160),...,"Google Chrome, Samsung S-Browser 11.2",Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,Yes,Yes,Yes,F Series,Yes,,,,,,,,,,,,,,,,,
4,"Handset, Power Adapter, Micro USB Cable, SIM E...",MZB07RKIN,C3,Lime Green,Smartphones,Dual Sim,No,Yes,Yes,,"Head: 0.506 W/kg, Body: 0.833 W/kg",16.59 cm (6.53 inch),1600 x 720 Pixels,HD+,IMG PowerVR GE8320,HD+ In-cell LCD Display,,"20:9 Aspect Ratio, Panda Glass Screen Protecti...",Android 10,Mediatek Helio G35,Octa Core,2.3 GHz,1.8 GHz,"GSM: B2/B3/B5/B8, WCDMA: B1/B5/B8, 4G FDD LTE:...",32 GB,3 GB,512 GB,microSD,Dedicated Slot,Yes,13MP + 2MP + 2MP,"13MP + 2MP + 2MP Rear Camera Setup, Primary (1...",Yes,5MP Front Camera,"5MP Front Camera, f/2.2 Aperture, 1.12μm Pixel...",Rear LED Flash,Yes,Yes,Yes,1080P (at 30 fps),...,Google Chrome,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,"Handset, Power Adaptor, USB Type-C Cable, SIM ...",MZB087AIN,M3,Cool Blue,Smartphones,Dual Sim,No,Yes,Yes,"Dual Speakers, Hi-Res Audio Certified","Head - 0.868 W/Kg, Body - 0.865 W/Kg",16.59 cm (6.53 inch),2340 x 1080 Pixels,Full HD+,Adreno 610,Full HD+ Display,,"60Hz Refresh Rate, Corning Gorilla Glass 3, As...",Android 10,Qualcomm Snapdragon 662,Octa Core,2 GHz,,"2G GSM: B2/B3/B5/B8, 3G WCDMA: B1/B5/B8, 4G FD...",64 GB,6 GB,512 GB,microSD,Dedicated Slot,Yes,48MP + 2MP + 2MP,"48MP (Wide) + 2MP (Macro) + 2MP (Depth), Tripl...",Yes,8MP Front Camera,"8MP Front Camera (1.12μm, f/2.05 Aperture, 4P ...",Rear Flash,Yes,Yes,Yes,"Rear Camera: 1080p (at 30fps), Slo-mo (at 120f...",...,Google Chrome,Yes,,,,,,,,,,,,,,,,No,,,,,,Yes,,,,,,,,,,,,,,,,
6,"Handset, Power Adapter, Micro USB Cable, SIM E...",MZB07RHIN,C3,Matte Black,Smartphones,Dual Sim,No,Yes,Yes,,"Head: 0.506 W/kg, Body: 0.833 W/kg",16.59 cm (6.53 inch),1600 x 720 Pixels,HD+,IMG PowerVR GE8320,HD+ In-cell LCD Display,,"20:9 Aspect Ratio, Panda Glass Screen Protecti...",Android 10,Mediatek Helio G35,Octa Core,2.3 GHz,1.8 GHz,"GSM: B2/B3/B5/B8, WCDMA: B1/B5/B8, 4G FDD LTE:...",32 GB,3 GB,512 GB,microSD,Dedicated Slot,Yes,13MP + 2MP + 2MP,"13MP + 2MP + 2MP Rear Camera Setup, Primary (1...",Yes,5MP Front Camera,"5MP Front Camera, f/2.2 Aperture, 1.12μm Pixel...",Rear LED Flash,Yes,Yes,Yes,1080P (at 30 fps),...,Google Chrome,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,"Handset, Power Adapter (5V/2A), Micro USB Cabl...",MZB0813IN / MZB080SIN,9i,Midnight Black,Smartphones,Dual Sim,No,Yes,Yes,Single Bottom Opening Speaker,"Head: 0.528 W/kg, Body: 0.791 W/kg",16.59 cm (6.53 inch),1600 x 720 Pixels,HD+,IMG GE8320 (at 650 MHz),HD+ IPS Display,,"Panda Glass, 20:9 Screen Ratio",Android 10,MediaTek Helio G25,Octa Core,2 GHz,1.5 GHz,"GSM: B2, B3, B5, B8, WCDMA: B1, B5, B8, 4G LTE...",64 GB,4 GB,512 GB,microSD,Dedicated Slot,Yes,13MP Rear Camera,"13MP Rear Camera Setup (f/2.2, 1.0μm, PDAF), H...",Yes,5MP Front Camera,"5MP Front Camera (f/2.2, 1.12μm), Portrait, HD...",Rear LED Flash,Yes,Yes,Yes,"720P (at 30 fps), 1080P (at 30 fps)",...,Google Chrome,,,,,,,,,,,,,,,,,No,,,,,,,,,,,,,,,,,,,,,,
8,"Handset, Power Adaptor, USB Type-C Cable, SIM ...",MZB087CIN,M3,Yellow,Smartphones,Dual Sim,No,Yes,Yes,"Dual Speakers, Hi-Res Audio Certified","Head - 0.868 W/Kg, Body - 0.865 W/Kg",16.59 cm (6.53 inch),2340 x 1080 Pixels,Full HD+,Adreno 610,Full HD+ Display,,"60Hz Refresh Rate, Corning Gorilla Glass 3, As...",Android 10,Qualcomm Snapdragon 662,Octa Core,2 GHz,,"2G GSM: B2/B3/B5/B8, 3G WCDMA: B1/B5/B8, 4G FD...",64 GB,6 GB,512 GB,microSD,Dedicated Slot,Yes,48MP + 2MP + 2MP,"48MP (Wide) + 2MP (Macro) + 2MP (Depth), Tripl...",Yes,8MP Front Camera,"8MP Front Camera (1.12μm, f/2.05 Aperture, 4P ...",Rear Flash,Yes,Yes,Yes,"Rear Camera: 1080p (at 30fps), Slo-mo (at 120f...",...,Google Chrome,Yes,,,,,,,,,,,,,,,,No,,,,,,Yes,,,,,,,,,,,,,,,,
9,"Handset, Power Adaptor, USB Type-C Cable, SIM ...",MZB0879IN,M3,Power Black,Smartphones,Dual Sim,No,Yes,Yes,"Dual Speakers, Hi-Res Audio Certified","Head - 0.868 W/Kg, Body - 0.865 W/Kg",16.59 cm (6.53 inch),2340 x 1080 Pixels,Full HD+,Adreno 610,Full HD+ Display,,"60Hz Refresh Rate, Corning Gorilla Glass 3, As...",Android 10,Qualcomm Snapdragon 662,Octa Core,2 GHz,,"2G GSM: B2/B3/B5/B8, 3G WCDMA: B1/B5/B8, 4G FD...",64 GB,6 GB,512 GB,microSD,Dedicated Slot,Yes,48MP + 2MP + 2MP,"48MP (Wide) + 2MP (Macro) + 2MP (Depth), Tripl...",Yes,8MP Front Camera,"8MP Front Camera (1.12μm, f/2.05 Aperture, 4P ...",Rear Flash,Yes,Yes,Yes,"Rear Camera: 1080p (at 30fps), Slo-mo (at 120f...",...,Google Chrome,Yes,,,,,,,,,,,,,,,,No,,,,,,Yes,,,,,,,,,,,,,,,,


In [None]:
# Print a summary of the frame (index range, column count, dtypes, memory).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Columns: 126 entries, In The Box to Battery Type
dtypes: object(126)
memory usage: 39.5+ KB


In [None]:
# Show the column labels of the scraped frame.
df.columns

Index(['In The Box', 'Model Number', 'Model Name', 'Color', 'Browse Type',
       'SIM Type', 'Hybrid Sim Slot', 'Touchscreen', 'OTG Compatible',
       'Sound Enhancements',
       ...
       'DLNA Support', 'Mini HDMI Port', 'Dual Battery', 'Supported Languages',
       'Phone Book Memory', 'Mobile Tracker', 'Keypad Type', 'Games',
       'Music Player', 'Battery Type'],
      dtype='object', length=126)

In [None]:
# Inspect the 'reviews' entry stored for the row labelled 0.
df['reviews'][0]

['Good phone for a normal usage. I gifted it to my mom works well for her.',
 'Product is nice also i ordered in 1st sale i like display and battery also camera is good its value for money smartphone Display 6.55 inch best part also 1st changed camera module looks primum ...',
 "The 6.5-inch screen is fairly decent, and is good enough for games and videos. You get Android 10 with Realme UI, which offers quite a lot of customisation options but has a lot of preloaded apps. The 13-megapixel rear camera takes decent shots, and there's also a depth sensor for portraits.",
 'Very good budget phone , its good affordable smartphone in this price range , best in class 5000mah battery with powerful gaming processor , overall fabulous package .',
 'I love realme and love you too Flipkart. Super 💯👌 mobile with super prize and specs',
 'Wow amazing product this price and value for money  and display is amazing nice mobile phone this price I am happy battery life is good',
 "This is amazing product