# Scraping through Flipkart searches
This notebook is a small script that scrapes products, along with their `features`, `price`, `rating`, etc., from Flipkart search results for a given keyword.

### Install these libraries if you don't already have them on your system

In [1]:
# !pip install requests
# !pip install beautifulsoup4
# !pip install pandas

### The following libraries are used

In [61]:
import re
from pathlib import Path
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [22]:
# Cap the text shown per DataFrame cell at 50 characters; longer values
# (e.g. the feature lists and product links) are displayed truncated.
pd.options.display.max_colwidth=50

### To scrape product details, run the cell below and enter your `search keyword` and the `number of pages you want to scrape`

In [55]:
#for taking inputs
# The raw keyword is kept in `search_object` (it is reused later for the CSV
# file name); surrounding whitespace is stripped so it ends up in neither the
# URL nor the file name.
search_object=input("Enter what you want to search!").strip()
# int() raises ValueError if the answer is not a whole number.
n_pages=int(input("Enter the no of pages you want to scrap"))

#processing the search keyword for fitting in the url
# quote() percent-encodes spaces as %20 and also escapes characters such as
# '&', '#', '+' or '/' that would otherwise corrupt the query string.
# Collapsing whitespace first avoids the stray trailing '%20' that a manual
# "word + '%20'" concatenation leaves behind.
search=quote(" ".join(search_object.split()),safe="")

#url template with custom search and page no
url_template=f"https://www.flipkart.com/search?q={search}&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&page="

#creating urls for user defined no of pages (page numbers start at 1)
urls=[url_template+str(page_no) for page_no in range(1,n_pages+1)]

#initiating an empty list data
# Each entry is one product row: [brand, features, price, rating, rating_count, link]
data=[]

# Seconds to wait for Flipkart before giving up on a page; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT=10


def _digits(text):
    """Return only the decimal digits found in `text`, concatenated (e.g. '12,499' -> '12499')."""
    return "".join(re.findall(r"\d",text))


def _parse_product(card):
    """Turn one search-result card into [brand, features, price, rating, rating_count, link].

    `card` is a BeautifulSoup tag for a single result. Returns None when the
    card has no link or no product-name element, i.e. when it cannot be a
    product entry, so the caller can skip it instead of crashing.
    Missing price / rating / rating count fall back to the placeholder
    strings "Not shown" / "unrated" / "unrated".
    """
    anchor=card.find('a')
    title=anchor.find('div',class_="_3wU53n") if anchor is not None else None
    if title is None:
        return None

    #product name contains brand name and then some key features
    words=title.get_text().split()
    bname=words[0] if words else ""
    features=[" ".join(words[1:])]

    #some products may not have price mentioned
    price_tag=anchor.find('div',class_="_1vC4OE _2rQ-NK")
    price=_digits(price_tag.get_text()) if price_tag is not None else "Not shown"

    #product page link is stored from a tag using href
    link=f"https://www.flipkart.com{anchor.get('href')}"

    # Any missing tag along this chain yields None, and the next attribute
    # access on None raises AttributeError -> treat the product as unrated.
    try:
        rating_count=_digits(anchor.find('div',class_="niH0FQ").find('span').find_next_sibling().span.span.get_text())
    except AttributeError:
        rating_count="unrated"

    #there may be products that dont have any rating
    rating_tag=anchor.find('div',class_="hGSR34")
    rating=rating_tag.get_text() if rating_tag is not None else "unrated"

    #Here we loop over all the features in 'ul' tag --> 'li' tag
    # Only direct child *tags* are read, so bare text nodes between the items
    # are not recorded as features; a missing list just adds nothing.
    feature_list=anchor.find('ul',class_="vFw0gD")
    if feature_list is not None:
        features.extend(item.get_text() for item in feature_list.find_all(True,recursive=False))

    return [bname,features,price,rating,rating_count,link]


for url in urls:
    #to make http request get the response html page and read it
    # A failed or non-2xx request skips this page rather than aborting the
    # whole run, so rows collected from earlier pages are kept.
    try:
        response=requests.get(url,timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print(f"Skipping {url}: {err}")
        continue

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed.
    soup=BeautifulSoup(response.text,"html.parser")

    #to jump to the tag containing all the data
    header=soup.find('div',class_="_1HmYoV _35HD7C")
    main=header.find_next_sibling() if header is not None else None
    if main is None:
        print(f"Skipping {url}: results container not found")
        continue

    #to loop over each product in the page
    # The last 2 matching tags are not product tags (refer page inspect), so
    # they are sliced off once here instead of re-running find_all on every
    # iteration to decide when to break. A page with 2 or fewer matches
    # therefore contributes no rows.
    cards=main.find_all('div',class_="bhgxx2 col-12-12")
    for card in cards[:-2]:
        row=_parse_product(card)
        if row is not None:
            #appending all the data into a list
            data.append(row)

Enter what you want to search!led tv
Enter the no of pages you want to scrap60


-------

#### Details in the red box are grabbed from the product search page for each product
<img src='Untitled.png' />

In [56]:
# Preview the scraped rows as a table (the bare expression is the cell output).
pd.DataFrame(
    data=data,
    columns=['brand','features','price','rating','rating_count','link'],
)

Unnamed: 0,brand,features,price,rating,rating_count,link
0,Mi,[4A PRO 80 cm (32) HD Ready LED Smart Android ...,12499,4.4,463724,https://www.flipkart.com/mi-4a-pro-80-cm-32-hd...
1,Mi,[4A 100 cm (40) Full HD LED Smart Android TV W...,17999,4.4,463724,https://www.flipkart.com/mi-4a-100-cm-40-full-...
2,Mi,[4A Pro 108 cm (43) Full HD LED Smart Android ...,21999,4.4,463724,https://www.flipkart.com/mi-4a-pro-108-cm-43-f...
3,Samsung,"[80cm (32 inch) HD Ready LED Smart TV, Netflix...",13999,4.4,1676,https://www.flipkart.com/samsung-80cm-32-inch-...
4,Mi,[4X 108 cm (43) Ultra HD (4K) LED Smart Androi...,24999,4.4,25037,https://www.flipkart.com/mi-4x-108-cm-43-ultra...
...,...,...,...,...,...,...
979,Samsung,[40 Inches Full HD LED UA40D5000PRMXL Televisi...,66900,4.3,38,https://www.flipkart.com/samsung-40-inches-ful...
980,LG,"[108cm (43 inch) Full HD LED Smart TV, Full HD...",61240,2.7,6,https://www.flipkart.com/lg-108cm-43-inch-full...
981,Samsung,"[(40 inch) Full HD LED TV, Full HD 1920 x 1080...",53500,unrated,unrated,https://www.flipkart.com/samsung-40-inch-full-...
982,Sony,[BRAVIA 55 inches Full HD 3D LED KDL-55HX750 T...,159900,5,18,https://www.flipkart.com/sony-bravia-55-inches...


In [58]:
# Keep the scraped rows in `df` so the table can be exported afterwards.
df=pd.DataFrame(
    data=data,
    columns=['brand','features','price','rating','rating_count','link'],
)

In [60]:
# Build a file-system-safe name from the raw keyword: every run of characters
# other than letters, digits, '_', '-' or spaces becomes a single '_'
# (a '/' in the keyword would otherwise be read as a directory separator).
# Plain keywords such as "led tv" are left unchanged.
safe_name=re.sub(r"[^\w\- ]+","_",search_object).strip() or "search"

# to_csv does not create missing folders, so make sure output/ exists first.
output_dir=Path('output')
output_dir.mkdir(parents=True,exist_ok=True)

# index=False: the positional row index is not written to the file.
df.to_csv(output_dir/f'{safe_name}.csv',index=False)