In [6]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re

In [35]:
def get_title(soup):

    try:
        # Outer Tag Object
        title = soup.find("h1", attrs={"class":re.compile(r'Title')})
        # Inner NavigatableString Object
        title_value = title.text

        # Title as a string value
        title_string = title_value.strip()

    except AttributeError:
        title_string = ""
    
    return title_string

# Function to extract Product Price
def get_price(soup):
    try:
        price = soup.find("span", attrs={'class': re.compile(r'SpecialPriceSpan')})
        price_tag = price.find("span", attrs={'class': ''}).string.strip()
        
    except AttributeError:

        try:
            # If there is some deal price
            price_tag = soup.find("span", attrs={'id':'priceblock_dealprice'}).string.strip()

        except:
            price_tag = ""

    return price_tag

def get_offer_price(soup):
    try:
        offer_price = soup.find("div", attrs={'class': re.compile(r'OfferDetailsText2')}).string.strip()
        
        # Use regular expression to find the price
        match = re.search(r'₹(\d+)', offer_price)
        offer_price = match.group(1)
    except:
        offer_price = ""

    return offer_price
    
def get_coupon(soup):
    try:
        coupon = soup.find("div", attrs={'class': re.compile(r'OfferDetailsText2')}).string.strip()

        # Split the line by spaces
        words = coupon.split()
        
        # Get the last word
        coupon = words[-1]
    except:
        coupon = ""
        print("failed")

    return coupon
    
def get_brand(soup):
    try:
        # Outer Tag Object
        brand = soup.find("h2", attrs={"class":re.compile(r'Brand')})
        # Inner NavigatableString Object
        brand_value = brand.text

        # Title as a string value
        brand_string = brand_value.strip()

    except AttributeError:
        brand_string = ""
    
    return brand_string

def get_size(soup):
    try:
        # Outer Tag Object
        size = soup.find("p", attrs={"class":re.compile(r'Size')})
        # Inner NavigatableString Object
        size_value = size.text

        # Title as a string value
        size_string = size_value.strip()
        size_string = size_string.replace("Size :", "").strip()

    except AttributeError:
        size_string = ""
    
    return size_string

def get_extraDetail(soup):
    try:
        # Outer Tag Object
        extraDetail = soup.find("p", attrs={"class":re.compile(r'ProductExtraDetail')})
        # Inner NavigatableString Object
        extraDetail_value = extraDetail.text

        # Title as a string value
        extraDetail_string = extraDetail_value.strip()
    except AttributeError:
        extraDetail_string = ""
    
    return extraDetail_string



def get_product_id(url):
    try:
        # Extract the 5th word from each URL
        product_id = url.split("-")[3]
        print(product_id)
    except AttributeError:
        product_id = ""
    return product_id


# Function to extract Model No.
def get_model_no(soup):
    try:
        model_no = soup.find("span", string="Model No.").find_next_sibling("span").text.strip()
    except AttributeError:
        model_no = ""
    return model_no


# Function to extract Frame Width
def get_frame_width(soup):
    try:
        frame_width = soup.find("span", string="Frame Width").find_next_sibling("span").text.strip()
    except AttributeError:
        frame_width = ""
    return frame_width


# Function to extract Frame Dimensions
def get_frame_dimensions(soup):
    try:
        frame_dimensions = soup.find("span", string="Frame Dimensions").find_next_sibling("span").text.strip()
    except AttributeError:
        frame_dimensions = ""
    return frame_dimensions


# Function to extract Frame Colour
def get_frame_colour(soup):
    try:
        frame_colour = soup.find("span", string="Frame colour").find_next_sibling("span").text.strip()
    except AttributeError:
        frame_colour = ""
    return frame_colour

In [42]:

if __name__ == '__main__':

    # add your user agent 
    HEADERS = ({'User-Agent':'', 'Accept-Language': 'en-US, en;q=0.5'})

    # The webpage URL
    URL = "https://www.lenskart.com/sunglasses.html?pageCount=90"

    # HTTP Request
    webpage = requests.get(URL, headers=HEADERS)

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.text, "html.parser")
    # Fetch links as List of Tag Objects
    links = soup.find_all("a", attrs={'class': re.compile(r'AnchorWrapper')})
    # Store the links
    links_list = []

    # Loop for extracting links from Tag Objects
    for link in links:
            links_list.append(link.get('href'))


    d = {"title": [], "price": [],"discountedPrice": [],"coupon": [], "brand": [], "size": [], "extraDetail": [], "productID": [], "modelNo": [], "frameWidth": [], "frameDimensions": [], "frameColour": [],"link":[]}
    
    # Loop for extracting product details from each link 
    for link in links_list:
        new_webpage = requests.get("https://www.lenskart.com" + link, headers=HEADERS)
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")
        print(link)
        # Function calls to display all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['discountedPrice'].append(get_offer_price(new_soup))
        d['coupon'].append(get_coupon(new_soup))
        d['brand'].append(get_brand(new_soup))
        d['size'].append(get_size(new_soup))
        d['extraDetail'].append(get_extraDetail(new_soup))
        d['productID'].append(get_product_id(link))
        d['modelNo'].append(get_model_no(new_soup))
        d['frameWidth'].append(get_frame_width(new_soup))
        d['frameDimensions'].append(get_frame_dimensions(new_soup))
        d['frameColour'].append(get_frame_colour(new_soup))
        d['link'].append(link)
    
    lenskart_df = pd.DataFrame.from_dict(d)
    lenskart_df['title'].replace('', np.nan, inplace=True)
    lenskart_df = lenskart_df.dropna(subset=['title'])
    lenskart_df.to_csv("lenskar_data.csv", header=True, index=False)

/vincent-chase-vc-s11075-c11-sunglasses.html
s11075
/vincent-chase-vc-s15397-c3-sunglasses.html
s15397
/lenskart-boost-lkb-s16443-c1-sunglasses.html
s16443
/vincent-chase-vc-s15999-c1-sunglasses.html
s15999
/vincent-chase-vc-s16340-c1-sunglasses.html
s16340
/vincent-chase-vc-s16145-c5-sunglasses.html
s16145
/vincent-chase-vc-s16558-c2-sunglasses.html
s16558
/vincent-chase-polarized-vc-s11075-c7-sunglasses.html
vc
/vincent-chase-vc-s16341-c3-sunglasses.html
s16341
/vincent-chase-vc-s11110-c11-sunglasses.html
s11110
/vincent-chase-vc-s16561-c2-sunglasses.html
s16561
/lenskart-boost-lkb-s16582-c1-sunglass.html
s16582
/vincent-chase-vc-s16560-c2-sunglasses.html
s16560
/vincent-chase-vc-s-16342-c2-sunglass.html
s
/vincent-chase-vc-s16529-c2-sunglasses.html
s16529
/vincent-chase-vc-s16339-c1-sunglasses.html
s16339
/vincent-chase-vc-s16543-c2-sunglasses.html
s16543
/vincent-chase-vc-s15762-c1-sunglasses.html
s15762
/vincent-chase-vc-s14459-c2-sunglasses.html
s14459
/vincent-chase-vc-s15756-c2

In [43]:
lenskart_df

Unnamed: 0,title,price,discountedPrice,coupon,brand,size,extraDetail,productID,modelNo,frameWidth,frameDimensions,frameColour,link
0,Gold Full Rim Aviator,1500,1000,SINGLE,Vincent Chase Polarized,Medium,,s11075,VC S11075,138 mm,55-16-140,,/vincent-chase-vc-s11075-c11-sunglasses.html
1,Gold Brown Rimless Rectangle/ Square,2000,1500,SINGLE,Vincent Chase,Wide,,s15397,VC S15397,142 mm,60-12-148,,/vincent-chase-vc-s15397-c3-sunglasses.html
2,Tortoise Half Rim Wayfarer,2000,1500,SINGLE,Lenskart Boost,Wide,,s16443,LKB S16443,143 mm,67-20-125,,/lenskart-boost-lkb-s16443-c1-sunglasses.html
3,Black Full Rim Wayfarer,2000,1500,SINGLE,Lenskart Hustlr,Medium,,s15999,VC S15999,141 mm,49-21-150,,/vincent-chase-vc-s15999-c1-sunglasses.html
4,Brown Full Rim Rectangle/ Square,2000,1500,SINGLE,Vincent Chase Polarized,Medium,,s16340,VC S16340,140 mm,49-19-145,Brown,/vincent-chase-vc-s16340-c1-sunglasses.html
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1360,Gunmetal Rimless Square,6000,5000,SINGLE,John Jacobs,Medium,Frame + Lens,e16449,JJ E16449,136 mm,52-19-145,,/john-jacobs-jj-e16449-c2-eyeglasses.html
1361,Silver Full Rim Hexagonal,5000,4000,SINGLE,John Jacobs,Medium,Frame + Lens,e13723,JJ E13723,137 mm,53-16-145,Silver,/john-jacobs-jj-e13723-full-rim-c3-eyeglasses....
1362,Grey Transparent Full Rim Wayfarer,6000,5000,SINGLE,John Jacobs,Extra Wide,,s14698,JJ S14698,145 mm,53-21-140,Grey Transparent,/john-jacobs-jj-s14698-c2-sunglasses.html
1363,White Transparent Full Rim Wayfarer,1000,400,TRYUS,Vincent Chase Online,Wide,,5147,VC S5147/P,143 mm,50-23-153,,/vincent-chase-vc-5147-p-c121-sunglasses.html
