In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

# Category listing page that the scrape starts from.
URL = "https://mamaearth.in/product-category/fragrance"

# Request headers sent with every HTTP call made by this notebook.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.5',
}

In [3]:
# Download the category listing page.
# An explicit timeout (seconds) is passed because requests applies none by
# default, so a stalled connection would otherwise block this cell forever.
webpage = requests.get(URL, headers=HEADERS, timeout=30)

if webpage.status_code == 200:
    print("Data Fetched Successfully !")
else:
    print(f"Error{webpage.status_code} !")

# NOTE: the response body is parsed regardless of the status code, so after a
# failed request `soup` holds whatever error page the server returned.
soup = BeautifulSoup(webpage.content, 'html.parser')


Data Fetched Successfully !


In [4]:
# Extracting product links

# Every matching <div> is treated as one product card on the listing page.
product_links = soup.find_all("div",attrs={'class':'ProductCard_Wrapper_DisplayArea'})

links_list = []
for link in product_links:
    anchor = link.find("a")
    href = anchor.get('href') if anchor is not None else None
    if not href:
        # A card without an <a> tag or without an href cannot be followed;
        # skip it instead of failing on None.
        continue
    # The href is prefixed with the site origin, so it is presumably a
    # site-relative path -- TODO confirm against the live markup.
    product_link = "https://mamaearth.in" + href
    links_list.append(product_link)

# Column-oriented accumulator: one list per output column, filled row by row
# by the scraping loop.
scrap_data = {'Product_Name':[],'Description':[],'Price':[],'Rating out of 5':[]}


In [5]:
# Function to extract product details:

#Function to extract product name

def fetch_productName(new_soup):
    """Return the product title found on a parsed product page.

    Looks up the first ``<h1>`` carrying the product-name class and returns
    its text with surrounding whitespace removed.

    Args:
        new_soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped heading text, or ``"Not Available"`` when the lookup
        raises ``AttributeError`` (for example because no heading matched).
    """
    name_selector = {'class': 'ProductDetailsRevamp__ProdName-sc-1w9tx2u-2 hrSRwG'}
    try:
        heading = new_soup.find("h1", attrs=name_selector)
        return heading.text.strip()
    except AttributeError:
        return "Not Available"

# Function to extract product description

def fetch_productDescription(new_soup):
    """Return the short description (subtitle) of a parsed product page.

    Args:
        new_soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped text of the first ``<div class="subtitle">``, or
        ``"NIL"`` when the lookup raises ``AttributeError`` (for example
        because no such element exists).
    """
    try:
        raw_text = new_soup.find("div", attrs={'class': 'subtitle'}).text
        description = raw_text.strip()
    except AttributeError:
        return "NIL"
    return description

# Function to extract product price

def fetch_productPrice(new_soup):
    """Return the displayed price text of a parsed product page.

    The value is returned exactly as shown on the page (stripped of
    surrounding whitespace); no numeric conversion is performed.

    Args:
        new_soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped text of the first ``<td class="price">``, or
        ``"Not Available"`` when the lookup raises ``AttributeError``.
    """
    placeholder = "Not Available"
    try:
        price_cell = new_soup.find("td", attrs={'class': 'price'})
        price_text = price_cell.text
        return price_text.strip()
    except AttributeError:
        return placeholder

#Function to extract product Rating

def fetch_productRating(new_soup):
    """Return the rating text of a parsed product page, e.g. ``"4.8 ★"``.

    The first ``<a class="rating">`` element is located and the stripped
    texts of its first two ``<span>`` descendants are joined with a single
    space. (Presumably the first span is the score and the second the star
    glyph -- confirm against the live markup.)

    The spans are searched directly on the located tag. Re-serialising the
    tag and parsing it again is unnecessary, and doing so without naming a
    parser makes the result depend on which parsers are installed.

    Args:
        new_soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        ``"<span 0 text> <span 1 text>"``, or ``"NA"`` when the rating link
        is missing or contains fewer than two ``<span>`` elements.
    """
    try:
        rating_tag = new_soup.find("a", attrs={'class': 'rating'})
        span_tags = rating_tag.find_all("span")
        product_rating = span_tags[0].text.strip() + " " + span_tags[1].text.strip()
    except (AttributeError, IndexError):
        # AttributeError: no rating link was found (rating_tag is None).
        # IndexError: the link exists but holds fewer than two spans.
        product_rating = "NA"

    return product_rating
        
    

In [6]:
# Looping through all the links and scraping data :

# Start from empty columns so that re-running this cell does not append a
# second copy of every row to scrap_data.
for column in scrap_data.values():
    column.clear()

for link in links_list:
    try:
        # Explicit timeout (seconds): requests applies none by default, so a
        # single stalled product page would otherwise hang the whole loop.
        new_webpage = requests.get(link, headers=HEADERS, timeout=30)
    except requests.RequestException as error:
        # One unreachable page should not abort the remaining products.
        print(f"Skipping {link}: {error}")
        continue

    if new_webpage.status_code != 200:
        print(f"Skipping {link}: HTTP {new_webpage.status_code}")
        continue
    new_soup = BeautifulSoup(new_webpage.content,'html.parser')

    # All four columns are appended together so the lists stay the same
    # length and each index describes one product.
    scrap_data['Product_Name'].append(fetch_productName(new_soup))
    scrap_data['Description'].append(fetch_productDescription(new_soup))
    scrap_data['Price'].append(fetch_productPrice(new_soup))
    scrap_data['Rating out of 5'].append(fetch_productRating(new_soup))
    

In [7]:
# Build the results table: each key of scrap_data becomes a column and each
# list position becomes a row.
mamaEarth_df = pd.DataFrame(scrap_data)

# Persist the table as CSV in the working directory; the row index is written
# as the first column.
mamaEarth_df.to_csv(path_or_buf="mamaEarth_scrapData.csv", index=True)

In [8]:
# Show the scraped table as this cell's output (one row per product page).
mamaEarth_df

Unnamed: 0,Product_Name,Description,Price,Rating out of 5
0,Mamaearth ME Discovery Set - 8 ml X 5,Lasts for up to 12 Hours | Phthalate-Free,₹487.00,5.0 ★
1,Mamaearth ME Eau De Parfum For a Fragrance As ...,Unique Individual Fragrance |Lasts Upto 12 Hours,₹699.00,4.8 ★
2,ME Aqua Eau De Parfum For a Wave Of Freshness ...,Lasts up to 12 Hours | Made Safe Certified,₹699.00,4.5 ★
3,ME White Musk Eau De Parfum For a Fragrance Cl...,Lasts Up to 12 Hours | Made Safe Certified,₹699.00,4.6 ★
4,ME Floral Eau De Parfum - Live in the Moment -...,Lasts up to 12 Hours | Made Safe Certified,₹699.00,4.0 ★
5,ME Oud Eau De Parfum to Unleash Your Confidenc...,Lasts up to 12 Hours | Made Safe Certified,₹699.00,4.1 ★
6,ME First Rain Eau De Parfum to Refresh Your Se...,Lasts up to 12 Hours | Made Safe Certified,₹699.00,4.0 ★
7,Mamaearth ME Eau De Parfum For a Fragrance As ...,Unique Individual Fragrance |Lasts Upto 12 Hours,₹434.00,4.9 ★
