# SharafDG Product Scraping along with it's seller

**Overview:** In this project, I developed a web scraper using Python's Requests module to scrape complete product information from SharafDG, including data from multiple sellers. The scraper extracts more than 15 data parameters such as product title, prices, rating, description, brand, seller information , all_variants , specification, and more.

To scrape product's all sellers, I used a Hardcoded URL request with the parameter product_id. seller_row is used to maintain the multiple sellers rows(number of offers). 

In [1]:
import requests
from lxml import html
import pandas as pd
import re
import json
from datetime import datetime

In [3]:
headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
    }

# Out of stock product
# url = 'https://uae.sharafdg.com/product/eklasse-nose-trimmer-eknt02/?snrai_campaign=DEkCc0OoMLQw&snrai_id=96f50ef2-fea1-4545-aec3-e5eb50936179'
# url = 'https://uae.sharafdg.com/product/braun-beard-trimmer-bt3040/?promo=1475238'
# stripe price
# url = 'https://uae.sharafdg.com/product/braun-beard-trimmer-bt3040/?promo=1748959'
url = 'https://uae.sharafdg.com/product/iphone-12-pro-256gb-pacific-blue/'
url = 'https://uae.sharafdg.com/product/apple-iphone-14-pro-max-256gb-deep-purple-middle-east-version/'

In [4]:
# Product page request
res = requests.get(url, headers=headers)
tree = html.fromstring(res.content)
res.status_code

200

In [5]:
def brand():
    brand = tree.xpath("//div[contains(@class, 'fw-800 w-30') and contains(text(), 'Brand')]//following::div[contains(@class, 'w-60')]/text()")
    
    if brand:
        brand = brand[0].strip()
    else:
        brand = ''
    return brand


print("Brand: ", brand())

Brand:  Apple


In [6]:
def category_breadcrumbs():
    category = tree.xpath('//div[(@class="breadcrumb")]//span[(@itemprop="name")]/text()')
    if category:
        # category = '>'.join([str(x) for x in category])
        # category = category.replace(',', '')
        # Remove last Breadcurmb (product name)
        category.pop()
        category = ' > '.join(category)
    else:
        category = ''
    return category

print("Category Breadcrumbs: ", category_breadcrumbs())

Category Breadcrumbs:  Online Shopping > Mobiles & Tablets > Mobiles


In [7]:
def product_rating():
    product_rating_xp = tree.xpath("//div[contains(@class, 'product_summary__block')]//span[@itemprop='ratingValue']/text()")
    product_rating = ''
    if product_rating_xp:
        # '4.1 out of 5 stars'
        product_rating = re.sub("[^0-9.]+", '', product_rating_xp[0].split()[0])

    return product_rating

print("Product Rating: ", product_rating())

Product Rating:  4.5


In [8]:
def product_reviews():
    product_review_xp = tree.xpath("//div[contains(@class, 'product_summary__block')]//span[@itemprop='reviewCount']/text()")
    product_review = ''
    if product_review_xp:
        #product_review = product_review_xp[0].strip().replace('ratings','').replace('rating','')
        product_review = re.sub("[^0-9]+", '', product_review_xp[0])

    return product_review

print("Product Review: ", product_reviews())

Product Review:  1915


In [9]:
def selected_variant():
    # Selected Variant xpath
    selected_variant_xp1 = tree.xpath("//div[@class='variants clearfix']//strong[@class='current-variant']/text()")
    
    if selected_variant_xp1:
        selected_variant = ' || '.join(list(map(lambda x: x.strip(), selected_variant_xp1)))
        
    return selected_variant

print("Selected Variants: ", selected_variant())

Selected Variants:  Deep Purple || 256 GB || Middle East Version


In [10]:
def all_variants():
    all_variants = ''
    
    # Other variants
    other_variant_xp1 = tree.xpath("//div[contains(@class, 'variant')]//a[contains(@class, 'variant')]/@data-value")
    
    if other_variant_xp1:
        other_variant_list = [x.strip() for x in other_variant_xp1]
    else:
        other_variant_list = []
        
    if other_variant_list:
        all_variants = selected_variant() + ' || '.join(list(map(lambda x: x.strip(), other_variant_list)))
    
    return all_variants

print("All Variants: ", all_variants())

All Variants:  Deep Purple || 256 GB || Middle East VersionSilver || Gold || Space Black || 128 GB || 512 GB || 1 TB || Physical Dual Sim Version || International Version || USA Version


In [11]:
def specification():
    specification_th_xp = tree.xpath("//div[@class='fw-800 w-30']/text()")
    specification_td_xp = tree.xpath("//div[@class='w-60']/text()")

    specification = {}
    if len(specification_th_xp)  == len(specification_td_xp):
        for item_header, item_data  in zip(specification_th_xp,specification_td_xp):
            specification_th_key = item_header.strip()
            specification_td_value = item_data.strip()

            specification[specification_th_key] = specification_td_value

    return specification

print("Specification: ", specification())

Specification:  {'Brand': 'Apple', 'Color': 'Deep Purple', 'Delivery Protocol': 'Activation is mandatory at the time of delivery for purchase completion', 'Model Number': 'MQ9X3AA/A', 'Version': 'Middle East Version', 'OS': 'iOS', 'OS Version': 'iOS 16', 'Sim Slot': 'Dual Sim (E-Sim + Sim Slot)', 'Technology': '5G', 'SIM Card': 'Nano-SIM', 'Screen Size': '6.7 INCH', 'Resolution': '2796 x 1290', 'Type': 'Super Retina XDR display', 'Multitouch': 'Yes', 'Protection': 'Fingerprint-resistant oleophobic coating', 'Chipset': 'A16 Bionic chip', 'CPU': '6‑core', 'CPU Details': '6‑core CPU with 2 performance and 4 efficiency cores', 'GPU': '5‑core GPU', 'Sensors': 'Face ID, LiDAR Scanner, Barometer, High dynamic range gyro, High-g accelerometer, Proximity sensor, Dual ambient light sensors', 'Internal Memory': '256 GIGABYTE', 'Card slot': 'No', 'Rear / Front Camera': '48 + 12 + 12 MP / 12 MP', 'Video Resolution': '4K@24/25/30/60fps, 1080p@25/30/60/120/240fps, 10-bit HDR, Dolby Vision HDR (up to 

In [12]:
def product_id():
    ''' Get the product id for json API data '''
    product_id_xp1 = tree.xpath("//link[@rel='shortlink']/@href")
    product_id = ""
    
    if product_id_xp1:
        product_id = int(product_id_xp1[0].split("p=",1)[1])
    
    return product_id

print("Product Id: ", product_id())

def seller_json_request():
    ''' Return: dictionary of product JSON data '''
    
    params = {
    'id': product_id(),
    'dgmember': 'false',
    }
    
    if product_id:
        # JSON data request for product sellers 
        seller_url = f'https://uae.sharafdg.com/wp-json/sdg/v1/promo'
        seller_res = requests.get(seller_url, headers=headers, params=params)
        
        if seller_res.status_code == 200:
            return json.loads(seller_res.content)
        
    return None 

Product Id:  4062388


In [13]:
product_data = []
product_json_data = seller_json_request()

for row in range(len(product_json_data)):
    name = product_json_data[row]['name']
    url = product_json_data[row]['url'].replace('//', '')
    seller_id = product_json_data[row]['id']
    product_id = product_json_data[row]['product_id']

    new_price = product_json_data[row]['price']
    old_price = product_json_data[row]['crossprice']
    currency = product_json_data[row]['currency']
    warranty = product_json_data[row]['warranty']
    image_urls = ' || '.join(product_json_data[row]['image'])
    seller_name = product_json_data[row]['seller-name']
    delivery_time = product_json_data[row]['delivery']['delivery_time']
    desc = product_json_data[row]['desc']
    shipping_fee = product_json_data[row]['shipping_fee']['shipping_fee_min']
    
    product_data.append({
        'name' : name,
        'url' : url,
        'seller_id' : seller_id,
        'product_id' : product_id,
        'new_price' : new_price,
        'old_price' : old_price,
        'currency' : currency,
        'warranty' : warranty,
        'image_urls': image_urls,
        'seller_name' : seller_name,
        'delivery_time' : delivery_time,
        'desc' : desc,
        'shipping_fee': shipping_fee,
        'brand': brand(),
        'category_breadcrumbs': category_breadcrumbs(),
        'product_rating': product_rating(),
        'product_reviews': product_reviews(),
        'selected_variant': selected_variant(),
        'all_variants': all_variants(),
        'specification': specification(),
    })

In [14]:
pd.DataFrame(product_data)

Unnamed: 0,name,url,seller_id,product_id,new_price,old_price,currency,warranty,image_urls,seller_name,delivery_time,desc,shipping_fee,brand,category_breadcrumbs,product_rating,product_reviews,selected_variant,all_variants,specification
0,Apple iPhone 14 Pro Max 256GB Deep Purple with...,uae.sharafdg.com/product/apple-iphone-14-pro-m...,2913110,4062388,4699.0,5099.0,AED,Warranty 1 Year,https://cdn.sharafdg.com/cdn-cgi/image/width=9...,Sharaf DG,24 April,,14,Apple,Online Shopping > Mobiles & Tablets > Mobiles,4.5,1915,Deep Purple || 256 GB || Middle East Version,Deep Purple || 256 GB || Middle East VersionSi...,"{'Brand': 'Apple', 'Color': 'Deep Purple', 'De..."
1,Apple iPhone 14 Pro Max 256GB Deep Purple with...,uae.sharafdg.com/product/apple-iphone-14-pro-m...,2845764,4062388,4761.0,5099.0,AED,Warranty One Year Warranty,https://cdn.sharafdg.com/cdn-cgi/image/width=9...,SE LLC,24 April,UAE Authorized Product With 1 Year Brand Warra...,14,Apple,Online Shopping > Mobiles & Tablets > Mobiles,4.5,1915,Deep Purple || 256 GB || Middle East Version,Deep Purple || 256 GB || Middle East VersionSi...,"{'Brand': 'Apple', 'Color': 'Deep Purple', 'De..."
2,Apple iPhone 14 Pro Max 256GB Deep Purple with...,uae.sharafdg.com/product/apple-iphone-14-pro-m...,2847655,4062388,4761.0,5099.0,AED,Warranty One Year Warranty,https://cdn.sharafdg.com/cdn-cgi/image/width=9...,SE LLC,26 Apr - 27 Apr,UAE Authorized Product With 1 Year Brand Warra...,13,Apple,Online Shopping > Mobiles & Tablets > Mobiles,4.5,1915,Deep Purple || 256 GB || Middle East Version,Deep Purple || 256 GB || Middle East VersionSi...,"{'Brand': 'Apple', 'Color': 'Deep Purple', 'De..."
3,Apple iPhone 14 Pro Max 256GB Deep Purple with...,uae.sharafdg.com/product/apple-iphone-14-pro-m...,2806216,4062388,4889.0,5099.0,AED,Warranty One Year Warranty,https://cdn.sharafdg.com/cdn-cgi/image/width=9...,PAPITA,26 Apr - 27 Apr,Why you should buy from us: Best Price Genuine...,13,Apple,Online Shopping > Mobiles & Tablets > Mobiles,4.5,1915,Deep Purple || 256 GB || Middle East Version,Deep Purple || 256 GB || Middle East VersionSi...,"{'Brand': 'Apple', 'Color': 'Deep Purple', 'De..."
4,Apple iPhone 14 Pro Max 256GB Deep Purple with...,uae.sharafdg.com/product/apple-iphone-14-pro-m...,2837569,4062388,4898.0,5099.0,AED,Warranty One Year Warranty,https://cdn.sharafdg.com/cdn-cgi/image/width=9...,GOLDEN EYE ELECTRONICS,27 Apr - 28 Apr,OFFICIAL UAE TRA VERSION - 1-YEAR APPLE WARRANTY,13,Apple,Online Shopping > Mobiles & Tablets > Mobiles,4.5,1915,Deep Purple || 256 GB || Middle East Version,Deep Purple || 256 GB || Middle East VersionSi...,"{'Brand': 'Apple', 'Color': 'Deep Purple', 'De..."
5,Apple iPhone 14 Pro Max 256GB Deep Purple with...,uae.sharafdg.com/product/apple-iphone-14-pro-m...,2800457,4062388,4899.0,5099.0,AED,Warranty One Year Warranty,https://cdn.sharafdg.com/cdn-cgi/image/width=9...,Planet Telecom,26 Apr - 27 Apr,1 YEAR WARRANTY - TRA STOCK - FAST SHIPPING,13,Apple,Online Shopping > Mobiles & Tablets > Mobiles,4.5,1915,Deep Purple || 256 GB || Middle East Version,Deep Purple || 256 GB || Middle East VersionSi...,"{'Brand': 'Apple', 'Color': 'Deep Purple', 'De..."
6,Apple iPhone 14 Pro Max 256GB Deep Purple with...,uae.sharafdg.com/product/apple-iphone-14-pro-m...,2912576,4062388,4949.0,5099.0,AED,Warranty One Year Warranty,https://cdn.sharafdg.com/cdn-cgi/image/width=9...,O2 Trading,27 Apr - 28 Apr,,13,Apple,Online Shopping > Mobiles & Tablets > Mobiles,4.5,1915,Deep Purple || 256 GB || Middle East Version,Deep Purple || 256 GB || Middle East VersionSi...,"{'Brand': 'Apple', 'Color': 'Deep Purple', 'De..."


# Block Testing

In [None]:
params = {
    'id': product_id(),
    'dgmember': 'false',
}

In [None]:
# JSON data request for product sellers 
seller_url = f'https://uae.sharafdg.com/wp-json/sdg/v1/promo'
seller_res = requests.get(seller_url, headers=headers, params=params)
seller_res.status_code

In [None]:
# Convert dict to string
# product_json_data = json.dumps(product_json_data)

# Convert data to dict
product_json_data = json.loads(seller_res.content)
product_json_data

In [None]:
def extract_date():
    # For Timezone - import pytz library
    # time_now = datetime.datetime.now(tz=pytz.timezone('Asia/Dubai'))
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')

print("Extract Date: ", extract_date())

In [None]:
def product_name():
    product_name_xp1 = tree.xpath("//div[contains(@class, 'product_summary__block')]//h1[@class='product_title entry-title']/text()")
    
    product_name = ''
    if product_name_xp1:
        product_name = product_name_xp1[0].strip()
    
    return product_name

product_name()

In [None]:
seller_row = 0

In [None]:
def number_of_offers():
    buybox_button_seller_id = tree.xpath("//div[@class='product-cart__wrapper']//button[contains(@class, 'add_to_cart_button') or contains(@class, 'ajax_add_to_cart')]/@data-promo_id")
    button_OutOfStock = tree.xpath("//div[(@class='pdp-price-cart')]//p[(@id='out-of-stock-box')]")
    number_of_offers_xp = tree.xpath('//div[(@class="more-offer-icons pdp-icon")]//a[contains(@class, "more-offers")]/text()')

    number_of_offers = '0'
    if buybox_button_seller_id and number_of_offers_xp:
        buybox_button_seller_id = int(buybox_button_seller_id[0].strip())
        if product_json_data:
            seller_product_id = product_json_data[seller_row]['id']
            seller_product_id = int(seller_product_id)
            # Check Buybox button seller id matches with json seller data id
            if buybox_button_seller_id == seller_product_id:
                # Converted to string, split and take 1st value from list
                number_of_offers_xp = number_of_offers_xp[0].split()
                number_of_offers_xp = number_of_offers_xp[0].strip()
                number_of_offers = number_of_offers_xp
    elif button_OutOfStock:
        number_of_offers = '0'

    return number_of_offers

print("Number of offers: ", number_of_offers())