### Standard header for webpage agents

In [41]:
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

### Importing libraries

In [42]:
from urllib.request import urlopen #used to open urls
from bs4 import BeautifulSoup #used for extracting data from html websites
import requests

### Taking Amazon product URL 

In [43]:
URL = "https://www.amazon.in/Sony-PS5-Console-Modern-Warfare/dp/B0CMQPPMB1"
webpage = requests.get(URL, headers = HEADERS)

### Creating a soup with lxml parser

In [44]:
soup = BeautifulSoup(webpage.content, 'lxml')

### Extracting product title and cleaning it

In [45]:
title = soup.find("span", attrs = {"id": "productTitle"})
print(title)

<span class="a-size-large product-title-word-break" id="productTitle">        Sony PS5 Console - Call of Duty Modern Warfare III Bundle       </span>


In [46]:
title_value = title.string
print(title_value)

        Sony PS5 Console - Call of Duty Modern Warfare III Bundle       


In [47]:
title_string = title_value.strip()
print(title_string)

Sony PS5 Console - Call of Duty Modern Warfare III Bundle


### Extracting the price of the product 

In [94]:
price = soup.find("span", attrs = {"class": "a-offscreen"}).string.strip()
print(price)

₹55,390.00


### Extracting the rating star of the product

In [115]:
rating_star = soup.find("i", attrs = {"class": "a-icon a-icon-star a-star-4-5 cm-cr-review-stars-spacing-big"}).string
print(rating_star)

4.5 out of 5 stars


### Extracting the rating count of the product

In [123]:
rating_count = soup.find("span", attrs = {"id": "acrCustomerReviewText"}).string
print(rating_count)

53 ratings


### Checking if the product is available or not

In [134]:
product_availability = soup.find("div", attrs = {"id": "availability"})
product_availability = product_availability.find("span").string
print(product_availability)

 In stock 


### Compiling all the codes in a function

In [21]:
from urllib.request import urlopen #used to open urls
from bs4 import BeautifulSoup #used for extracting data from html websites
import requests

def get_title(soup): 
    try: 
        title = soup.find("span", attrs = {"id": "productTitle"})
        #print(title)
        title_value = title.string
        #print(title_value)
        title_string = title_value.strip()
    except AttributeError: 
        title_string = "NA"
    return title_string

def get_price(soup): 
    try: 
        price = soup.find("span", attrs = {"class": "a-offscreen"}).string.strip()
    except AttributeError: 
        price = "NA"
    return price

def get_rating_star(soup):
    try: 
        rating_star = soup.find("i", attrs = {"class": "a-icon a-icon-star a-star-4-5 cm-cr-review-stars-spacing-big"}).string
    except AttributeError: 
        rating_star = "NA"
    return rating_star

def get_rating_count(soup):
    try: 
        rating_count = soup.find("span", attrs = {"id": "acrCustomerReviewText"}).string
    except AttributeError:
        rating_count = "NA"
    return rating_count

def get_availability(soup):
    try: 
        product_availability = soup.find("div", attrs = {"id": "availability"})
        product_availability = product_availability.find("span").string
    except AttributeError: 
        product_availability = "NA"
    return product_availability

if __name__ == '__main__': 
    # Headers for request
    #HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Webpage URL
    URL = input("Enter an amazon product URL")
    #URL = "https://www.amazon.in/Sony-PS5-Console-Modern-Warfare/dp/B0CMQPPMB1"
    #webpage = requests.get(URL)
    webpage = requests.get(URL, headers = HEADERS)

    # Soup object containing webpage dat+a
    soup = BeautifulSoup(webpage.content, 'lxml')
    print("Product Title:", get_title(soup))
    print("***********************************")
    print("Product Price:", get_price(soup))
    print("***********************************")
    print("product Rating Star:", get_rating_star(soup))
    print("***********************************")
    print("Product Rating Count:", get_rating_count(soup))
    print("***********************************")
    print("Product Availability: ", get_availability(soup))

Product Title: Sony PS5 Console
***********************************
Product Price: ₹54,990.00
***********************************
product Rating Star: 4.6 out of 5 stars
***********************************
Product Rating Count: 1,403 ratings
***********************************
Product Availability:   Only 1 left in stock. 


### Checking multiple products based on amazon search 

### Flipkart Scrapping

In [19]:
from urllib.request import urlopen #used to open urls
from bs4 import BeautifulSoup #used for extracting data from html websites
import requests

def get_title(soup): 
    try: 
        title = soup.find("div", attrs = {"class": "_4rR01T"})
        #print(title)
        title_value = title.string
        #print(title_value)
        title_string = title_value.strip()
    except AttributeError: 
        title_string = "NA"
    return title_string

def get_price(soup): 
    try: 
        price = soup.find("div", attrs = {"class": "_30jeq3 _1_WHN1"}).string.strip()
    except AttributeError: 
        price = "NA"
    return price

def get_rating_star(soup):
    try: 
        rating_star = soup.find("div", attrs = {"class": "_3LWZlK"}).string
    except AttributeError: 
        rating_star = "NA"
    return rating_star

def get_rating_count(soup):
    try: 
        rating_count = soup.find("span", attrs = {"class": "_13vcmD"}).string
    except AttributeError:
        rating_count = "NA"
    return rating_count

'''def get_availability(soup):
    try: 
        product_availability = soup.find("div", attrs = {"id": "availability"})
        product_availability = product_availability.find("span").string
    except AttributeError: 
        product_availability = "NA"
    return product_availability'''

if __name__ == '__main__': 
    #Headers for request
    HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Webpage URL
    URL = input("Enter flipkar search URL")
    webpage = requests.get(URL, headers = HEADERS)

    # Soup object containing webpage dat+a
    soup = BeautifulSoup(webpage.content, 'lxml')
    
    # Fetching the links based on the tag objects
    links = soup.find_all("a", attrs = {"class": "_1fQZEK"})
    links_list = []
    for link in links: 
        links_list.append(link.get('href'))
    
    for link in links_list:
        new_webpage = requests.get("https://www.flipkart.com" + link, headers = HEADERS)
        new_soup = BeautifulSoup(new_webpage.content, "lxml")
    
        print("Product Title:", get_title(new_soup))
        print("***********************************")
        print("Product Price:", get_price(new_soup))
        print("***********************************")
        print("product Rating Star:", get_rating_star(new_soup))
        print("***********************************")
        print("Product Rating Count:", get_rating_count(new_soup))
        print("***********************************")
        #print("Product Availability: ", get_availability(new_soup))'''

Product Title: NA
***********************************
Product Price: NA
***********************************
product Rating Star: None
***********************************
Product Rating Count: &
***********************************
Product Title: NA
***********************************
Product Price: NA
***********************************
product Rating Star: None
***********************************
Product Rating Count: &
***********************************
Product Title: NA
***********************************
Product Price: NA
***********************************
product Rating Star: None
***********************************
Product Rating Count: &
***********************************
Product Title: NA
***********************************
Product Price: NA
***********************************
product Rating Star: None
***********************************
Product Rating Count: &
***********************************
Product Title: NA
***********************************
Product Price: NA
********

KeyboardInterrupt: 

### Amazon Search Scrapping

In [4]:
from bs4 import BeautifulSoup
import requests

# Function to extract Product Title
def get_title(soup):
	try:
		title = soup.find("span", attrs={"id":'productTitle'})
		title_value = title.string
		title_string = title_value.strip()
	except AttributeError:
		title_string = ""	
	return title_string

# Function to extract Product Price
def get_price(soup):
	try:
		price = soup.find("span", attrs={'id':'priceblock_ourprice'}).string.strip()
	except AttributeError:
		try:
			# If there is some deal price
			price = soup.find("span", attrs={'id':'priceblock_dealprice'}).string.strip()
		except:		
			price = ""	
	return price

# Function to extract Product Rating
def get_rating(soup):
	try:
		rating = soup.find("i", attrs={'class':'a-icon a-icon-star a-star-4-5'}).string.strip()
	except AttributeError:
		try:
			rating = soup.find("span", attrs={'class':'a-icon-alt'}).string.strip()
		except:
			rating = ""	
	return rating

# Function to extract Number of User Reviews
def get_review_count(soup):
	try:
		review_count = soup.find("span", attrs={'id':'acrCustomerReviewText'}).string.strip()
	except AttributeError:
		review_count = ""	
	return review_count

# Function to extract Availability Status
def get_availability(soup):
	try:
		available = soup.find("div", attrs={'id':'availability'})
		available = available.find("span").string.strip()
	except AttributeError:
		available = "Not Available"	
	return available	

if __name__ == '__main__':

	# Headers for request
	HEADERS = ({'User-Agent':
	            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
	            'Accept-Language': 'en-US'})

	# The webpage URL
	URL = "https://www.amazon.com/s?k=playstation+4&ref=nb_sb_noss_2"
	
	# HTTP Request
	webpage = requests.get(URL, headers=HEADERS)

	# Soup Object containing all data
	soup = BeautifulSoup(webpage.content, "lxml")

	# Fetch links as List of Tag Objects
	links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})

	# Store the links
	links_list = []

	# Loop for extracting links from Tag Objects
	for link in links:
		links_list.append(link.get('href'))


	# Loop for extracting product details from each link 
	for link in links_list:

		new_webpage = requests.get("https://www.amazon.com" + link, headers=HEADERS)

		new_soup = BeautifulSoup(new_webpage.content, "lxml")
		
		# Function calls to display all necessary product information
		print("Product Title =", get_title(new_soup))
		print("Product Price =", get_price(new_soup))
		print("Product Rating =", get_rating(new_soup))
		print("Number of Product Reviews =", get_review_count(new_soup))
		print("Availability =", get_availability(new_soup))
		print()
		print("******************************************************")
		print()