## Web Scraping

In [1]:
# requests module is a popular Python library for making HTTP requests. 
# It allows the script to send HTTP requests to URLs and receive responses.
import requests
# BeautifulSoup is a library used for parsing HTML and XML documents and extracting data from them
from bs4 import BeautifulSoup as bs
# urlopen is used to open URLs and retrieve data from them
from urllib.request import urlopen 
# logging module can keep track and log events during the script's execution 
# It can be used to output log messages to different destinations, such as the console or a log file.
import logging

In [2]:
# Creating a URL for the search query "redmi" on Flipkart
flipcart_url = "https://www.flipkart.com/search?q=" + "redmi"

In [3]:
# The URL for the search query "redmi" on Flipkart is stored in the variable flipcart_url.
# We can now print the URL to see its value.
flipcart_url

'https://www.flipkart.com/search?q=redmi'

In [4]:
# Sending an HTTP request to the URL and receiving the response using the urlopen function.
# This will open the URL and retrieve data from it.
# NOTE: "urlclient" holds the HTTP response object rather than a client; a name such as
# "url_response" would be clearer, but the existing name is kept because later cells use it.
urlclient = urlopen(flipcart_url)

In [5]:
# The variable "urlclient" holds the response object obtained after sending an HTTP request
# and receiving the response from the Flipkart URL.
# display the value of urlclient variable
urlclient

<http.client.HTTPResponse at 0x2159f88b160>

In [6]:
# We can extract the HTML content from the response using the read() method and
# store it in flipcart_page variable
flipcart_page = urlclient.read()
# The body has been read in full, so close the response to release the network connection.
urlclient.close()

In [7]:
# Extracting the HTML content from the response using BeautifulSoup.
# html.parser is an argument passed to the BeautifulSoup constructor 
# It is the built-in HTML parser provided by BeautifulSoup
flipcart_html = bs(flipcart_page,'html.parser')

In [8]:
# URL of a single product page (Apple iPhone 12 Pro, Gold, 512 GB) on Flipkart;
# "iphone12pro" is only the value of its 'q' query parameter.
# NOTE: this is 'flipkart_url' (with a "k"), a different variable from 'flipcart_url' above.
flipkart_url = "https://www.flipkart.com/"+"apple-iphone-12-pro-gold-512-gb/p/itm157b3be191fd1?pid=MOBFWBYZGKFYVTWH&lid=LSTMOBFWBYZGKFYVTWHR65UKC&marketplace=FLIPKART&q=iphone12pro&store=tyy%2F4io&srno=s_1_2&otracker=search&iid=68136d77-b91a-42de-a358-1a7956c836a5.MOBFWBYZGKFYVTWH.SEARCH&ssid=ogmp5gq1740000001676967134521&qH=712933e6bd68e7b9"

In [9]:
# show the value stored in flipkart_url variable
flipkart_url

'https://www.flipkart.com/apple-iphone-12-pro-gold-512-gb/p/itm157b3be191fd1?pid=MOBFWBYZGKFYVTWH&lid=LSTMOBFWBYZGKFYVTWHR65UKC&marketplace=FLIPKART&q=iphone12pro&store=tyy%2F4io&srno=s_1_2&otracker=search&iid=68136d77-b91a-42de-a358-1a7956c836a5.MOBFWBYZGKFYVTWH.SEARCH&ssid=ogmp5gq1740000001676967134521&qH=712933e6bd68e7b9'

In [10]:
# Now, let's find all the div elements with the class "_1AtVbE col-12-12" in the HTML content.
# These div elements are usually big boxes that contain information about products on Flipkart.
# bigbox variable will hold a list of all the matching div elements found in the HTML content.
# find_all() is used instead of its legacy alias findAll(), matching the other cells of this notebook.
bigbox = flipcart_html.find_all("div", {"class": "_1AtVbE col-12-12"})
# flipcart_html was parsed from the results page for the search query "redmi",
# so these div elements come from that results page (not from the iPhone product URL above).

In [11]:
# The variable "bigbox" contains a list of all the matching div elements found in the HTML content.
# These come from the Flipkart results page for the search query "redmi" (see flipcart_url).
# len() returns how many matching elements were found; not every one is necessarily a product
# listing (the next cell discards the first three).
len(bigbox)

29

In [12]:
# This line of code deletes the first three elements from the 'bigbox' list.
# The 'del' keyword is used to remove elements from a list based on their index positions.
# In this case, we are removing the elements at index 0, 1, and 2, which correspond to the first three elements in the list.
# NOTE: this mutates 'bigbox' in place, so the cell is not idempotent - running it again
# removes three more elements. Re-run the cell that builds 'bigbox' before re-running this one.
del bigbox[0:3]

In [13]:
# This line of code constructs the product link URL by extracting the 'href' attribute of the 'a' tag within the fourth div element of 'bigbox' list.
# The 'a' tag typically contains the hyperlink to the product page.
# So, 'bigbox[3]' corresponds to the fourth product listing div element.
# The 'div' attribute is used to access the inner div element within this product listing div element.
# We then navigate down three levels to reach the 'a' tag, and finally, extract the 'href' attribute to get the product link URL.
productlink = "https://www.flipkart.com" + bigbox[3].div.div.div.a['href']

In [14]:
# The variable "productlink" contains the URL of a specific product on Flipkart.
# The "requests.get()" function is used to send an HTTP GET request to that URL and retrieve the HTML content of the page.
# The response from the server is stored in the "product_req" variable.
product_req = requests.get(productlink)

In [15]:
# The variable "productlink" contains the URL of a specific product on Flipkart.
# It was constructed in the previous code by extracting the 'href' attribute of the 'a' tag 
# within the fourth div element of the 'bigbox' list, which represents a product listing on Flipkart.
productlink

'https://www.flipkart.com/redmi-a2-sea-green-64-gb/p/itm5d49be2c0a95a?pid=MOBGPVYEHMPGNCBZ&lid=LSTMOBGPVYEHMPGNCBZENSHBF&marketplace=FLIPKART&q=redmi&store=search.flipkart.com&srno=s_1_6&otracker=search&fm=organic&iid=a252e103-69c6-4eb0-afa4-3f4eb86c20a1.MOBGPVYEHMPGNCBZ.SEARCH&ppt=None&ppn=None&ssid=vbipnz87tc0000001691071581326&qH=9b6bf0057c19bd94'

In [16]:
# The 'product_req.text' contains the HTML content of the product page.
# The 'bs()' function from BeautifulSoup library is used to parse this HTML content using the 'html.parser' parser.
# The parsed HTML content is stored in the 'product_html' variable, which can be used to extract information from the page.
product_html = bs(product_req.text,'html.parser')

In [17]:
# This line of code uses the 'find_all()' method of the 'product_html' BeautifulSoup object to find all div elements
# that have a class attribute equal to "_16PBlm". This class attribute is often used to uniquely identify a specific section
# on the product page where information about the product is stored.
# The 'find_all()' method returns a list of all matching div elements, and this list is stored in the 'comment_box' variable.
# The variable 'comment_box' now contains a list of all the div elements on the product page that have the class "_16PBlm".
# These div elements are likely to contain information about the product, such as comments, reviews, or other details.
comment_box = product_html.find_all("div",{"class" : "_16PBlm"})

In [18]:
len(comment_box)

11

In [19]:
# The provided code is a loop that iterates through each element in the 'comment_box' list,
# which contains div elements with class "_16PBlm". For each div element, it retrieves a specific
# 'p' tag with class "_2sc7ZR _2V5EHH" and prints its text content.
for i in comment_box:
    print(i.div.div.find_all('p',{"class":"_2sc7ZR _2V5EHH"})[0].text)

Flipkart Customer
Koushik Das
Hashim Bhaijaan
Rafikul  Shaikh 
Flipkart Customer
Rakesh  Kumar 
Flipkart Customer
Flipkart Customer
Sahanwaz  Alam 
KOUSHIK RAWANI


AttributeError: 'NoneType' object has no attribute 'div'

In [20]:
# Solution of the above error
# Loop through each element in the 'comment_box' list
for i in comment_box:
    # Check if the element is not None
    if i.div:
        # Access the 'div' attribute and find the 'p' tags with class "_2sc7ZR _2V5EHH"
        p_tags = i.div.div.find_all('p', {"class": "_2sc7ZR _2V5EHH"})
        if p_tags:
            # Print the text content of the first 'p' tag
            print(p_tags[0].text)

Flipkart Customer
Koushik Das
Hashim Bhaijaan
Rafikul  Shaikh 
Flipkart Customer
Rakesh  Kumar 
Flipkart Customer
Flipkart Customer
Sahanwaz  Alam 
KOUSHIK RAWANI


In [21]:
# Walk over every review container in 'comment_box' (div elements with class "_16PBlm").
# Containers without a child 'div' are skipped, because chaining further attribute
# lookups on None would raise AttributeError.
# For the rest, descend four nested 'div' levels (div.div.div.div) and print the
# text of the innermost one (the numeric rating shown in the output below).
for review_box in comment_box:
    if not review_box.div:
        continue
    rating_div = review_box.div.div.div.div
    print(rating_div.text)

4
1
5
5
1
3
5
1
5
5


In [22]:
# This code iterates through each element in the 'comment_box' list,
# which contains div elements with class "_16PBlm". For each element,
# it first checks that the element has a child 'div' tag (i.div is None otherwise).
# It then descends through three nested 'div' tags (i.div.div.div),
# takes the first 'p' tag found inside the innermost one,
# and prints that tag's text content (this appears to be the review heading, judging by the output below).
for i in comment_box:
    if i.div:
        print(i.div.div.div.p.text)

Good quality product
Don't waste your money
Classy product
Wonderful
Worthless
Just okay
Simply awesome
Utterly Disappointed
Mind-blowing purchase
Perfect product!


In [23]:
# Access the first element (index 0) in the 'comment_box' list.
# The element is a div with class "_16PBlm".
first_div = comment_box[0]

# Access the first 'div' tag nested inside that element.
nested_divs = first_div.div

# Find all the div elements below it that match an empty class attribute.
# The 'find_all' method returns a list of div elements that match the criteria.
div_elements = nested_divs.find_all('div', {"class": ''})

# Access the first element (index 0) from the list of div elements.
# This element is a div matched by the empty-class filter (not a "_16PBlm" div).
second_div = div_elements[0]

# Access the first 'div' nested inside the second div element and read its text.
# Judging by the output below, this text is the body of the review comment.
desired_text = second_div.div.text

# Print the desired text content.
print(desired_text)

Good product üëç


In [24]:
# This code iterates through each element in the 'comment_box' list,
# which contains div elements with class "_16PBlm". Elements without a child 'div' are skipped.
# For the others, it uses 'find_all' on i.div.div to find the div elements that match an empty class attribute.
# It then accesses the first element (index 0) from that list of div elements.
# That element is a div matched by the empty-class filter (not a "_16PBlm" div).
# Finally, it accesses the first 'div' inside that element and prints its text content.
for i in comment_box:
    if i.div:
        print(i.div.div.find_all('div',{"class":''})[0].div.text)

Good product üëç
It's have no unique features, like fingerprint sensor, face lock, button mirror system not available, not mi account, and more problems
Vary nice I m Happy
Nice
Very very bad product, there is no casting option, no fingerprint, battery backup is low, waste of money n no refund policy..I suggest don't use Flipkart for mobile order anymore
Outgoing sound slo
Awesome
This phone is hiting problem availableThis phone is not good project
Very good
good


In [25]:
# This code iterates through each element in the 'bigbox' list.
# For each element, it checks that every step of the chain i.div.div.div.a exists;
# a missing tag yields None, so the 'and' chain short-circuits and the element is skipped.
# The product URL is then read from the 'href' attribute of that 'a' tag.
# Finally, it prints the complete URL, which is constructed by prefixing "https://www.flipkart.com" to the extracted URL.
for i in bigbox:
    if i.div and i.div.div and i.div.div.div and i.div.div.div.a:
        print("https://www.flipkart.com" + i.div.div.div.a['href'])

https://www.flipkart.com/redmi-11-prime-peppy-purple-128-gb/p/itm52d6b0fe396e9?pid=MOBGZHFGUHYSHQYM&lid=LSTMOBGZHFGUHYSHQYMWGOOJ0&marketplace=FLIPKART&q=redmi&store=search.flipkart.com&srno=s_1_3&otracker=search&fm=organic&iid=a252e103-69c6-4eb0-afa4-3f4eb86c20a1.MOBGZHFGUHYSHQYM.SEARCH&ppt=None&ppn=None&ssid=vbipnz87tc0000001691071581326&qH=9b6bf0057c19bd94
https://www.flipkart.com/redmi-21-59cm-8-5-inch-lcd-writing-pad-smart-lock-abs-material-kids-adults/p/itm0cf80d35651cb?pid=ETYGGWZZVNRBWPDP&lid=LSTETYGGWZZVNRBWPDPKWJOBY&marketplace=FLIPKART&q=redmi&store=search.flipkart.com&srno=s_1_4&otracker=search&fm=organic&iid=a252e103-69c6-4eb0-afa4-3f4eb86c20a1.ETYGGWZZVNRBWPDP.SEARCH&ppt=None&ppn=None&ssid=vbipnz87tc0000001691071581326&qH=9b6bf0057c19bd94
https://www.flipkart.com/redmi-a1-light-green-32-gb/p/itmecc9e7ba0a1df?pid=MOBGGYBAHSAJVKHP&lid=LSTMOBGGYBAHSAJVKHPLQ4OVB&marketplace=FLIPKART&q=redmi&store=search.flipkart.com&srno=s_1_5&otracker=search&fm=organic&iid=a252e103-69c6-4eb0-

In [26]:
# The 'type()' function is used to determine the data type of the result.
# The result will be a string, as it represents a URL.
type(bigbox[4].div.div.div.a['href'])

str

### Scrape PW Skills course details


In [27]:
# Send a GET request to the URL to fetch the webpage's content.
# NOTE: despite its name, 'pwskills_url' holds the Response object, not a URL string.
pwskills_url = requests.get("https://pwskills.com/course/Data-Science-masters")
# A non-2xx reply (for example a 500 error page) would otherwise be parsed without complaint,
# so report it here instead of silently scraping an error page.
if not pwskills_url.ok:
    logging.warning("Request to %s failed with HTTP status %s", pwskills_url.url, pwskills_url.status_code)

In [28]:
# Parse the HTML content of the webpage using BeautifulSoup
pwskills_html = bs(pwskills_url.text,'html.parser')

In [29]:
# Look up all 'div' elements with class 'Hero_course-desc__26_LL' (no leading underscore) and display the resulting list.
# The list is empty here: the page that was parsed is a "500: Internal Server Error" page (see the HTML printed below),
# so it contains no course description.
pwskills_html.find_all('div',{'class':'Hero_course-desc__26_LL'})

[]

In [30]:
# This line of code simply prints the variable 'pwskills_html', which contains the parsed HTML content from the webpage.
# It will output the entire content of 'pwskills_html', which may be quite lengthy depending on the webpage's structure.
pwskills_html

<!DOCTYPE html>
<html lang="en"><head><meta charset="utf-8"/><meta content="width=device-width" name="viewport"/><title>500: Internal Server Error</title><meta content="3" name="next-head-count"/><meta content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" name="robots"/><meta content="3zu432x1d81yi55yiwh5v553u9nuc1" name="facebook-domain-verification"/><meta content="en_US" name="locale"/><meta content="website" property="og:type"/><meta content="https://pwskills.com/images/pwskills_thumbnail.png" property="og:image"/><meta content="PW Skills" property="og:site_name"/><meta content="PW Skills" property="og:image:alt"/><meta content="image/png" property="og:image:type"/><meta content="560" property="og:image:width"/><meta content="292" property="og:image:height"/><meta content="summary_large_image" property="twitter:card"/><meta content="@pw__skills" name="twitter:site"/><meta content="https://pwskills.com/images/pwskills_thumbnail.png" property="twitter

## Image scraping

In [1]:
# Import the necessary libraries for web image scraping and working with URLs
import requests # Used for sending HTTP requests to fetch webpage content
from bs4 import BeautifulSoup # Used for parsing HTML content
from urllib.request import urlopen as uReq # Used for opening URLs
import logging # Used for logging messages
import pymongo # Used for interacting with MongoDB databases
import os # Used for interacting with the operating system

In [2]:
# 'save_dir' is the directory where we want to save the images.
# We are setting it to "images/" for now.
# 'os.path.exists(save_dir)' checks if the directory 'images/' already exists in the file system.
# 'if not os.path.exists(save_dir)' checks if the directory 'images/' does not exist.
# 'os.makedirs(save_dir)' creates the directory 'images/' if it does not exist.
save_dir = "images/"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

In [3]:
# We set the 'User-Agent' header to imitate a real web browser and prevent being blocked by some websites.
# The 'User-Agent' header provides information about the client making the request (in this case, us).
# Some websites may block requests that don't have a valid 'User-Agent'.
# Here, the string identifies us as Google Chrome 58 on 64-bit Windows 10.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"}
# To use these headers, pass the dictionary to the request call, e.g. requests.get(url, headers=headers).
# BeautifulSoup only parses the downloaded HTML and does not take request headers.
# In the rest of this section, 'requests' fetches the page and the image bytes, BeautifulSoup parses the page,
# and 'os' is used to build the paths of the files saved in the target directory.

In [4]:
# Perform the HTTP request to Google Image search using a specific query.
# The 'tbm=isch' parameter appears to be what selects image results; the remaining
# parameters look like they were copied from a browser session - TODO confirm they are needed.
# NOTE: the 'headers' dictionary defined above is not passed to this call,
# so the request goes out with the default headers of the 'requests' library.
query = "sudhanshu kumar"
response = requests.get(f"https://www.google.com/search?q={query}&sxsrf=AJOqlzUuff1RXi2mm8I_OqOwT9VjfIDL7w:1676996143273&source=lnms&tbm=isch&sa=X&ved=2ahUKEwiq-qK7gaf9AhXUgVYBHYReAfYQ_AUoA3oECAEQBQ&biw=1920&bih=937&dpr=1#imgrc=1th7VhSesfMJ4M")

In [5]:
# Create a BeautifulSoup object to parse the HTML content of the response
soup = BeautifulSoup(response.content ,'html.parser')

In [6]:
# This line of code uses BeautifulSoup's find_all() method to search for all "img" tags in the parsed HTML content (soup).
# The find_all() method returns a list of all the occurrences of the "img" tag in the HTML.
# This is useful for web scraping as it allows us to extract all the images present on the webpage.
images_tags = soup.find_all("img")

In [7]:
len(images_tags)

21

In [8]:
# Now, let's delete the first element from the 'images_tags' list using the 'del' statement.
# The 'del' statement is used to remove an item from a list based on its index.
# In this case, we want to remove the first element from the list, which has an index of 0.
del images_tags[0]

In [9]:
# Create an empty list to store, for every downloaded image, a dictionary holding
# the image URL (key "index") and the raw image bytes (key "image").
img_data_mongo = []

# Iterate through the image tags together with their position in 'images_tags'.
# enumerate() supplies the position directly; looking it up with images_tags.index(tag)
# rescans the list on every iteration and returns the FIRST equal tag, so two identical
# <img> tags would be written to the same file.
for position, img_tag in enumerate(images_tags):
    # Read the 'src' attribute, which contains the image URL (None when the tag has no 'src').
    image_url = img_tag.get('src')

    # Skip tags that cannot be downloaded over HTTP(S): missing 'src', inline "data:" URIs, etc.
    if not image_url or not image_url.startswith(("http://", "https://")):
        continue

    # Perform an HTTP request to get the content of the image from the URL
    image_data = requests.get(image_url).content

    # Record the image URL as 'index' and the image data as 'image'
    img_data_mongo.append({"index": image_url, "image": image_data})

    # Write the bytes to "<save_dir>/<query>_<position>.jpg" in binary mode,
    # where 'position' is the tag's position in the 'images_tags' list.
    with open(os.path.join(save_dir, f"{query}_{position}.jpg"), "wb") as f:
        f.write(image_data)

In [10]:
# Connect to the local MongoDB server
client = pymongo.MongoClient('mongodb://localhost:27017/')
# Access the "image_scrap" database
db = client["image_scrap"]
# Access the "image_scrap" collection in the database
coll_image = db["image_scrap"]
# Insert the image data (img_data_mongo) into the "image_scrap" collection
coll_image.insert_many(img_data_mongo)

<pymongo.results.InsertManyResult at 0x24c63b9cac0>

Now we build two projects:

## Review Scraper That Inserts the Data into MongoDB

Under the `static\css` folder, create two files: `main.css` and `style.css`. Then, under the `templates` folder, create three files: `base.html`, `index.html` and `results.html`. Finally, create the `application.py` and `requirements.txt` files.

To execute the project, run these commands in command line:

`pip install -r requirements.txt`

Now start `MongoDB`, connect to it, and then run the command: `python application.py`
 
Now click on the link: `http://127.0.0.1:8000/`

Then, in the search box, enter a product name such as `samsung`, `iphone11` or `iphone12pro` to see the list of matching results from `flipkart.com`. The search results are also stored in `MongoDB`.

**main.css**
```css
/* CSS code for styling the page */

/* Set margin and font-family for body and html elements */
body, html {
    margin: 0;
    font-family: sans-serif;
}

/* Set margin and width for elements with the class "content" */
.content {
    margin: 0 auto; /* Center the element horizontally */
    width: 400px;   /* Set the width to 400 pixels */
}

/* Apply border to table, table cells (td), and table header cells (th) */
table, td, th {
    border: 1px solid #aaa; /* Set a 1-pixel solid border with color #aaa */
}

/* Set border-collapse and width for tables */
table {
    border-collapse: collapse; /* Collapse table borders into a single border */
    width: 100%;               /* Set the table width to 100% of its container */
}

/* Set height for table header cells (th) */
th {
    height: 30px; /* Set the height of table header cells to 30 pixels */
}

/* Center text and add padding to table cells (td) */
td {
    text-align: center; /* Center the text inside table cells */
    padding: 5px;       /* Add 5 pixels of padding around the cell content */
}

/* Apply margin to elements with the class "form" */
.form {
    margin-top: 20px; /* Add a top margin of 20 pixels */
}

/* Set width for the element with the ID "content" */
#content {
    width: 70%; /* Set the width to 70% of its container */
}
```

**style.css**
```css
/* Set the background color of the entire page to a light blue shade (#91ced4). */
body {
  background-color: #91ced4;
}

/* Apply box-sizing: border-box to all elements within the body. */
/* This ensures that padding and border widths are included in the element's total width and height. */
body * {
  box-sizing: border-box;
}

/* Style the header element with a dark blue background (#327a81), white text color, and other properties. */
.header {
  background-color: #327a81;
  color: white;
  font-size: 1.5em;
  padding: 1rem;
  text-align: center;
  text-transform: uppercase;
}

/* Style all img elements with a border radius of 50%, and set their height and width to 60px. */
img {
  border-radius: 50%;
  height: 60px;
  width: 60px;
}

/* Style the container div for the table with a border, border radius and box shadow; it is 800px wide, capped at the available width minus 2em. */
.table-users {
  border: 1px solid #327a81;
  border-radius: 10px;
  box-shadow: 3px 3px 0 rgba(0, 0, 0, 0.1);
  max-width: calc(100% - 2em);
  margin: 1em auto;
  overflow: hidden;
  width: 800px;
}

/* Set the width of the table to 100% and style the table cells (td) and table headers (th). */
table {
  width: 100%;
}
table td, table th {
  color: #2b686e;
  padding: 10px;
}
table td {
  text-align: center;
  vertical-align: middle;
}
table td:last-child {
  font-size: 0.95em;
  line-height: 1.4;
  text-align: left;
}
table th {
  background-color: #daeff1;
  font-weight: 300;
}

/* Apply alternating background colors to table rows to create a striped effect. */
table tr:nth-child(2n) {
  background-color: white;
}
table tr:nth-child(2n+1) {
  background-color: #edf7f8;
}

/* Media query for screens with a maximum width of 700px. */
/* Apply responsive styles to reformat the table for smaller screens. */
@media screen and (max-width: 700px) {
  /* Set the display property of table, table rows (tr), and table cells (td) to block. */
  table, tr, td {
    display: block;
  }

  /* Style the first cell (td) in each row to position it absolutely at the center, and set its width to 100px. */
  td:first-child {
    position: absolute;
    top: 50%;
    -webkit-transform: translateY(-50%);
    transform: translateY(-50%);
    width: 100px;
  }

  /* Style all cells (td) except the first one to clear both, add left margin, set padding, and align left. */
  td:not(:first-child) {
    clear: both;
    margin-left: 100px;
    padding: 4px 20px 4px 90px;
    position: relative;
    text-align: left;
  }

  /* Pseudo element styling to add labels (before content) to each cell (td). */
  td:not(:first-child):before {
    color: #91ced4;
    content: '';
    display: block;
    left: 0;
    position: absolute;
  }

  /* Add specific labels before each cell (td) based on their position in the table. */
  /* The labels follow the column order of the reviews table in results.html: */
  /* Product (1, unlabeled), Name (2), Rating (3), Comment Heading (4), Comments (5). */
  /* 'Heading:' is kept short so the label fits in the 90px left padding of the cell. */
  td:nth-child(2):before {
    content: 'Name:';
  }
  td:nth-child(3):before {
    content: 'Rating:';
  }
  td:nth-child(4):before {
    content: 'Heading:';
  }
  td:nth-child(5):before {
    content: 'Comments:';
  }

  /* Style each row (tr) to add padding and set its position to relative. */
  tr {
    padding: 10px 0;
    position: relative;
  }

  /* Hide the first row (header row) since it is not needed for the responsive design. */
  tr:first-child {
    display: none;
  }
}

/* Media query for screens with a maximum width of 500px. */
/* Apply additional responsive styles to reformat the table for even smaller screens. */
@media screen and (max-width: 500px) {
  /* Modify the styling of the header element for smaller screens. */
  .header {
    background-color: transparent;
    color: white;
    font-size: 2em;
    font-weight: 700;
    padding: 0;
    text-shadow: 2px 2px 0 rgba(0, 0, 0, 0.1);
  }

  /* Modify the styling of images for smaller screens. */
  img {
    border: 3px solid;
    border-color: #daeff1;
    height: 100px;
    margin: 0.5rem 0;
    width: 100px;
  }

  /* Style the first cell (td) in each row for smaller screens. */
  td:first-child {
    background-color: #c8e7ea;
    border-bottom: 1px solid #91ced4;
    border-radius: 10px 10px 0 0;
    position: relative;
    top: 0;
    -webkit-transform: translateY(0);
    transform: translateY(0);
    width: 100%;
  }

  /* Modify the styling of all cells (td) except the first one for smaller screens. */
  td:not(:first-child) {
    margin: 0;
    padding: 5px 1em;
    width: 100%;
  }

  /* Pseudo element styling for smaller screens to adjust label appearance. */
  td:not(:first-child):before {
    font-size: .8em;
    padding-top: 0.3em;
    position: relative;
  }

  /* Adjust the padding of the last cell (td) for smaller screens. */
  td:last-child {
    padding-bottom: 1rem !important;
  }

  /* Modify the styling of table rows (tr) for smaller screens. */
  tr {
    background-color: white !important;
    border: 1px solid #6cbec6;
    border-radius: 10px;
    box-shadow: 2px 2px 0 rgba(0, 0, 0, 0.1);
    margin: 0.5rem 0;
    padding: 0;
  }

  /* Remove border and box shadow from the table container for smaller screens. */
  .table-users {
    border: none;
    box-shadow: none;
    overflow: visible;
  }
}
```

**base.html**
```html
<!-- This line specifies the document type and version of HTML being used. -->
<!DOCTYPE html>

<!-- This line starts the HTML document and specifies the language as English. -->
<html lang="en">

<head>
    <!-- This line sets the character encoding to UTF-8, which supports various characters and symbols. -->
    <meta charset="UTF-8">

    <!-- This line sets the viewport properties, allowing the page to adjust its layout to fit different screen sizes. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">

    <!-- This line defines the compatibility mode for Internet Explorer. -->
    <meta http-equiv="X-UA-Compatible" content="ie=edge">

    <!-- This line links to the application's main stylesheet, static/css/main.css; url_for() builds the URL of the static file. -->
    <link rel="stylesheet" href="{{ url_for('static', filename='css/main.css') }}">

    <!-- This is a placeholder for a block of content that can be filled in later. -->
    <!-- In Jinja2, the template engine used by Flask, this allows for reusable templates. -->
    <!-- It's common to override this block in other HTML files that extend this template. -->
    {% block head %}{% endblock %}
</head>

<body>
    <!-- This is another placeholder for a block of content that can be filled in later. -->
    <!-- Similar to the previous block, it's used for extending and overriding templates. -->
    {% block body %}{% endblock %}
</body>

</html>
```

**index.html**
```html
<!-- This line extends a base HTML template named 'base.html'. -->
<!-- It means this file will inherit the content from the 'base.html' template. -->
{% extends 'base.html' %}

<!-- This block is named 'head' and it overrides the 'head' block in the 'base.html' template. -->
{% block head %}

<!-- This line sets the title of the web page to 'Search Page'. -->
<title>Search Page</title>

<!-- This line links to the page-specific stylesheet, static/css/style.css. -->
<!-- url_for() builds the URL of the static file from its filename. -->
<link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
{% endblock %}

<!-- This block is named 'body' and it overrides the 'body' block in the 'base.html' template. -->
{% block body %}

<!-- This <div> element has a class attribute set to 'content', which can be used for styling. -->
<div class="content">

    <!-- This <h1> heading element has its text centered with the style attribute. -->
    <h1 style="text-align: center">Search</h1>

    <!-- This <div> element has a class attribute set to 'form', which can be used for styling. -->
    <div class="form">

        <!-- This <form> element is used to create a search form. -->
        <!-- It has an 'action' attribute set to '/review', which means the form will be submitted to the '/review' URL on form submission. -->
        <!-- The 'method' attribute is set to 'POST', indicating that the form data will be sent to the server using the HTTP POST method. -->
        <form action="/review" method="POST">

            <!-- This <input> element is of type 'text' and has a 'name' attribute set to 'content'. -->
            <!-- It will be used to enter the search content. -->
            <input type="text" name="content" id="content">

            <!-- This <input> element is of type 'submit' and has its value set to 'Search'. -->
            <!-- It will be used as the submit button for the form. -->
            <input type="submit" value="Search">
        </form>
    </div>
</div>
{% endblock %}
```

**results.html**
```html
<!-- This line specifies the document type and version of HTML being used. -->
<!DOCTYPE html>

<!-- This line starts the HTML document and specifies the language as English. -->
<html lang="en">

<head>
    <!-- This line sets the character encoding to UTF-8, which supports various characters and symbols. -->
    <meta charset="UTF-8">

    <!-- This line sets the title of the web page to "Review Page". -->
    <title>Review Page</title>

    <!-- This line links to an external stylesheet from the 'cdnjs' content delivery network (CDN). -->
    <!-- It loads the 'normalize.min.css' file, which helps to standardize styles across different browsers. -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css">

    <!-- This line links to the application's own stylesheet, static/css/style.css, which provides the custom styles for the page. -->
    <!-- url_for() builds the URL of the static file, so the link works whatever URL the page is served from. -->
    <!-- (A second link to "./style.css" was removed: it is resolved relative to the page URL, not the templates folder, and it duplicated this stylesheet.) -->
    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
</head>

<body>
    <!-- This <div> element has a class attribute set to 'table-users'. -->
    <!-- It can be used for styling or selecting this element using CSS or JavaScript. -->
    <div class="table-users">
        <!-- This <div> element has a class attribute set to 'header'. -->
        <!-- It can be used for styling or selecting this element using CSS or JavaScript. -->
        <div class="header">Reviews</div>

        <!-- This <table> element is used to create a table to display reviews. -->
        <!-- The 'cellspacing' attribute sets the space between table cells to 0. -->
        <table cellspacing="0">
            <!-- This <tr> element represents a table row and contains table headings (th). -->
            <tr>
                <!-- This <th> element represents a table header cell for the "Product" column. -->
                <th>Product</th>

                <!-- This <th> element represents a table header cell for the "Name" column. -->
                <th>Name</th>

                <!-- This <th> element represents a table header cell for the "Rating" column. -->
                <th>Rating</th>

                <!-- This <th> element represents a table header cell for the "Comment Heading" column. -->
                <th>Comment Heading</th>

                <!-- This <th> element represents a table header cell for the "Comments" column. -->
                <!-- The 'width' attribute is set to "230", which sets the width of this column to 230 pixels. -->
                <th width="230">Comments</th>
            </tr>

                <!-- This is a placeholder for a loop that iterates over a list of reviews. -->
                <!-- The loop is using a template engine like Jinja, Django, or similar. -->
                <!-- The loop will generate table rows (tr) for each review in the 'reviews' list. -->
                {% for review in reviews %}
            <tr>
                <!-- These <td> elements represent table data cells, used to display review details. -->
                <!-- This is where the review details (Product, Name, Rating, Comment Heading, Comments) would be inserted when the loop runs. -->
                <td>{{review['Product']}}</td>
                <td>{{review['Name']}}</td>
                <td>{{review['Rating']}}</td>
                <td>{{review['CommentHead']}}</td>
                <td>{{review['Comment']}} </td>
            </tr>
                {% endfor %}
        </table>
    </div>
</body>

</html>
```

**application.py**
```python
# Import necessary modules from Flask and other libraries.
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS, cross_origin
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import pymongo

# Initialize a Flask app.
# One Flask instance bound to two names: `application` and the shorter alias
# `app`, which the route decorators below use.
app = application = Flask(__name__)

# Define a route to display the home page.
@app.route('/', methods=['GET'])
@cross_origin()
def homePage():
    """Serve the landing page, i.e. the rendered ``index.html`` search form."""
    landing_page = render_template("index.html")
    return landing_page

# Define a route to show review comments in a web UI.
@app.route('/review', methods=['POST', 'GET'])
@cross_origin()
def index():
    """Scrape Flipkart reviews for the submitted search term and render them.

    GET requests render the search form (``index.html``).

    POST requests read the ``content`` form field, open the Flipkart search
    page for it, follow the link found in the first product box, collect the
    review boxes of that product page, store the reviews in the local MongoDB
    collection ``review_scrap.review_scrap_data`` and render ``results.html``.

    Returns:
        The rendered template, or the plain string ``'something is wrong'``
        when any step of the scrape raises.
    """
    # Imported here so the view stays runnable without touching the
    # module-level import block.
    from urllib.parse import quote

    if request.method != 'POST':
        # Render the 'index.html' template for the GET request.
        return render_template('index.html')

    try:
        # Get the search string from the submitted form and remove spaces.
        searchString = request.form['content'].replace(" ", "")
        # Percent-encode the user-supplied term so characters such as '&', '#'
        # or non-ASCII letters cannot alter or break the query string.
        flipkart_url = "https://www.flipkart.com/search?q=" + quote(searchString, safe='')
        # The context manager closes the connection even if read() raises.
        with uReq(flipkart_url) as uClient:
            flipkartPage = uClient.read()
        # Parse the HTML content using BeautifulSoup.
        flipkart_html = bs(flipkartPage, "html.parser")
        # Find all the big boxes containing product details on the search page.
        bigboxes = flipkart_html.find_all("div", {"class": "_1AtVbE col-12-12"})
        # Delete the first three boxes, as they are not relevant to the products.
        del bigboxes[0:3]
        # If nothing is left this raises IndexError, which the handler at the
        # bottom turns into the generic error response.
        box = bigboxes[0]
        # Extract the product link from the first big box.
        productLink = "https://www.flipkart.com" + box.div.div.div.a['href']
        # Fetch the content of the product page.
        prodRes = requests.get(productLink)
        prodRes.encoding = 'utf-8'
        prod_html = bs(prodRes.text, "html.parser")
        # Find all the comment boxes on the product page.
        commentboxes = prod_html.find_all('div', {'class': "_16PBlm"})

        # Create a CSV file named after the search string and write the header
        # row. Characters other than letters, digits, '-' and '_' are replaced
        # so the form input cannot point the file outside the working directory.
        safe_name = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in searchString)
        filename = safe_name + ".csv"
        with open(filename, "w") as fw:
            fw.write("Product, Customer Name, Rating, Heading, Comment \n")

        reviews = []
        for commentbox in commentboxes:
            try:
                # Extract the customer name from the comment box.
                name = commentbox.div.div.find_all('p', {'class': '_2sc7ZR _2V5EHH'})[0].text
            except Exception:
                name = 'No Name'

            try:
                # Extract the rating from the comment box.
                rating = commentbox.div.div.div.div.text
            except Exception:
                rating = 'No Rating'

            try:
                # Extract the comment heading from the comment box.
                commentHead = commentbox.div.div.div.p.text
            except Exception:
                commentHead = 'No Comment Heading'

            try:
                # Extract the customer comment from the comment box.
                comtag = commentbox.div.div.find_all('div', {'class': ''})
                custComment = comtag[0].div.text
            except Exception as e:
                print("Exception while creating dictionary: ", e)
                # Without a fallback the name would be unbound on the first
                # review or silently reuse the previous review's text.
                custComment = 'No Comment'

            # Create a dictionary to store review details and append it to the reviews list.
            reviews.append({"Product": searchString, "Name": name, "Rating": rating,
                            "CommentHead": commentHead, "Comment": custComment})

        # Connect to MongoDB and insert the review data into the
        # 'review_scrap_data' collection. insert_many() rejects an empty list,
        # so the insert is skipped when no reviews were found; the context
        # manager closes the client connection afterwards.
        if reviews:
            with pymongo.MongoClient("mongodb://localhost:27017/") as client:
                client['review_scrap']['review_scrap_data'].insert_many(reviews)

        # Render the 'results.html' template and pass the reviews data to it.
        # NOTE(review): the last entry is deliberately left out, as in the
        # original code - presumably the final matching <div> is not a real
        # review; confirm against the live page markup.
        return render_template('results.html', reviews=reviews[:-1])
    except Exception as e:
        print('The Exception message is: ', e)
        return 'something is wrong'

# Start the Flask application on localhost at port 8000 in debug mode.
if __name__ == "__main__":
    # Only runs when the file is executed directly (python application.py):
    # start Flask's built-in server on the loopback interface, port 8000,
    # with debug mode switched on.
    app.run(debug=True, port=8000, host='127.0.0.1')
```

**requirements.txt**
```md
beautifulsoup4==4.9.1
bs4==0.0.1
certifi==2020.6.20
chardet==3.0.4
click==7.1.2
Flask==1.1.2
Flask-Cors==3.0.9
gunicorn==20.0.4
idna==2.10
itsdangerous==1.1.0
Jinja2==2.11.2
MarkupSafe==1.1.1
requests==2.24.0
six==1.15.0
soupsieve==2.0.1
urllib3==1.25.10
Werkzeug==1.0.1
pymongo
```

## Review Scrapper With Logging

Under the `static/css` folder, create two files: `main.css` and `style.css`. Then, under the `templates` folder, create three files: `base.html`, `index.html` and `results.html`. Finally, create the `application.py` and `requirements.txt` files.

To execute the project, run these commands in command line:

`pip install -r requirements.txt`

Now run the command: `python application.py`
 
Now open the link that appears inside the file `scrapper.log` in a browser: `http://127.0.0.1:8000/`

Then, in the `search box`, enter a product name such as `samsung`, `iphone11` or `iphone12pro` to see the list of matched results from `flipkart.com`.

**main.css**
```css
/* CSS code for styling the page */
/* This file is linked from base.html, so it applies to every template that extends base.html (index.html). */
/* results.html is a standalone page that does not link this file. */

/* Set margin and font-family for body and html elements */
body, html {
    margin: 0;
    font-family: sans-serif;
}

/* Set margin and width for elements with the class "content" */
/* In index.html this is the outer <div class="content"> wrapping the search heading and form. */
.content {
    margin: 0 auto; /* Center the element horizontally */
    width: 400px;   /* Set the width to 400 pixels */
}

/* Apply border to table, table cells (td), and table header cells (th) */
table, td, th {
    border: 1px solid #aaa; /* Set a 1-pixel solid border with color #aaa */
}

/* Set border-collapse and width for tables */
table {
    border-collapse: collapse; /* Collapse table borders into a single border */
    width: 100%;               /* Set the table width to 100% of its container */
}

/* Set height for table header cells (th) */
th {
    height: 30px; /* Set the height of table header cells to 30 pixels */
}

/* Center text and add padding to table cells (td) */
td {
    text-align: center; /* Center the text inside table cells */
    padding: 5px;       /* Add 5 pixels of padding around the cell content */
}

/* Apply margin to elements with the class "form" */
/* In index.html this is the <div class="form"> around the search form. */
.form {
    margin-top: 20px; /* Add a top margin of 20 pixels */
}

/* Set width for the element with the ID "content" */
/* This ID selector is distinct from the ".content" class above: in index.html it matches the search text <input id="content">. */
#content {
    width: 70%; /* Set the width to 70% of its container */
}
```

**style.css**
```css
/* Set the background color of the entire page to a light blue shade (#91ced4). */
body {
  background-color: #91ced4;
}

/* Apply box-sizing: border-box to all elements within the body. */
/* This ensures that padding and border widths are included in the element's total width and height. */
body * {
  box-sizing: border-box;
}

/* Style elements with the class "header": dark teal background (#327a81), white uppercase centered text. */
/* In results.html this is the "Reviews" banner above the table. */
.header {
  background-color: #327a81;
  color: white;
  font-size: 1.5em;
  padding: 1rem;
  text-align: center;
  text-transform: uppercase;
}

/* Style all img elements with a border radius of 50%, and set their height and width to 60px. */
img {
  border-radius: 50%;
  height: 60px;
  width: 60px;
}

/* Style the container div for the table with a border, border radius and box shadow. */
/* The container is 800px wide but never wider than the viewport minus 2em (max-width), and is centered by the auto side margins. */
/* overflow: hidden clips the table's square corners to the rounded border. */
.table-users {
  border: 1px solid #327a81;
  border-radius: 10px;
  box-shadow: 3px 3px 0 rgba(0, 0, 0, 0.1);
  max-width: calc(100% - 2em);
  margin: 1em auto;
  overflow: hidden;
  width: 800px;
}

/* Set the width of the table to 100% and style the table cells (td) and table headers (th). */
table {
  width: 100%;
}
table td, table th {
  color: #2b686e;
  padding: 10px;
}
table td {
  text-align: center;
  vertical-align: middle;
}
/* The last cell of each row (the "Comments" column in results.html) holds longer text, so it is left-aligned with more line spacing. */
table td:last-child {
  font-size: 0.95em;
  line-height: 1.4;
  text-align: left;
}
table th {
  background-color: #daeff1;
  font-weight: 300;
}

/* Apply alternating background colors to table rows to create a striped effect. */
/* Even rows are white, odd rows are a pale teal. */
table tr:nth-child(2n) {
  background-color: white;
}
table tr:nth-child(2n+1) {
  background-color: #edf7f8;
}

/* Media query for screens with a maximum width of 700px. */
/* Apply responsive styles to reformat the table for smaller screens: */
/* every row becomes a stacked card, with the first cell (Product) pinned on the left */
/* and the remaining cells listed next to it, each prefixed by a generated label. */
@media screen and (max-width: 700px) {
  /* Set the display property of table, table rows (tr), and table cells (td) to block. */
  table, tr, td {
    display: block;
  }

  /* Style the first cell (td) in each row to position it absolutely, vertically centered in the row, and set its width to 100px. */
  td:first-child {
    position: absolute;
    top: 50%;
    -webkit-transform: translateY(-50%);
    transform: translateY(-50%);
    width: 100px;
  }

  /* Style all cells (td) except the first one to clear both, add left margin, set padding, and align left. */
  /* The 100px left margin leaves room for the first cell; the 90px left padding leaves room for the label. */
  td:not(:first-child) {
    clear: both;
    margin-left: 100px;
    padding: 4px 20px 4px 90px;
    position: relative;
    text-align: left;
  }

  /* Pseudo element styling to add labels (before content) to each cell (td). */
  td:not(:first-child):before {
    color: #91ced4;
    content: '';
    display: block;
    left: 0;
    position: absolute;
  }

  /* Add specific labels before each cell (td) based on their position in the table. */
  /* The labels replace the hidden header row, so they must match the column order */
  /* used in results.html: Product, Name, Rating, Comment Heading, Comments. */
  td:nth-child(2):before {
    content: 'Name:';
  }
  td:nth-child(3):before {
    content: 'Rating:';
  }
  td:nth-child(4):before {
    content: 'Heading:';
  }
  td:nth-child(5):before {
    content: 'Comments:';
  }

  /* Style each row (tr) to add padding and set its position to relative. */
  /* position: relative makes the row the positioning context for its absolutely placed first cell. */
  tr {
    padding: 10px 0;
    position: relative;
  }

  /* Hide the first row (header row) since the per-cell labels above take its place. */
  tr:first-child {
    display: none;
  }
}

/* Media query for screens with a maximum width of 500px. */
/* Apply additional responsive styles to reformat the table for even smaller screens. */
/* These rules are applied on top of the max-width: 700px rules, which also match at this size. */
@media screen and (max-width: 500px) {
  /* Modify the styling of the header element for smaller screens. */
  .header {
    background-color: transparent;
    color: white;
    font-size: 2em;
    font-weight: 700;
    padding: 0;
    text-shadow: 2px 2px 0 rgba(0, 0, 0, 0.1);
  }

  /* Modify the styling of images for smaller screens. */
  img {
    border: 3px solid;
    border-color: #daeff1;
    height: 100px;
    margin: 0.5rem 0;
    width: 100px;
  }

  /* Style the first cell (td) in each row for smaller screens. */
  /* position, top and transform undo the absolute, vertically centered placement from the 700px rules, */
  /* so the first cell becomes a full-width strip at the top of the row. */
  td:first-child {
    background-color: #c8e7ea;
    border-bottom: 1px solid #91ced4;
    border-radius: 10px 10px 0 0;
    position: relative;
    top: 0;
    -webkit-transform: translateY(0);
    transform: translateY(0);
    width: 100%;
  }

  /* Modify the styling of all cells (td) except the first one for smaller screens. */
  /* The left margin and label padding from the 700px rules are removed because the first cell no longer sits beside them. */
  td:not(:first-child) {
    margin: 0;
    padding: 5px 1em;
    width: 100%;
  }

  /* Pseudo element styling for smaller screens to adjust label appearance. */
  /* position: relative puts the label back into normal flow, above the cell text. */
  td:not(:first-child):before {
    font-size: .8em;
    padding-top: 0.3em;
    position: relative;
  }

  /* Adjust the padding of the last cell (td) for smaller screens. */
  td:last-child {
    padding-bottom: 1rem !important;
  }

  /* Modify the styling of table rows (tr) for smaller screens. */
  /* !important on the background overrides the striped "table tr:nth-child" rules, which are more specific than this selector. */
  tr {
    background-color: white !important;
    border: 1px solid #6cbec6;
    border-radius: 10px;
    box-shadow: 2px 2px 0 rgba(0, 0, 0, 0.1);
    margin: 0.5rem 0;
    padding: 0;
  }

  /* Remove border and box shadow from the table container for smaller screens. */
  .table-users {
    border: none;
    box-shadow: none;
    overflow: visible;
  }
}
```

**base.html**
```html
<!-- Base layout template: it holds the shared page skeleton, and other templates extend it and fill in the 'head' and 'body' blocks. -->
<!-- This line specifies the document type and version of HTML being used. -->
<!DOCTYPE html>

<!-- This line starts the HTML document and specifies the language as English. -->
<html lang="en">

<head>
    <!-- This line sets the character encoding to UTF-8, which supports various characters and symbols. -->
    <meta charset="UTF-8">

    <!-- This line sets the viewport properties, allowing the page to adjust its layout to fit different screen sizes. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">

    <!-- This line defines the compatibility mode for Internet Explorer. -->
    <meta http-equiv="X-UA-Compatible" content="ie=edge">

    <!-- This line links the shared stylesheet 'css/main.css'. -->
    <!-- The href is generated by Flask's url_for('static', ...) helper, which builds the URL of a file inside the app's static folder. -->
    <link rel="stylesheet" href="{{ url_for('static', filename='css/main.css') }}">

    <!-- This is a Jinja template block named 'head'; it is empty in this base template. -->
    <!-- Templates that extend this file override it to add their own head content (index.html adds a title and a second stylesheet). -->
    {% block head %}{% endblock %}
</head>

<body>
    <!-- This is a second Jinja template block, named 'body'; it is also empty here. -->
    <!-- Templates that extend this file override it to supply the visible page content. -->
    {% block body %}{% endblock %}
</body>

</html>
```

**index.html**
```html
<!-- This line extends a base HTML template named 'base.html'. -->
<!-- It means this file will inherit the content from the 'base.html' template. -->
{% extends 'base.html' %}

<!-- This block is named 'head' and it overrides the 'head' block in the 'base.html' template. -->
{% block head %}

<!-- This line sets the title of the web page to 'Search Page'. -->
<title>Search Page</title>

<!-- This line links the stylesheet 'css/style.css' in addition to 'css/main.css', which base.html already links. -->
<!-- The href is generated by Flask's url_for('static', ...) helper, which builds the URL of a file inside the app's static folder. -->
<link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
{% endblock %}

<!-- This block is named 'body' and it overrides the 'body' block in the 'base.html' template. -->
{% block body %}

<!-- This <div> element has a class attribute set to 'content', which main.css uses to center it at a fixed width. -->
<div class="content">

    <!-- This <h1> heading element has its text centered with the style attribute. -->
    <h1 style="text-align: center">Search</h1>

    <!-- This <div> element has a class attribute set to 'form', which main.css uses to add a top margin. -->
    <div class="form">

        <!-- This <form> element is used to create a search form. -->
        <!-- It has an 'action' attribute set to '/review', which means the form will be submitted to the '/review' URL on form submission. -->
        <!-- The 'method' attribute is set to 'POST', indicating that the form data will be sent to the server using the HTTP POST method. -->
        <form action="/review" method="POST">

            <!-- This <input> element is of type 'text' and has a 'name' attribute set to 'content'. -->
            <!-- It will be used to enter the search content; application.py reads the value through request.form['content']. -->
            <!-- The 'id' attribute is also 'content', which is what the '#content' rule in main.css matches. -->
            <input type="text" name="content" id="content">

            <!-- This <input> element is of type 'submit' and has its value set to 'Search'. -->
            <!-- It will be used as the submit button for the form. -->
            <input type="submit" value="Search">
        </form>
    </div>
</div>
{% endblock %}
```

**results.html**
```html
<!-- Standalone results page: it does not extend base.html, so it declares its own document skeleton and stylesheets. -->
<!-- This line specifies the document type and version of HTML being used. -->
<!DOCTYPE html>

<!-- This line starts the HTML document and specifies the language as English. -->
<html lang="en">

<head>
    <!-- This line sets the character encoding to UTF-8, which supports various characters and symbols. -->
    <meta charset="UTF-8">

    <!-- This line sets the title of the web page to "Review Page". -->
    <title>Review Page</title>

    <!-- This line links to an external stylesheet from the 'cdnjs' content delivery network (CDN). -->
    <!-- It loads the 'normalize.min.css' file, which helps to standardize styles across different browsers. -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css">

    <!-- This line links the custom stylesheet 'css/style.css' from the app's static folder. -->
    <!-- The href is generated by Flask's url_for('static', ...) helper. A plain relative link such as "./style.css" -->
    <!-- would be requested as '/style.css', which the app does not serve, so only the url_for link is used. -->
    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
</head>

<body>
    <!-- This <div> element has a class attribute set to 'table-users'. -->
    <!-- style.css uses this class to draw the rounded, bordered container around the table. -->
    <div class="table-users">
        <!-- This <div> element has a class attribute set to 'header'. -->
        <!-- style.css uses this class to style the banner above the table. -->
        <div class="header">Reviews</div>

        <!-- This <table> element is used to create a table to display reviews. -->
        <!-- The 'cellspacing' attribute sets the space between table cells to 0. -->
        <table cellspacing="0">
            <!-- This <tr> element represents a table row and contains table headings (th). -->
            <tr>
                <!-- This <th> element represents a table header cell for the "Product" column. -->
                <th>Product</th>

                <!-- This <th> element represents a table header cell for the "Name" column. -->
                <th>Name</th>

                <!-- This <th> element represents a table header cell for the "Rating" column. -->
                <th>Rating</th>

                <!-- This <th> element represents a table header cell for the "Comment Heading" column. -->
                <th>Comment Heading</th>

                <!-- This <th> element represents a table header cell for the "Comments" column. -->
                <!-- The 'width' attribute is set to "230", which sets the width of this column to 230 pixels. -->
                <th width="230">Comments</th>
            </tr>

            <!-- This Jinja 'for' loop iterates over the 'reviews' list that application.py passes to render_template. -->
            <!-- It generates one table row (tr) for each review dictionary in the list. -->
            {% for review in reviews %}
            <tr>
                <!-- These <td> elements represent table data cells, used to display review details. -->
                <!-- Each cell prints one key of the review dictionary: Product, Name, Rating, CommentHead, Comment. -->
                <td>{{review['Product']}}</td>
                <td>{{review['Name']}}</td>
                <td>{{review['Rating']}}</td>
                <td>{{review['CommentHead']}}</td>
                <td>{{review['Comment']}} </td>
            </tr>
            {% endfor %}
        </table>
    </div>
</body>

</html>
```

**application.py**
```python
# Importing necessary modules
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS, cross_origin
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import logging

# Configure logging to write logs to a file named "scrapper.log"
logging.basicConfig(level=logging.INFO, filename="scrapper.log")

# Create the Flask app once and bind it to two names: `application` and the
# shorter alias `app`, which the route decorators below use.
app = application = Flask(__name__)

# Route for the homepage, accessed via GET request.
@app.route("/", methods=['GET'])
@cross_origin()
def homepage():
    """Serve the landing page, i.e. the rendered ``index.html`` search form."""
    landing_page = render_template("index.html")
    return landing_page

# Route for handling the review search, accessed via POST and GET requests.
@app.route("/review", methods=['POST', 'GET'])
@cross_origin()
def index():
    """Scrape Flipkart reviews for the submitted search term and render them.

    GET requests render the search form (``index.html``).

    POST requests read the ``content`` form field, open the Flipkart search
    page for it, follow the link found in the first product box, collect the
    review boxes of that product page, log the collected reviews and render
    ``results.html``.

    Returns:
        The rendered template, or the plain string ``'something is wrong'``
        when any step of the scrape raises (the failure is logged).
    """
    # Imported here so the view stays runnable without touching the
    # module-level import block.
    from urllib.parse import quote

    # If the request method is not POST, render the 'index.html' template
    if request.method != 'POST':
        return render_template('index.html')

    try:
        # Get the search string from the form and remove any spaces
        searchString = request.form['content'].replace(" ", "")
        # Percent-encode the user-supplied term so characters such as '&', '#'
        # or non-ASCII letters cannot alter or break the query string.
        flipkart_url = "https://www.flipkart.com/search?q=" + quote(searchString, safe='')
        # The context manager closes the connection even if read() raises.
        with uReq(flipkart_url) as uClient:
            flipkartPage = uClient.read()
        # Parse the HTML content using BeautifulSoup
        flipkart_html = bs(flipkartPage, "html.parser")
        # Find all the product containers on the page
        bigboxes = flipkart_html.find_all("div", {"class": "_1AtVbE col-12-12"})
        # Remove the first 3 containers as they are not relevant to the search results
        del bigboxes[0:3]
        # Take the first container as it represents the first product. If
        # nothing is left this raises IndexError, which the handler at the
        # bottom logs and turns into the generic error response.
        box = bigboxes[0]
        # Extract the product link from the container
        productLink = "https://www.flipkart.com" + box.div.div.div.a['href']
        # Get the HTML content of the product page
        prodRes = requests.get(productLink)
        prodRes.encoding = 'utf-8'
        prod_html = bs(prodRes.text, "html.parser")
        # Find all the review containers on the product page
        commentboxes = prod_html.find_all('div', {'class': "_16PBlm"})

        # Create a CSV file named after the search string and write the header
        # row. Characters other than letters, digits, '-' and '_' are replaced
        # so the form input cannot point the file outside the working directory.
        safe_name = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in searchString)
        filename = safe_name + ".csv"
        with open(filename, "w") as fw:
            fw.write("Product, Customer Name, Rating, Heading, Comment \n")

        reviews = []
        # Loop through each review container and extract relevant information
        for commentbox in commentboxes:
            try:
                # Extract the customer name
                name = commentbox.div.div.find_all('p', {'class': '_2sc7ZR _2V5EHH'})[0].text
            except Exception:
                # Without a fallback the name would be unbound on the first
                # review or silently reuse the previous reviewer's name.
                name = 'No Name'
                logging.info("name not found")

            try:
                # Extract the rating given by the customer
                rating = commentbox.div.div.div.div.text
            except Exception:
                rating = 'No Rating'
                logging.info("rating not found")

            try:
                # Extract the heading of the comment (if available)
                commentHead = commentbox.div.div.div.p.text
            except Exception:
                commentHead = 'No Comment Heading'
                logging.info("comment heading not found")

            try:
                # Extract the main comment given by the customer
                comtag = commentbox.div.div.find_all('div', {'class': ''})
                custComment = comtag[0].div.text
            except Exception as e:
                # Same fallback reasoning as for the name above.
                custComment = 'No Comment'
                logging.info(e)

            # Store the extracted review information and add it to the list of reviews
            reviews.append({"Product": searchString, "Name": name, "Rating": rating,
                            "CommentHead": commentHead, "Comment": custComment})

        # Log the final list of reviews
        logging.info("log my final result {}".format(reviews))

        # Render the 'results.html' template with the extracted reviews.
        # NOTE(review): the last entry is deliberately left out, as in the
        # original code - presumably the final matching <div> is not a real
        # review; confirm against the live page markup.
        return render_template('results.html', reviews=reviews[:-1])
    except Exception:
        # Log the failure with its traceback at ERROR level so it stands out
        # from the routine INFO messages, then return the generic error text.
        logging.exception("review scraping failed")
        return 'something is wrong'

# Run the Flask application on the specified host and port in debug mode.
if __name__ == "__main__":
    # Only runs when the file is executed directly (python application.py):
    # start Flask's built-in server on the loopback interface, port 8000,
    # with debug mode switched on.
    app.run(debug=True, port=8000, host='127.0.0.1')
```    

**requirements.txt**
```txt
flask
flask_cors
requests
bs4
```

### Difference between the web project with base.html and project without base.html

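Note: in the Flask project above, `base.html` is a Jinja2 layout template that `index.html` inherits from via `{% extends 'base.html' %}`; it is not related to the HTML `<base>` element that the rest of this section describes.

The difference between the two web projects discussed here is that one of them uses a `<base>` element to specify a base URL for all relative URLs in the document, while the other one does not. The `<base>` element is placed inside the `<head>` element of an HTML document (only one `<base>` per document is used), and it must have an `href` attribute, a `target` attribute, or both. The `href` attribute defines the base URL for all relative links on the page, and the `target` attribute defines the default target for hyperlinks and forms that do not set a target of their own.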

For example, if you have a base.html file that contains the following code:
```html
<html>
<head>
  <base href="https://example.com/" target="_blank">
</head>
<body>
  <a href="about.html">About Us</a>
  <a href="contact.html">Contact Us</a>
</body>
</html>
```
Then, when you open this file in a browser, the links will point to https://example.com/about.html and https://example.com/contact.html, respectively, and they will open in a new tab or window. This can be useful if you want to avoid writing absolute URLs for every link on your page, or if you want to change the base URL easily without modifying every link.

However, using a `<base>` element can also have some drawbacks, such as:

- It can cause confusion when debugging or testing your web pages locally, as the links will not work as expected unless you have a local server running with the same base URL.
- It can affect the behavior of some relative URLs that start with a slash (/), such as images, scripts, or stylesheets. For example, if you have an image tag like this:

```html    
<img src="/images/logo.png">
```    

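Then, with a base URL of https://example.com/, the image will be loaded from https://example.com/images/logo.png. But without a `<base>` element, the leading slash makes the path resolve against the root of whichever site served the page (for example `http://127.0.0.1:8000/images/logo.png` on a local development server), not against https://example.com/.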
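- It changes the default target of every link or form on the page that does not specify its own `target` attribute, which can be surprising. For example, if you have two links like this:

```html
<a href="https://www.bing.com/">Search with Bing</a>
<a href="https://www.bing.com/" target="_self">Search with Bing (same tab)</a>
```

Then, with a base target of `_blank`, the first link will open in a new tab or window even though the link itself does not ask for that. The second link still opens in the same tab, because an explicit `target` on a link takes precedence over the base target. Without a base target, both links will open in the same tab or window.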

Therefore, whether you use a `<base>` element or not depends on your web project's needs and preferences. Some web developers prefer to use absolute URLs for all their links and resources, while others prefer to use relative URLs and a `<base>` element. There is no right or wrong answer here, as long as you are consistent and aware of the consequences of your choice.

Source: Conversation with Bing, 8/3/2023
- (1) `<base>`: The Document Base URL element - MDN Web Docs. https://developer.mozilla.org/en-US/docs/Web/HTML/Element/base.
- (2) HTML `<base>` Tag Definition, Usage and Examples - Holistic SEO. https://www.holisticseo.digital/technical-seo/html/meta-tag/base.
- (3) HTML basics - Learn web development | MDN - MDN Web Docs. https://developer.mozilla.org/en-US/docs/Learn/Getting_started_with_the_web/HTML_basics.
- (4) Getting started with HTML - Learn web development | MDN. https://developer.mozilla.org/en-US/docs/Learn/HTML/Introduction_to_HTML/Getting_started.
- (5) HTML - Wikipedia. https://en.wikipedia.org/wiki/HTML.
- (6) HTML for Web Development | General Assembly. https://generalassemb.ly/blog/html-web-development-building-bones-website/.