# Integration With REST API, Web Portal And MongoDB

In [None]:
from flask import Flask, render_template, request,jsonify
from flask_cors import CORS,cross_origin
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import logging
logging.basicConfig(filename="scrapper.log" , level=logging.INFO)
from pymongo.mongo_client import MongoClient


# Flask application object; the @app.route decorators in this file register
# their view functions on it, and app.run() serves it.
app = Flask(__name__)

@app.route("/", methods=["GET"])
def homepage():
    """Render and return the ``index.html`` template for GET requests to ``/``."""
    landing_page = render_template("index.html")
    return landing_page

# SECURITY: these credentials are hard-coded and therefore live in version
# control. Set the MONGODB_URI environment variable to override this value,
# rotate the password, and delete the literal once deployments are migrated.
_DEFAULT_MONGO_URI = "mongodb+srv://aliabbas8152:paikarali8152@aliabbas8152.h1podr5.mongodb.net/?retryWrites=true&w=majority&appName=aliabbas8152"

# Upper bound (seconds) for each outbound HTTP call so that an unresponsive
# remote host cannot block the request handler indefinitely.
_HTTP_TIMEOUT = 30


def _extract_review(commentbox, product):
    """Build one review dict from a single review container tag.

    Every field is extracted independently and falls back to a placeholder
    string when the expected markup is missing, so one malformed container
    can neither abort the request nor leak a value from the previous review.

    Args:
        commentbox: BeautifulSoup tag wrapping one review.
        product: Search term stored under the "Product" key.

    Returns:
        dict with keys "Product", "Name", "Rating", "CommentHead", "Comment".
    """
    try:
        name = commentbox.div.div.find_all('p', {'class': '_2sc7ZR _2V5EHH'})[0].text
    except Exception:
        name = 'No Name'
        logging.info("name")

    try:
        rating = commentbox.div.div.div.div.text
    except Exception:
        rating = 'No Rating'
        logging.info("rating")

    try:
        commentHead = commentbox.div.div.div.p.text
    except Exception:
        commentHead = 'No Comment Heading'
        logging.info(commentHead)

    try:
        comtag = commentbox.div.div.find_all('div', {'class': ''})
        custComment = comtag[0].div.text
    except Exception as e:
        custComment = 'No Comment'
        logging.info(e)

    return {"Product": product, "Name": name, "Rating": rating, "CommentHead": commentHead,
            "Comment": custComment}


def _store_reviews(reviews):
    """Insert ``reviews`` into the Review_Scrapper database and close the client.

    Does nothing for an empty list, because ``insert_many`` rejects empty input.
    ``insert_many`` adds an ``_id`` key to each dict in place.
    """
    if not reviews:
        return

    import os

    client = MongoClient(os.environ.get("MONGODB_URI", _DEFAULT_MONGO_URI))
    try:
        client['Review_Scrapper']['Review_Scrapper_project'].insert_many(reviews)
    finally:
        client.close()


@app.route("/review" , methods = ['POST' , 'GET'])
def index():
    """Scrape Flipkart reviews for the submitted search term and show them.

    GET renders ``index.html``. POST reads the ``content`` form field, fetches
    the Flipkart search page, follows the product link found in the fourth
    matching result container, collects the review containers of that product
    page, stores the reviews in MongoDB and renders ``result.html``.

    Returns:
        The rendered template, or the plain string 'something is wrong' when
        any step of the POST flow raises (the exception is logged).
    """
    import re
    from urllib.parse import quote

    if request.method != 'POST':
        return render_template('index.html')

    try:
        searchString = request.form['content'].replace(" ","")

        # Percent-encode the user-supplied term so characters such as '&', '#'
        # or non-ASCII text cannot corrupt the query string.
        flipkart_url = "https://www.flipkart.com/search?q=" + quote(searchString)
        with uReq(flipkart_url, timeout=_HTTP_TIMEOUT) as uClient:
            flipkartPage = uClient.read()

        flipkart_html = bs(flipkartPage, "html.parser")
        bigboxes = flipkart_html.find_all("div", {"class": "_1AtVbE col-12-12"})
        # The first three matches are skipped (presumably non-product
        # containers -- TODO confirm against current Flipkart markup). An
        # IndexError here means no result was found and is handled below.
        box = bigboxes[3]
        productLink = "https://www.flipkart.com" + box.div.div.div.a['href']

        prodRes = requests.get(productLink, timeout=_HTTP_TIMEOUT)
        prodRes.encoding='utf-8'
        prod_html = bs(prodRes.text, "html.parser")
        commentboxes = prod_html.find_all('div', {'class': "_16PBlm"})

        # The file name comes from user input: keep only safe characters so
        # the term cannot point outside the working directory.
        filename = re.sub(r"[^A-Za-z0-9_.-]", "_", searchString) + ".csv"
        # NOTE(review): only the header row is written, as before; confirm
        # whether the review rows were meant to be written as well.
        with open(filename, "w", encoding="utf-8") as fw:
            fw.write("Product, Customer Name, Rating, Heading, Comment \n")

        reviews = [_extract_review(commentbox, searchString) for commentbox in commentboxes]
        logging.info("log my final result {}".format(reviews))

        _store_reviews(reviews)

        # The last entry is left out of the rendered page, as before
        # (presumably the final container is not a real review -- TODO confirm).
        return render_template('result.html', reviews=reviews[:-1])
    except Exception:
        # Top-level boundary: record the traceback, keep the response unchanged.
        logging.exception("review scraping failed")
        return 'something is wrong'


# Start the Flask server only when this file is executed as a script,
# not when it is imported as a module.
if __name__=="__main__":
    # host="0.0.0.0" makes the server listen on all network interfaces
    # rather than only on localhost.
    app.run(host="0.0.0.0")


This code is a Python Flask web application that serves as a web scraper for extracting product reviews from Flipkart and storing them in a MongoDB database. Let's break down the code step by step:

1. **Import Statements**:
    - `Flask`, `render_template`, `request`, and `jsonify` are imported from the Flask library for building the web application.
    - `CORS` and `cross_origin` are imported from `flask_cors` to handle Cross-Origin Resource Sharing (CORS) headers. Note that neither is actually applied anywhere in this code, so no CORS headers are added.
    - `requests` is imported to make HTTP requests.
    - `BeautifulSoup` from `bs4` is used for web scraping.
    - `urlopen` is imported from `urllib.request` to open URLs.
    - `logging` is imported for logging errors and information.
    - `MongoClient` is imported from `pymongo.mongo_client` to interact with MongoDB.

2. **Initializing Flask App**:
    - An instance of the Flask class is created and stored in the variable `app`.

3. **Routes**:
    - Two routes are defined: 
        - `'/'` for the homepage, which renders an HTML template.
        - `'/review'` for handling the review extraction process. It accepts both GET and POST requests.

4. **Homepage Route** (`homepage()` function):
    - Renders the `index.html` template.

5. **Review Extraction Route** (`index()` function):
    - Handles both GET and POST requests.
    - If a POST request is received:
        - Extracts the search string from the form data.
        - Constructs the URL for Flipkart search based on the search string.
        - Retrieves the HTML content of the search results page using `urlopen` (imported as `uReq`).
        - Parses the HTML content using BeautifulSoup.
        - Finds the product link from the search results and retrieves the product page.
        - Parses the product page to extract review-related information such as customer name, rating, comment heading, and comment.
        - Logs any errors encountered during the extraction process.
        - Stores the extracted reviews in a MongoDB database.
        - Renders the `result.html` template with the extracted reviews.
    - If a GET request is received, renders the `index.html` template.

6. **MongoDB Integration**:
    - The code connects to a MongoDB Atlas cluster using the provided URI.
    - It inserts the extracted reviews into a collection named `Review_Scrapper_project` within the `Review_Scrapper` database.

7. **Error Handling**:
    - The code logs any exceptions that occur during the review extraction process.
    - If an exception occurs, it returns the message "something is wrong".

8. **Running the Application**:
    - If the script is executed directly (`__name__ == "__main__"`), the Flask application is started with `host="0.0.0.0"`, so it listens on all available network interfaces.

Overall, this Flask application provides a web interface for users to input a product name, scrape reviews from Flipkart, and store them in a MongoDB database.



```python
from flask import Flask, render_template, request,jsonify
from flask_cors import CORS,cross_origin
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as uReq
import logging
logging.basicConfig(filename="scrapper.log" , level=logging.INFO)
from pymongo.mongo_client import MongoClient
```

- `from flask import Flask, render_template, request,jsonify`: This line imports necessary functions and classes from the Flask library. `Flask` is used to create an instance of the Flask application, `render_template` is used to render HTML templates, `request` is used to access incoming request data, and `jsonify` is used to serialize data to JSON format.
- `from flask_cors import CORS,cross_origin`: This line imports functions to handle Cross-Origin Resource Sharing (CORS). `CORS` is a Flask extension for handling CORS, and `cross_origin` is a decorator for enabling CORS on specific routes.
- `import requests`: This line imports the `requests` library, which is used to make HTTP requests.
- `from bs4 import BeautifulSoup as bs`: This line imports the `BeautifulSoup` class from the `bs4` library. `BeautifulSoup` is used for parsing HTML and XML documents.
- `from urllib.request import urlopen as uReq`: This line imports the `urlopen` function from the `urllib.request` module. `urlopen` is used to open URLs.
- `import logging`: This line imports the `logging` module, which is used for logging messages.
- `logging.basicConfig(filename="scrapper.log" , level=logging.INFO)`: This line configures basic logging settings. It specifies the filename for the log file and sets the logging level to INFO.
- `from pymongo.mongo_client import MongoClient`: This line imports the `MongoClient` class from the `pymongo.mongo_client` module. `MongoClient` is used to connect to MongoDB.
```python




```
```python
app = Flask(__name__)
```

- This line creates an instance of the Flask class and stores it in the variable `app`. The `__name__` variable is a special variable in Python that represents the name of the current module.




```python
@app.route("/", methods = ['GET'])
def homepage():
    return render_template("index.html")
```

- This block defines a route for the homepage ("/"). When a GET request is received at this route, the `homepage()` function is executed. This function renders the `index.html` template and returns it as the response.
```python




```
```python
@app.route("/review" , methods = ['POST' , 'GET'])
def index():
    if request.method == 'POST':
        try:
            # Code for scraping and storing reviews
        except Exception as e:
            logging.info(e)
            return 'something is wrong'
    else:
        return render_template('index.html')
```

- This block of code defines a route for handling review extraction ("/review"). This route accepts both POST and GET requests.
- If a POST request is received, the `index()` function is executed. This function contains the code for scraping Flipkart reviews and storing them in MongoDB. If an exception occurs during this process, the exception is logged, and the message "something is wrong" is returned.
- If a GET request is received, the function renders the `index.html` template and returns it as the response.
```python




```
```python
if __name__=="__main__":
    app.run(host="0.0.0.0")
```

- This line checks if the script is being run directly (not imported as a module). If so, it starts the Flask application by calling the `run()` method on the `app` object. The `host="0.0.0.0"` parameter specifies that the application should listen on all available network interfaces.