In [97]:
!pip install webdriver_manager

In [98]:
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from textblob import TextBlob

In [99]:
class Analyzer:
    """Score review objects with TextBlob sentiment polarity."""

    def get_sentiment(self, review):
        """Return the TextBlob sentiment polarity of the string *review*."""
        return TextBlob(review).sentiment.polarity

    def analyze_reviews(self, reviews):
        """Set ``sentiment`` on every item of *reviews* from its ``text``.

        The items are updated in place; nothing is returned.
        """
        for entry in reviews:
            score = self.get_sentiment(entry.text)
            entry.sentiment = score


In [100]:
class Formatter:
    """Turn products and their reviews into printable text."""

    def format_product(self, product):
        """Return a multi-line text summary of *product*.

        The summary lists the name, price, rating and image URL, followed by
        one numbered entry (starting at 1) per review with its sentiment.
        """
        review_entries = (
            f"Review {number}: {entry.text} - Sentiment: {entry.sentiment} \n"
            for number, entry in enumerate(product.reviews, start=1)
        )
        reviews_section = "\n\n".join(review_entries)
        header = (
            f"Product: {product.name}\n"
            f"Price: {product.price}\n"
            f"Rating: {product.rating}\n"
            f"Image URL: {product.image_url}\n"
        )
        return f"{header}Reviews:\n{reviews_section}\n"

    def display_results(self, products):
        """Print the formatted summary of each product in *products*."""
        for summary in map(self.format_product, products):
            print(summary)


In [101]:
class Product:
    """A scraped product listing together with its collected reviews."""

    def __init__(self, name, price, rating, image_url):
        """Store the listing fields and start with an empty review list."""
        self.name, self.price = name, price
        self.rating, self.image_url = rating, image_url
        # A fresh list per instance, so products never share reviews.
        self.reviews = list()

    def add_review(self, review) -> None:
        """Append *review* to this product's reviews."""
        self.reviews.append(review)


In [102]:
class ReviewSentiment:
    """A review's text paired with its sentiment score."""

    def __init__(self, text):
        """Keep *text*; ``sentiment`` stays ``None`` until it is assigned."""
        self.text, self.sentiment = text, None

In [103]:
class Review:
    """Selenium-backed fetcher for the reviews of a single product page.

    Each instance launches its own Chrome session. ``fetch_reviews`` always
    quits that session when it finishes, so an instance serves one fetch.
    """

    def __init__(self):
        """Launch a headless Chrome WebDriver for this fetcher."""
        options = Options()
        # The ``Options.headless`` setter was deprecated and then removed in
        # Selenium 4; assigning it on current releases silently does nothing.
        # The command-line switch is the supported way to request headless.
        options.add_argument("--headless=new")
        self.driver = webdriver.Chrome(options=options)

    def fetch_reviews(self, product_link, wait_seconds=2):
        """Return the reviews found behind the "See more reviews" link.

        Args:
            product_link: URL of the product page to open.
            wait_seconds: Pause, in seconds, after loading the product page
                and again after clicking through to the reviews page.

        Returns:
            A list of ``ReviewSentiment`` objects. The list is empty when
            any step fails; the error is printed rather than raised.
        """
        reviews = []
        try:
            # Navigation sits inside the ``try`` so that a failed page load
            # still reaches ``finally`` and releases the browser.
            self.driver.get(product_link)
            time.sleep(wait_seconds)
            self.driver.find_element(By.PARTIAL_LINK_TEXT, "See more reviews").click()
            time.sleep(wait_seconds)
            html_content = self.driver.page_source
            reviews.extend(self.parse_review_page(html_content))
        except Exception as e:
            print(f"Error fetching reviews: {e}")
        finally:
            self.cleanup()
        return reviews

    def cleanup(self):
        """Quit the WebDriver session owned by this instance."""
        self.driver.quit()

    def parse_review_page(self, html_content):
        """Extract review bodies from the HTML of a reviews page.

        Args:
            html_content: HTML source of the page.

        Returns:
            One ``ReviewSentiment`` per ``<span data-hook="review-body">``
            element, holding that element's stripped text.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        review_spans = soup.find_all('span', {'data-hook': 'review-body'})
        return [ReviewSentiment(span.get_text(strip=True)) for span in review_spans]


In [104]:
class Scrapper:
    """Search Amazon for a keyword and collect product details and reviews.

    ``run`` drives the whole pipeline: pick the keyword, scrape the first
    ``product_count`` search results, score their reviews and print them.
    """

    # Seconds to wait for an HTTP response; without a timeout ``requests``
    # can block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set the search defaults and launch a headless Chrome WebDriver."""
        self.keyword = None
        self.product_count: int = 2
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            'Accept-Language': 'en-US, en;q=0.5'
        }
        self.base_url = "https://www.amazon.ca"
        self.products = []

        # The ``Options.headless`` setter was deprecated and then removed in
        # Selenium 4; assigning it on current releases silently does nothing.
        # NOTE(review): no method of this class uses ``self.driver`` other
        # than ``cleanup``; reviews are fetched through ``Review`` instead.
        options = Options()
        options.add_argument("--headless=new")
        self.driver = webdriver.Chrome(options=options)

    def get_user_input(self):
        """Set the search keyword (currently fixed; no prompt is shown)."""
        self.keyword = 'television'

    def search_products(self):
        """Return product-page URLs for the current keyword.

        Returns:
            At most ``product_count`` absolute URLs, or an empty list when
            the search request fails or does not return HTTP 200.
        """
        try:
            # Passing the keyword through ``params`` lets requests URL-encode
            # it, so spaces, ``&`` or ``#`` cannot corrupt the query string.
            response = requests.get(
                f"{self.base_url}/s",
                params={"k": self.keyword},
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch the search results: {exc}")
            return []

        if response.status_code != 200:
            print("Failed to fetch the search results.")
            return []

        soup = BeautifulSoup(response.content, 'lxml')
        anchors = soup.find_all('a', {'class': 'a-link-normal s-no-outline'}, href=True)
        product_links = [self.base_url + tag['href'] for tag in anchors]
        return product_links[: self.product_count]

    def extract_product_details(self, product_link):
        """Fetch one product page and parse it into a ``Product``.

        Args:
            product_link: Absolute URL of the product page.

        Returns:
            A ``Product`` with name, price, rating and image URL, or
            ``None`` when the page cannot be fetched or an expected element
            is missing.
        """
        try:
            response = requests.get(
                product_link, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch the product page: {exc}")
            return None

        if response.status_code != 200:
            print("Failed to fetch the product page.")
            return None

        soup = BeautifulSoup(response.content, 'lxml')
        try:
            name = soup.find(id="productTitle").get_text(strip=True)
            price_whole = soup.find("span", {"class": "a-price-whole"}).get_text(strip=True)
            price_fraction = soup.find("span", {"class": "a-price-fraction"}).get_text(strip=True)
            price = f"{price_whole}.{price_fraction}"
            rating = soup.find("span", {"class": "a-icon-alt"}).get_text(strip=True)
            image_url = soup.find("img", {"id": "landingImage"})['src']
        except (AttributeError, TypeError, KeyError):
            # AttributeError: ``find`` returned None before ``.get_text``.
            # TypeError: the image tag is missing, so ``None['src']`` fails.
            # KeyError: the image tag exists but has no ``src`` attribute.
            print("Error parsing product details.")
            return None
        return Product(name, price, rating, image_url)

    def aggregate_data(self):
        """Scrape every search result and store the products with reviews."""
        for link in self.search_products():
            product = self.extract_product_details(link)
            if not product:
                continue
            self.products.append(product)
            # A fresh ``Review`` per product: its browser session is closed
            # at the end of ``fetch_reviews``.
            reviews = Review().fetch_reviews(link)
            print(reviews)
            for review in reviews:
                product.add_review(review)

    def cleanup(self):
        """Quit the WebDriver session owned by this scraper."""
        self.driver.quit()

    def run(self):
        """Run the full pipeline and always release the browser afterwards."""
        try:
            self.get_user_input()
            self.aggregate_data()
            sentiment_analyzer = Analyzer()
            for product in self.products:
                sentiment_analyzer.analyze_reviews(product.reviews)
            output_formatter = Formatter()
            output_formatter.display_results(self.products)
        finally:
            # Runs even when a step above raises, so the Chrome process
            # started in ``__init__`` is not left behind.
            self.cleanup()


# Script entry point: build the scraper and run the full pipeline.
if __name__ == "__main__":
    scraper = Scrapper()
    scraper.run()
