Import necessary libraries


In [1]:
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import spacy
from langdetect import detect

scrape data

In [2]:
def scrape_website(url):
    """
    Scrape all textual data from the given URL.
    """
    try:
        # Fetch the webpage content
        response = requests.get(url)
        response.raise_for_status()  # Raise an HTTPError for bad responses
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract all text content from the page
        page_text = soup.get_text(separator="\n").strip()

        return {"content": page_text}  # Return the data as a dictionary

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return {"error": str(e)}

    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return {"error": str(e)}

Language detection


In [3]:
def detect_language(text):
    blob = TextBlob(text)
    return blob.detect_language()

Preprocessing


In [4]:
def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
    return filtered_tokens

Sentiment Analysis

In [5]:
def sentiment_analysis(tokens):
    positive_words = ["good", "great", "excellent", "positive", "happy", "awesome", "fantastic"]
    negative_words = ["bad", "poor", "terrible", "negative", "sad", "awful", "horrible"]
    positive_score = sum(word in positive_words for word in tokens)
    negative_score = sum(word in negative_words for word in tokens)
    return positive_score, negative_score

Relationship Extraction

In [6]:
def extract_relationships(text):
    doc = nlp(text)
    relationships = []
    for token in doc:
        if token.dep_ in ("nsubj", "dobj"):
            relationships.append((token.text, token.head.text, token.dep_))
    return relationships

In [7]:
def main():
    """
    Main function to scrape data from a URL and display the results.
    """
    try:
        # Prompt user for URL input
        url = input("Enter the product page URL: ").strip()

        print("\nScraping data, please wait...")
        product_data = scrape_website(url)

        # Check if data was successfully scraped
        if not product_data:
            print("No product data found. Please check the URL or ensure the website structure matches the code.")
        else:
            print("\nScraped Product Data:")
            print(f"Name: {product_data['name']}")
            print(f"Description: {product_data['description']}")
            print("Reviews:")
            for idx, review in enumerate(product_data['reviews'], start=1):
                print(f"  {idx}. {review}")

    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()



Scraping data, please wait...

Scraped Product Data:
An error occurred: 'name'


: 