In [None]:
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK resources used below: the 'punkt' / 'punkt_tab' tokenizer
# data and the 'stopwords' corpus.
for _resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)

# Step 1: Scrape the website
def scrape_website(url, timeout=10):
    """Fetch *url* and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The text of every ``<p>`` tag joined by single spaces, or an empty
        string when the request fails (the error is printed, not raised).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception
        soup = BeautifulSoup(response.text, 'html.parser')

        # Only paragraph text is collected. The explicit separator keeps
        # text from nested tags (e.g. <p>Hello<b>world</b></p>) from being
        # fused into a single word.
        paragraphs = [
            tag.get_text(separator=' ', strip=True)
            for tag in soup.find_all('p')
        ]
        return ' '.join(paragraphs)
    except requests.exceptions.RequestException as e:
        print(f"Error while fetching the webpage: {e}")
        return ""

# Step 3: Preprocessing
def preprocess_text(text):
    """Lowercase and tokenize *text*, dropping stopwords and non-alphanumerics.

    Returns the surviving tokens in their original order. Any exception
    raised along the way is printed and an empty list is returned instead.
    """
    try:
        lowered = text.lower()
        candidates = word_tokenize(lowered)
        english_stops = set(stopwords.words('english'))

        kept = []
        for candidate in candidates:
            # Skip punctuation / mixed-symbol tokens.
            if not candidate.isalnum():
                continue
            # Skip common English filler words.
            if candidate in english_stops:
                continue
            kept.append(candidate)
        return kept
    except Exception as e:
        print(f"Error during preprocessing: {e}")
        return []

# Step 4: Bag of Words and Sentiment Scoring
# Fixed sentiment lexicons; frozensets give constant-time membership tests.
_POSITIVE_WORDS = frozenset(
    {"good", "great", "excellent", "positive", "happy", "awesome", "fantastic"}
)
_NEGATIVE_WORDS = frozenset(
    {"bad", "poor", "terrible", "negative", "sad", "awful", "horrible"}
)


def sentiment_analysis(tokens):
    """Count how many tokens appear in the positive and negative lexicons.

    Matching is exact, so tokens are compared as given (no case folding).

    Args:
        tokens: Iterable of string tokens. It is consumed in a single pass,
            so one-shot iterators and generators are scored correctly.

    Returns:
        A ``(positive_score, negative_score)`` tuple of ints.
    """
    positive_score = 0
    negative_score = 0
    for word in tokens:
        # The two lexicons are disjoint, so a token counts at most once.
        if word in _POSITIVE_WORDS:
            positive_score += 1
        elif word in _NEGATIVE_WORDS:
            negative_score += 1
    return positive_score, negative_score

# Main Function
def main():
    """Prompt for a URL, scrape it, and print a simple sentiment tally.

    Prints a preview of the scraped text and of the preprocessed tokens
    before the scores. Any exception raised after the prompt is reported
    on stdout rather than propagated.
    """
    url = input("Enter the URL to scrape: ")
    try:
        print("Scraping data, please wait...")
        page_text = scrape_website(url)

        if page_text:
            # Preview only the first 500 characters to keep output short.
            print(f"Scraped text: {page_text[:500]}...")

            words = preprocess_text(page_text)
            # Preview only the first 20 tokens.
            print(f"Preprocessed Tokens: {words[:20]}...")

            pos_count, neg_count = sentiment_analysis(words)
            print(f"Positive Score: {pos_count}, Negative Score: {neg_count}")
        else:
            print("No data scraped from the webpage. Please check the URL.")
    except Exception as err:
        print(f"An unexpected error occurred: {err}")

# Run the interactive workflow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Scraping data, please wait...
Scraped text: Name:High Neck Double Fold Full Sleeve Top Black Fabric:Lycra Sleeve Length:Long Sleeves Pattern:Solid Net Quantity (N):1 Sizes: S, M, L, XL Country of Origin:India Helpful (0) Helpful (0)...
Preprocessed Tokens: ['name', 'high', 'neck', 'double', 'fold', 'full', 'sleeve', 'top', 'black', 'fabric', 'lycra', 'sleeve', 'length', 'long', 'sleeves', 'pattern', 'solid', 'net', 'quantity', 'n']...
Positive Score: 0, Negative Score: 0


: 