In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# *** DATA COLLECTION ***
def extract_recipe_data(recipe_url):
    response = requests.get(recipe_url, timeout=5)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    title = soup.find('h1').text
    ingredients = [ing.text for ing in soup.find_all('li', class_='ingredients-item-name')]
    # Assuming reviews exist in elements with a specific class
    reviews = [review.text for review in soup.find_all('p', class_='review-text')]

    return {
        'title': title,
        'ingredients': ingredients,
        'reviews': reviews
    }

# Simplified data collection
recipes_data = []
recipe_urls = ["https://example.com/recipe1", "https://example.com/recipe2"]  # Insert your recipe URLs
for url in recipe_urls:
    recipes_data.append(extract_recipe_data(url))
    time.sleep(2)  # Be polite when scraping

# Create a Pandas DataFrame
df = pd.DataFrame(recipes_data)

# *** TEXT PREPROCESSING ***
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    # ... Other preprocessing steps (remove punctuation, numbers, etc.) ...
    words = [w for w in text.split() if w not in stop_words]
    return ' '.join(words)

df['clean_reviews'] = df['reviews'].apply(preprocess_text)

# *** SENTIMENT ANALYSIS *** (Rule-Based)
positive_words = ['delicious', 'tasty', 'amazing', 'flavorful']
negative_words = ['bland', 'disappointing', 'terrible']

def rule_based_sentiment(text):
    pos_count = sum([text.count(w) for w in positive_words])
    neg_count = sum([text.count(w) for w in negative_words])

    if pos_count > neg_count:
        return 'positive'
    elif neg_count > pos_count:
        return 'negative'
    else:
        return 'neutral'

df['sentiment'] = df['clean_reviews'].apply(rule_based_sentiment)

# *** VISUALIZATION ***
plt.figure(figsize=(10,6))
sns.countplot(x='sentiment', data=df, palette='coolwarm')
plt.title('Distribution of Recipe Review Sentiment')
plt.xticks(rotation=45)
plt.show()

# (Optional) User Input - would require a simple web interface for usability


HTTPError: 404 Client Error: Not Found for url: https://example.com/recipe1