In [3]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob

# Crawl configuration: the first page to fetch and the HTTP headers sent with
# every request.
base_url = "https://dragonball.fandom.com/wiki/Dragon_Ball_Super_(anime)"
headers = {
    "User-Agent": "Mozilla/5.0",
}

# Accumulator for scraped rows; each entry is a dict with "text" and
# "sentiment" keys, appended by extract_data().
data: list = []

# Function to scrape a single page
def get_page(url):
    """Download *url* and return its HTML parsed as a BeautifulSoup tree.

    The request is sent with the module-level ``headers``.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status, so an error page is never scraped as if it were content.
        requests.exceptions.Timeout: if the server does not respond in time.
    """
    # Without a timeout requests will wait indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")

# Function to extract data from the page
def extract_data(soup):
    """Score every non-empty ``<p>`` in *soup* and append it to ``data``.

    For each paragraph a ``{"text": ..., "sentiment": ...}`` dict is appended
    to the module-level ``data`` list, where ``sentiment`` is the TextBlob
    polarity of the paragraph text. Nothing is returned.
    """
    for para in soup.find_all("p"):
        # get_text(strip=True) strips each text node and joins them with an
        # empty separator, which fuses words around inline tags such as links
        # ("TheDragon Ballseries"). Collapse whitespace on the untouched text
        # instead so word boundaries survive.
        text = " ".join(para.get_text().split())
        if not text:  # Skip empty paragraphs
            continue
        sentiment = TextBlob(text).sentiment.polarity
        data.append({"text": text, "sentiment": sentiment})

# Function to find the next page (if any)
def find_next_page(soup):
    """Return the href of the page's "next" link, or ``None`` if there is none.

    The lookup targets an ``<a class="next-page">`` element. This selector is
    a placeholder: the Dragon Ball Super page might not have pagination, in
    which case this always returns ``None``.

    The href is returned exactly as written in the markup (it may be
    relative). ``None`` is also returned when the anchor exists but carries
    no usable ``href`` attribute.
    """
    next_link = soup.find("a", {"class": "next-page"})
    if next_link is None:
        return None
    # .get() avoids a KeyError on an anchor without href; an empty href is
    # treated the same as a missing one.
    return next_link.get("href") or None

# Scrape and process data from multiple pages, starting at base_url and
# following "next page" links until there are none left.
page_url = base_url
visited = set()  # URLs already fetched; stops a pagination cycle from looping forever
while page_url and page_url not in visited:
    visited.add(page_url)
    soup = get_page(page_url)
    extract_data(soup)
    next_page = find_next_page(soup)
    if not next_page:
        break
    # The href may be relative to the current page; resolve it to an
    # absolute URL before requesting it.
    page_url = urljoin(page_url, next_page)

# Convert to DataFrame and save as CSV.
# The columns are pinned so the file still gets a "text,sentiment" header
# when nothing was scraped (an empty list would otherwise yield a DataFrame
# with no columns at all).
df = pd.DataFrame(data, columns=["text", "sentiment"])
df.to_csv("dragonball_super_sentiment.csv", index=False)

print("Data saved to dragonball_super_sentiment.csv")

Data saved to dragonball_super_sentiment.csv
