In [2]:
# %pip (unlike !pip) installs into the environment of the running kernel.
# beautifulsoup4 provides the `bs4` module imported further down.
%pip install textblob vaderSentiment beautifulsoup4



In [4]:
# -p makes this cell safe to re-run: no error if the directories already exist.
!mkdir -p input_csvs output_csvs

## Important

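Add the CSV files you want to classify to the `input_csvs` directory before running the cells below. Each file must contain the text column named in the final cell (`content` by default).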

In [6]:
import re
from bs4 import BeautifulSoup

def extract_text_from_html(html_content):
    """Return the text content of an HTML fragment as one single-spaced string.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend, the
    text nodes are stripped and joined with spaces, and every remaining run
    of whitespace is collapsed to a single space.

    Args:
        html_content: HTML markup to reduce to plain text.

    Returns:
        The extracted text, or ``None`` if any step raised. The error is
        printed rather than propagated, so callers must handle ``None``.
    """
    try:
        parsed = BeautifulSoup(html_content, "html.parser")
        raw_text = parsed.get_text(separator=" ", strip=True)
        # Individual text nodes can still contain newlines/tabs internally.
        return re.sub(r'\s+', ' ', raw_text)
    except Exception as err:
        print(f"Error extracting text: {err}")
        return None


In [8]:
import os
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Shared VADER analyzer, created lazily on first use. analyze_sentiment is
# applied once per row, and building a new analyzer each time re-reads its
# lexicon files, so a single instance is reused for the whole run.
_SENTIMENT_ANALYZER = None


def _get_sentiment_analyzer():
    """Return the shared analyzer, building it the first time it is needed."""
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()
    return _SENTIMENT_ANALYZER


def analyze_sentiment(text):
    """Score the sentiment of one (possibly HTML) text value with VADER.

    Args:
        text: The value to score. It is converted with ``str()`` and stripped
            of HTML markup before scoring.
            NOTE: a missing value (NaN) is therefore scored as the literal
            string "nan", not skipped.

    Returns:
        float: VADER's ``compound`` score for the extracted text, or NaN when
        text extraction failed and returned ``None``.
    """
    plain_text = extract_text_from_html(str(text))
    if plain_text is None:
        # Extraction already printed the error. Passing None on to VADER would
        # raise and abort the whole file, so mark just this row as missing.
        return float("nan")

    return _get_sentiment_analyzer().polarity_scores(plain_text)['compound']

def process_csv_files(input_dir, output_dir, text_column='text'):
    """
    Processes all CSV files in the input directory, adds a 'sentiment' column,
    and saves the modified CSV files to the output directory.

    Files are handled in sorted name order and the ``.csv`` extension is
    matched case-insensitively. A file that lacks ``text_column`` is skipped
    with a warning; any other failure is printed and processing moves on to
    the next file, so one bad file does not stop the batch.

    Args:
        input_dir (str): Path to the directory containing CSV files.
        output_dir (str): Path to the directory to save the modified CSV files.
            Created (including parents) if it does not exist.
        text_column (str): Name of the column containing the text to analyze.

    Returns:
        None. Results are written to ``output_dir`` under the same file names.
    """
    # exist_ok avoids the check-then-create race and is a no-op on re-runs.
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir gives no ordering guarantee; sort for reproducible runs/logs.
    for filename in sorted(os.listdir(input_dir)):
        # Case-insensitive so files such as "DATA.CSV" are not silently ignored.
        if not filename.lower().endswith('.csv'):
            continue

        input_filepath = os.path.join(input_dir, filename)
        output_filepath = os.path.join(output_dir, filename)

        try:
            df = pd.read_csv(input_filepath)

            if text_column not in df.columns:
                print(f"Warning: Column '{text_column}' not found in {filename}. Skipping.")
                continue

            df['sentiment'] = df[text_column].apply(analyze_sentiment)
            df.to_csv(output_filepath, index=False)
            print(f"Processed and saved: {filename}")

        except Exception as e:
            # Deliberately broad: report the failure and continue with the
            # remaining files instead of aborting the whole batch.
            print(f"Error processing {filename}: {e}")

if __name__ == "__main__":
    # Run configuration: where the CSVs are read from and written to, and
    # which column of each CSV holds the text to score.
    input_directory, output_directory = "input_csvs", "output_csvs"
    text_column_name = "content"

    process_csv_files(
        input_dir=input_directory,
        output_dir=output_directory,
        text_column=text_column_name,
    )


If you meant to use Beautiful Soup to parse the web page found at a certain URL, then something has gone wrong. You should use an Python package like 'requests' to fetch the content behind the URL. Once you have the content as a string, you can feed that string into Beautiful Soup.



    
  soup = BeautifulSoup(html_content, "html.parser")


Processed and saved: oracle database_posts_with_comments_answers.csv
