In [1]:
import pandas as pd
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Shared VADER analyzer: built once here and reused for every scored text
analyzer = SentimentIntensityAnalyzer()

# Every entry returned by opinion_lexicon.words(), kept in a set so the
# per-token membership tests in the subjectivity count are cheap
subjectivity_lexicon = {*opinion_lexicon.words()}

# Define function to calculate subjectivity and sentiment scores
def calculate_subjectivity_and_sentiment(text):
    """Score a text for subjectivity and overall sentiment.

    Args:
        text: The text to score. Missing values (``None``/NaN, as produced by
            ``pd.read_csv`` for empty cells) are treated as an empty string;
            any other non-string scalar is converted with ``str``.

    Returns:
        A ``(subjectivity_score, sentiment_score)`` tuple where
        ``subjectivity_score`` is the number of tokens found in
        ``subjectivity_lexicon`` and ``sentiment_score`` is the VADER
        ``'compound'`` score of the text.
    """
    # Cells loaded from CSV can be NaN or numeric; word_tokenize only accepts
    # strings, so normalise the input before tokenizing.
    if not isinstance(text, str):
        text = "" if pd.isna(text) else str(text)

    # The lexicon entries are lower-case, so compare case-insensitively;
    # otherwise capitalised words (e.g. at the start of a sentence) are missed.
    subjectivity_score = sum(
        1 for token in word_tokenize(text) if token.lower() in subjectivity_lexicon
    )

    # VADER receives the text unchanged (not lower-cased) so that it can use
    # the original casing and punctuation.
    sentiment_score = analyzer.polarity_scores(text)['compound']

    return subjectivity_score, sentiment_score

# Define function to detect bias
def detect_bias(
    data,
    *,
    subjectivity_threshold_high=7.0,
    subjectivity_threshold_moderate=4.0,
    sentiment_threshold_extreme=0.7,
    sentiment_threshold_moderate=0.3,
):
    """Label every row of ``data`` with a bias level.

    Each value of the ``'processed_text'`` column is scored with
    ``calculate_subjectivity_and_sentiment`` and the two scores are compared
    against the thresholds. The result is written to a ``'bias_label'``
    column.

    Args:
        data: DataFrame with a ``'processed_text'`` column. It is modified in
            place (the ``'bias_label'`` column is added or overwritten).
        subjectivity_threshold_high: Minimum subjectivity score for
            ``'High bias'``.
        subjectivity_threshold_moderate: Minimum subjectivity score for
            ``'Moderate bias'``; scores below it are eligible for
            ``'Low bias'``.
        sentiment_threshold_extreme: Minimum absolute sentiment score for
            ``'High bias'``.
        sentiment_threshold_moderate: Minimum absolute sentiment score for
            ``'Moderate bias'``; magnitudes below it are eligible for
            ``'Low bias'``.

    Returns:
        The same DataFrame object that was passed in, with ``'bias_label'``
        set to ``'High bias'``, ``'Moderate bias'``, ``'Low bias'`` or
        ``'Unknown bias'`` (when subjectivity and sentiment disagree).

    Raises:
        KeyError: If ``data`` has no ``'processed_text'`` column.
    """

    def label_text(text):
        # Score one text and translate the two scores into a label
        subjectivity_score, sentiment_score = calculate_subjectivity_and_sentiment(text)
        # Only the strength of the sentiment matters, not its direction
        sentiment_magnitude = abs(sentiment_score)

        if (subjectivity_score >= subjectivity_threshold_high
                and sentiment_magnitude >= sentiment_threshold_extreme):
            return 'High bias'
        if (subjectivity_score >= subjectivity_threshold_moderate
                and sentiment_magnitude >= sentiment_threshold_moderate):
            return 'Moderate bias'
        if (subjectivity_score < subjectivity_threshold_moderate
                and sentiment_magnitude < sentiment_threshold_moderate):
            return 'Low bias'
        # One score is high while the other is low: no clear verdict
        return 'Unknown bias'

    # Map over the single column that is needed instead of applying row-wise:
    # a row-wise apply on an empty frame yields a DataFrame rather than a
    # Series, which cannot be reliably assigned to one column.
    data['bias_label'] = data['processed_text'].map(label_text)

    return data

# Define main function to process the input CSV file
def main(input_file_path, output_file_path):
    """Read a CSV, add bias labels to it, and write the result to a new CSV.

    Args:
        input_file_path: Path of the CSV file to read with ``pd.read_csv``.
        output_file_path: Path the labelled data is written to (without the
            index column).
    """
    # Read the input and label it in one step
    labelled = detect_bias(pd.read_csv(input_file_path))

    # Persist the labelled rows, then report where they went
    labelled.to_csv(output_file_path, index=False)
    print(f'Results saved to: {output_file_path}')

# CSV to read, and CSV that receives the labelled rows
input_csv_file_path = '/Downloads/test_with_sentiment.csv'
output_csv_file_path = '/Downloads/result.csv'

# Run the whole pipeline on the paths configured above
main(
    input_file_path=input_csv_file_path,
    output_file_path=output_csv_file_path,
)


Results saved to: /Downloads/result.csv
