In [1]:
import os
import pandas as pd
from textblob import TextBlob


In [2]:
# Directory for storing data
DATA_DIR = "../data"


In [3]:
# Domains and their respective CSV files
domain_files = {
    "sports": "sports_articles.csv",
    "technology": "technology_articles.csv",
    "health": "health_articles.csv",
    "entertainment": "entertainment_articles.csv",
    "business": "business_articles.csv",
}

In [4]:
# Function to preprocess text
def preprocess_text(text):
    # Add your preprocessing steps here
    # For example: lowercasing, removing punctuation, etc.
    text = text.lower()
    text = "".join(char for char in text if char.isalnum() or char.isspace())
    return text

In [11]:
# Function to analyze sentiment
def analyze_sentiment(text):
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    print(polarity)

    if polarity > 0:
        return 1, 0, 0  # Positive
    elif polarity < 0:
        return 0, 1, 0  # Negative
    else:
        return 0, 0, 1  # Neutral

In [15]:
text = """Thaman S: The creative force behind hits like Ramuloo Ramulaa and Nain MattakaThaman S, famed for his South Indian hits, is set to debut his first full Bollywood album with Baby John this Christmas. The film stars Varun Dhawan in the lead and the will be released on December 25.",https://www.indiatoday.in/movies/bollywood/story/meet-thaman-s-man-behind-tracks-like-ramuloo-ramulaa-nain-mattaka-baby-john-varun-dhawan-2651892-2024-12-18,"If you’re a fan of South Indian cinema and its infectious music, chances are you’ve found yourself dancing to Ramuloo Ramulaa from Ala Vaikunthapurramuloo. But do you know the musical genius behind this chartbuster? Enter Thaman S, the maestro whose compositions have become chartbusters. He is set to make waves in Bollywood with his debut full album for Varun Dhawan’s Christmas release, Baby John. Here’s everything you need to know about this prolific composer and his extraordinary journey. Born as Ghantasala Sai Srinivas in Nellore, Andhra Pradesh, Thaman comes from a lineage steeped in music. His grandfather, Ghantasala Balaramayya, was a celebrated director and producer, while his father, Ghantasala Siva Kumar, was a renowned drummer who worked in over 700 films with legendary composer K Chakravarthy. His mother, Ghantasala Savitri, sister Yamini, and aunt B. Vasantha have all lent their voices as playback singers. Growing up in a Telugu-speaking family of musicians in Chennai, Thaman was immersed in the rhythms and melodies that would later define his career.   Thaman made his debut as a music director with Kick (2009) and has since become one of the most sought-after composers in South Indian cinema. His discography reads like a greatest-hits playlist -- Race Gurram, Sarrainodu, Tuck Jagadish, Sarkaru Vaari Paata, Varisu, and Guntur Kaaram. Each project showcased his knack for blending traditional Indian sounds with modern beats, creating tracks that resonate with listeners across generations and regions. It’s not just his music, but also his collaborations with top stars like Allu Arjun, Mahesh Babu, and Vijay that have cemented his position as a musical powerhouse. Tracks like Butta Bomma and Jai Balayya are proof of his ability to craft earworms that linger long after the credits roll.   While Thaman has dabbled in Bollywood before, composing the main theme for Simmba, Jai Bajrangbali for Singham Again, and the title track for Golmaal Again, Baby John marks his full-fledged album debut. Fans are eagerly awaiting how his signature style translates to Bollywood’s diverse musical landscape. Thaman’s entry into Bollywood is a natural progression for a composer who has already achieved superstar status in the South. With Baby John releasing this Christmas, he’s set to introduce a whole new audience to his distinctive sound.   Thaman’s appeal lies in his ability to innovate while staying true to his roots. Whether it’s the pulsating beats of Ramuloo Ramulaa or the soulful melodies of Butta Bomma, his compositions have a universal quality that transcends language and culture.   For those unfamiliar with Thaman’s work, now is the perfect time to explore his discography. Start with Ala Vaikunthapurramuloo and Race Gurram, then dive into his latest releases like Varisu and Guntur Kaaram. With Baby John just around the corner, Thaman S is poised to become a household name not just in India but globally. Whether you’re a die-hard fan or a curious newcomer, Thaman’s music promises a journey full of rhythm, emotion, and unforgettable melodies.Published By: Trisha BhattacharyaPublished On: Dec 18, 2024ALSO READ |  Varun Dhawan's Baby John: What to expect from the remake of Atlee's Theri
"""

In [13]:
analyze_sentiment(text)

0.24957386363636364


(1, 0, 0)

In [18]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()
text = "hey i am sad but i m glad ur here"

# Get sentiment scores (positive, negative, neutral, and compound)
scores = analyzer.polarity_scores(text)

# Extract positive and negative percentages
positive_percentage = scores['pos'] * 100
negative_percentage = scores['neg'] * 100

print(f"Positive: {positive_percentage}%")
print(f"Negative: {negative_percentage}%")


Positive: 28.499999999999996%
Negative: 14.6%


In [23]:

# Function to analyze sentiment using VADER
def analyze_sentiment(text):
    analyzer = SentimentIntensityAnalyzer()
    # Get sentiment scores (positive, negative, neutral, and compound)
    scores = analyzer.polarity_scores(text)
    
    # Extract positive and negative percentages and round them to 2 decimals
    positive_percentage = round(scores['pos'] * 100, 2)
    negative_percentage = round(scores['neg'] * 100, 2)
    
    # Return positive and negative percentages
    return positive_percentage, negative_percentage

# Function to process each domain's articles
def process_domain(domain, file_name):
    file_path = os.path.join(DATA_DIR, file_name)

    if not os.path.exists(file_path):
        print(f"File for domain '{domain}' not found: {file_name}")
        return

    # Read the CSV file
    df = pd.read_csv(file_path)

    # Add sentiment columns
    df["Positive"] = 0
    df["Negative"] = 0

    # Process each row
    for index, row in df.iterrows():
        headline = preprocess_text(row["Headline"])
        content = preprocess_text(row["Content"])

        # Combine headline and content for sentiment analysis
        combined_text = f"{headline} {content}"
        positive_percentage, negative_percentage = analyze_sentiment(combined_text)

        # Store the results in the dataframe
        df.at[index, "Positive"] = positive_percentage
        df.at[index, "Negative"] = negative_percentage

    # Save the updated CSV
    df.to_csv(file_path, index=False)
    print(f"Sentiment analysis completed for domain: {domain}")

In [24]:
# Main script execution
def main():
    for domain, file_name in domain_files.items():
        print(f"Processing domain: {domain}")
        process_domain(domain, file_name)
        break

In [25]:
main()

Processing domain: sports


  df.at[index, "Positive"] = positive_percentage
  df.at[index, "Negative"] = negative_percentage


Sentiment analysis completed for domain: sports
