In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
import plotly.express as px
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import re

# Location of the combined, pre-cleaned CSV that the rest of the notebook reads
DATA_PATH = '/content/combined_cleaned_data.csv'
data = pd.read_csv(DATA_PATH)

# Print the first rows as a quick sanity check of the loaded columns
preview = data.head()
print(preview)

                                           Paragraph  \
0                       To enjoy additional benefits   
1                                    CONNECT WITH US   
2  Updated  - November 02, 2024 10:41 pm IST - Mu...   
3  \n                    Shiv Sena (UBT) chief Ud...   
4  Shiv Sena (UBT) chief Uddhav Thackeray will be...   

                                        cleaned_text  
0                           enjoy additional benefit  
1                                          connect u  
2                     updated november pm ist mumbai  
3  shiv sena ubt chief uddhav thackeray file phot...  
4  shiv sena ubt chief uddhav thackeray begin cam...  


In [None]:
def get_sentiment(text):
    """Return the TextBlob sentiment polarity of ``text``.

    The argument is passed through ``str()`` before scoring, so non-string
    cells (for example float values) are scored on their string form
    instead of raising.
    """
    return TextBlob(str(text)).sentiment.polarity

# Score every cleaned paragraph and store the polarity next to its text
cleaned_paragraphs = data['cleaned_text']
data['sentiment_score'] = cleaned_paragraphs.apply(get_sentiment)

In [None]:
# Define a list of political parties (you can add/remove as per need)
parties = ['bjp', 'shiv sena', 'shiv sena ubt', 'congress', 'ncp']


def _party_mention_pattern(party, all_parties):
    """Build a regex that matches ``party`` as a whole phrase.

    Occurrences that are only the beginning of a longer name in
    ``all_parties`` are excluded, e.g. 'shiv sena' does not match inside
    'shiv sena ubt'. Word boundaries stop short names from matching inside
    other tokens.
    """
    # Suffixes that would turn `party` into a longer listed name (' ubt' for 'shiv sena')
    longer_suffixes = [
        other[len(party):]
        for other in all_parties
        if other != party and other.startswith(party + ' ')
    ]
    pattern = r'\b' + re.escape(party) + r'\b'
    if longer_suffixes:
        alternatives = '|'.join(re.escape(suffix) for suffix in longer_suffixes)
        # Non-capturing groups only, so pandas does not warn about match groups
        pattern += r'(?!(?:' + alternatives + r')\b)'
    return pattern


# Create one boolean column per party: True where the paragraph mentions it.
# A plain substring test would also flag every 'shiv sena ubt' paragraph as
# 'shiv sena', mixing the two parties' scores.
cleaned_text = data['cleaned_text'].astype(str)
for party in parties:
    data[party] = cleaned_text.str.contains(
        _party_mention_pattern(party, parties), regex=True, na=False
    )

# Average the sentiment score over the paragraphs mentioning each party
# (NaN when a party is never mentioned)
party_sentiments = {
    party: data.loc[data[party], 'sentiment_score'].mean()
    for party in parties
}

# Convert party sentiments to DataFrame for easy plotting
party_sentiment_df = pd.DataFrame(list(party_sentiments.items()), columns=['Party', 'Average_Sentiment'])
print(party_sentiment_df)


           Party  Average_Sentiment
0            bjp           0.009492
1      shiv sena          -0.005274
2  shiv sena ubt          -0.004646
3       congress           0.026280
4            ncp           0.004637


In [None]:
# Bar chart: one bar per party; bar height and colour both encode the average score
bar_options = dict(
    x='Party',
    y='Average_Sentiment',
    color='Average_Sentiment',
    color_continuous_scale='viridis',
    title='Average Sentiment Score by Political Party',
)
fig = px.bar(party_sentiment_df, **bar_options)

# Axis titles and colour-bar caption
layout_options = {
    "xaxis_title": "Political Party",
    "yaxis_title": "Average Sentiment Score",
    "coloraxis_colorbar": {"title": "Sentiment Score", "ticksuffix": ""},
}
fig.update_layout(**layout_options)

# Render the figure
fig.show()

This bar plot compares the average sentiment score of each political party. Each score is the mean TextBlob polarity of the paragraphs that mention the party, so it reflects the tone of the collected text rather than a direct measure of public opinion.

In [None]:
# Define features and target variable.
# Parties that were never mentioned have a NaN average; drop them because
# LinearRegression cannot be fitted on NaN inputs.
scored_parties = party_sentiment_df.dropna(subset=['Average_Sentiment'])
X = scored_parties[['Average_Sentiment']]
# 1 = positive outcome, 0 = negative outcome (can adjust based on goals)
# NOTE(review): the target is the sign of the only feature, so the model is
# fitted on information it is also asked to predict; treat the metrics as a
# pipeline smoke test, not as evidence of predictive power.
y = [1 if sentiment > 0 else 0 for sentiment in scored_parties['Average_Sentiment']]

# Train-test split.
# R^2 is undefined for a single test sample, which is what a 20% split of a
# handful of parties produces. Hold out ceil(20%) of the rows, but never
# fewer than two.
if len(X) < 4:
    raise ValueError(
        "Need at least 4 parties with a sentiment score to fit and evaluate the model"
    )
n_test = max(2, -(-len(X) // 5))  # -(-n // 5) is ceil(n / 5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test, random_state=42)

# Model training
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R-squared:", r2_score(y_test, y_pred))


Mean Squared Error: 0.1345995267356947
R-squared: nan



R^2 score is not well-defined with less than two samples.



In [None]:
# Create the heatmap: a single row of cells, one per party
fig = px.imshow(
    party_sentiment_df[['Average_Sentiment']].T,  # Transpose to set parties on x-axis
    labels=dict(x="Party", y="Sentiment Type", color="Sentiment Score"),
    x=party_sentiment_df['Party'],
    y=['Average Sentiment'],
    color_continuous_scale='RdYlGn',  # Red to green for negative to positive sentiment
    # Anchor the middle of the diverging scale at 0. Without this the scale
    # spans min..max of the data, so small positive scores can be drawn red.
    color_continuous_midpoint=0,
    title="Sentiment Scores Affecting Political Parties"
)

# Customize layout
fig.update_layout(
    xaxis_title="Political Party",
    yaxis_title="Sentiment Type",
    coloraxis_colorbar=dict(title="Sentiment Score", ticksuffix=""),
)

# Show the plot
fig.show()


This heatmap shows the average sentiment score of each political party on a red-to-green color scale: redder cells indicate more negative sentiment and greener cells more positive sentiment, based on the sentiment analysis.