In [2]:
import pandas as pd
df=pd.read_csv("C:/Users/abdullah/Desktop/Data Visualization Project/Labeled_Dataset_with_Keywords.csv")

df.head()

Unnamed: 0,Source,Link,Headline,Description,Timestamp,Date,Topic,Author,Region,Article_Content,Sentiment_Bias,Keywords
0,Al Jazeera,https://www.aljazeera.com/tag/israel-palestine...,Israel-Palestine conflict | Today's latest fro...,How Israel destroyed Gaza · 'The birds are wit...,3 Oct 2024,2024-10-14,Israel War,AlJazeera,Middle East,‘Self-defence’ has vastly different meanings f...,Negative,"['genocide', 'killing', 'canadians', 'killed',..."
1,Al Jazeera,https://www.aljazeera.com/tag/gaza/,Gaza | Today's latest from Al Jazeera,... Israeli. Nicaragua breaks diplomatic ties ...,12 Oct 2024,2024-10-14,Israel War,AlJazeera,Middle East,‘Self-defence’ has vastly different meanings f...,Negative,"['killed', 'siege', 'bomb', 'attack', 'injured..."
2,Al Jazeera,https://www.aljazeera.com/news/2023/9/28/turki...,Turkish neutrality: How Erdogan manages ties w...,"Sep 28, 2023 ... But Erdogan's stance does hel...",Last update 28 Sep 2023,2024-10-14,Ukraine War,AlJazeera,Ukraine,"‘The West is reliable, Russia is equally relia...",Neutral,"['putin', '1850s', 'russian', 'pbs', 'russia',..."
3,Al Jazeera,https://www.aljazeera.com/features/2016/11/8/u...,US elections in Nigeria: 'The best reality TV ...,"Nov 8, 2016 ... Efeoghene Ori-Jesu, 34, is wat...",Last update 8 Nov 2016,2024-10-15,US Presidential Elections,AlJazeera,USA,“I’m excited at the possibility of a first fem...,Positive,"['president', 'trump', 'americans', 'clinton',..."
4,Al Jazeera,https://www.aljazeera.com/news/liveblog/2024/9...,Israel's war on Gaza updates: New blasts in Le...,"Sep 18, 2024 ... A day after simultaneous blas...",Last update 19 Sep 2024,2024-10-14,Israel War,AlJazeera,Middle East,A day after simultaneous blasts across Lebanon...,Negative,"['killed', 'explosions', 'wounded', 'blasts', ..."


In [4]:
columns=df.columns
print(columns)

Index(['Source', 'Link', 'Headline', 'Description', 'Timestamp', 'Date',
       'Topic', 'Author', 'Region', 'Article_Content', 'Sentiment_Bias',
       'Keywords'],
      dtype='object')


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB,GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

data=df
X = data['Article_Content']
y = data['Sentiment_Bias']

vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

models = {
    "Naive BayesMB": MultinomialNB(),
    #"Naive BayesG": GaussianNB(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier()
}

accuracy_scores = {}
for model_name, model in models.items():
   
   if model_name == "Naive Bayes (GaussianNB)":
        # Convert sparse matrix to dense for GaussianNB
        X_train_dense = X_train.toarray()
        X_test_dense = X_test.toarray()
        model.fit(X_train_dense, y_train)
        y_pred = model.predict(X_test_dense)
   else:
        
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    
   accuracy = accuracy_score(y_test, y_pred)
   accuracy_scores[model_name] = accuracy

for model_name, accuracy in accuracy_scores.items():
    print(f"{model_name}: Accuracy = {accuracy:.2f}")


Naive BayesMB: Accuracy = 0.69
Logistic Regression: Accuracy = 0.73
Decision Tree: Accuracy = 0.64


In [5]:
import plotly.graph_objects as go

# Data for the models and their accuracies
models = ["Naive Bayes", "Logistic Regression", "Decision Tree"]
accuracies = [0.69, 0.73, 0.64]

# Create a bar graph
fig = go.Figure(
    data=[
        go.Bar(
            x=models,
            y=accuracies,
            text=[f"{acc * 100:.1f}%" for acc in accuracies],
            textposition='auto',
            marker_color=['#ffaaaa', '#aaffaa', '#aaaaff']  # Different colors for each bar
        )
    ]
)

# Customize layout
fig.update_layout(
    title="Model Accuracy Comparison",
    xaxis_title="Models",
    yaxis_title="Accuracy",
    yaxis=dict(tickformat=".0%"),
    template="plotly_white",
    width=700,
    height=500
)

# Show the graph
fig.show()


In [9]:
import plotly.express as px

fig = px.histogram(df, x='Sentiment_Bias', color='Source', barmode='group', nbins=20)
fig.update_layout(
    title='Histogram of Sentiment Bias by Source',
    xaxis_title='Sentiment Bias',
    yaxis_title='Frequency'
)
fig.show()

In [None]:
import plotly.express as px

sentiment_source_counts = df.groupby(['Source', 'Sentiment_Bias']).size().reset_index(name='Count')

fig = px.line(sentiment_source_counts, x='Source', y='Count', color='Sentiment_Bias', markers=True)

fig.update_layout(
    title='Sentiment Bias by Source',
    xaxis_title='Source',
    yaxis_title='Count',
    legend_title='Sentiment Bias'
)

fig.show()

In [15]:
fig = px.histogram(df, x='Sentiment_Bias', nbins=20)
fig.update_layout(
    title='Histogram of Sentiment Bias',
    xaxis_title='Sentiment Bias',
    yaxis_title='Frequency'
)
fig.show()

In [19]:
sources = sentiment_source_counts['Source'].unique()

for source in sources:
    source_data = sentiment_source_counts[sentiment_source_counts['Source'] == source]
    fig = px.pie(source_data, names='Sentiment_Bias', values='Count', title=f'Sentiment Distribution for {source}')
    fig.show()