In [None]:
# !pip install vaderSentiment

# Import Libraries

In [None]:
import pandas as pd
import altair as alt
# Turn off Altair's max-rows check so charts can be built from larger DataFrames.
alt.data_transformers.disable_max_rows()

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import warnings
# Hide DeprecationWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=DeprecationWarning) 

from datetime import datetime

## Import SentimentIntensityAnalyzer() from vaderSentiment.vaderSentiment 

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Shared analyzer, created on the first call and reused afterwards. This
# function is called once per row through Series.apply, so constructing a new
# SentimentIntensityAnalyzer on every call repeats the same setup work for
# every article.
_SENTIMENT_ANALYZER = None


def sentiment_scores(sentence):
    """Return the VADER compound sentiment score of ``sentence``.

    Parameters
    ----------
    sentence : str
        Text to score. ``None`` and float NaN (the missing-value marker pandas
        puts in text columns) are scored as an empty string; any other
        non-string value is converted with ``str()`` before scoring.

    Returns
    -------
    float
        The ``'compound'`` entry of the dict returned by
        ``SentimentIntensityAnalyzer.polarity_scores``.
    """
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()

    # NaN is the only float that is not equal to itself.
    if sentence is None or (isinstance(sentence, float) and sentence != sentence):
        sentence = ""
    elif not isinstance(sentence, str):
        sentence = str(sentence)

    return _SENTIMENT_ANALYZER.polarity_scores(sentence)['compound']

In [None]:
# Read the article table from a path relative to the working directory.
# Later cells use its title, text, date, label and subject columns.
df = pd.read_csv('csv/txt_processed.csv')

In [None]:
# Work on a 25% random sample of the loaded rows. random_state pins the draw so
# a fresh kernel run scores and plots the same rows every time. Sampling first
# and copying afterwards copies only the sampled rows, and gives the following
# cells an independent frame to add columns to.
df_cleaned = df.sample(frac=0.25, random_state=42).copy()

In [None]:
# Preview the first rows of the sampled frame.
df_cleaned.head()

## Clear NaN in the Text Column and Replace Them with an Empty String ("")
To avoid errors when the text data is processed in the following steps (concatenation with the title and sentiment scoring)

In [None]:
# Assign the filled column back instead of calling fillna(inplace=True) on the
# attribute-accessed Series: the in-place form operates on an intermediate
# object and is not guaranteed to update df_cleaned (pandas warns about this
# pattern under copy-on-write).
df_cleaned['text'] = df_cleaned['text'].fillna("")

## Perform Sentiment Analysis to Get the Sentiment Scores

In [None]:
# Join title and body with a space so the last word of the title and the first
# word of the body remain separate tokens for the sentiment scorer. Missing
# values are treated as empty strings so a single NaN does not turn the whole
# combined value into NaN.
df_cleaned['txtmix'] = df_cleaned['title'].fillna("") + " " + df_cleaned['text'].fillna("")

In [None]:
# Compound score of the combined title + text string, one value per row.
df_cleaned['mix'] = df_cleaned['txtmix'].map(sentiment_scores)

In [None]:
# Score the headline and the body separately with the same helper.
df_cleaned['title_sen'] = df_cleaned['title'].map(sentiment_scores)
df_cleaned['txt_sen'] = df_cleaned['text'].map(sentiment_scores)

In [None]:
# Replace the date column with its parsed datetime form so the .dt accessor works on it.
df_cleaned['date'] = df_cleaned['date'].pipe(pd.to_datetime)

In [None]:
# Month number taken from the parsed date; the year is not kept, so the same
# month of different years shares one value.
df_cleaned['month'] = df_cleaned.date.dt.month

## Plot Bar Chart to see the score

Sentiment score by month for fake news and true news

In [None]:
# Split the scored sample by label. The chart titles below present label 0 as
# fake news and label 1 as true news.
source1 = df_cleaned[df_cleaned['label'] == 0]
source2 = df_cleaned[df_cleaned['label'] == 1]


def _monthly_mix_chart(data, title):
    """Build a bar chart of the mean combined (title + text) score per month.

    The y channel is aggregated with ``mean``: an unaggregated quantitative
    field on a bar mark is drawn as one stacked segment per row, so the bar
    height would be a sum that depends on how many articles fall in the month.
    Only the two plotted columns are handed to Altair so the article text is
    not embedded in the chart.
    """
    return alt.Chart(data[['month', 'mix']]).mark_bar().encode(
        x=alt.X('month:O', title='month'),
        y=alt.Y('mean(mix):Q', title='mean mix'),
    ).properties(
        title=title
    )


month_fake = _monthly_mix_chart(source1, 'Sentimental Score on Fake News by month')
month_true = _monthly_mix_chart(source2, 'Sentimental Score on True News by month')

# Show both charts side by side: fake news on the left, true news on the right.
month_fake | month_true

## Plot Boxplot on Title and Text Columns
Sentimental Score of Fake News' Text

In [None]:
sns.set_theme(style="ticks")

# Create one wide axes to draw on
f, ax = plt.subplots(figsize=(17, 6))

# Keep only the rows with label 0 (the fake-news subset, per the plot title)
data = df_cleaned[df_cleaned['label'] == 0]
sns.set(font_scale=2)

# Horizontal box per subject; whis=[0, 100] runs the whiskers from the 0th to
# the 100th percentile
sns.boxplot(
    data=data, x="txt_sen", y="subject",
    whis=[0, 100], width=.6, palette="vlag", ax=ax,
)
ax.set_title('Sentimental Score of Fake News Articles')

# Draw each individual article score on top of the boxes
sns.stripplot(
    data=data, x="txt_sen", y="subject",
    size=4, color=".3", linewidth=0, ax=ax,
)

# Vertical grid lines, no y-axis label, trimmed spines
ax.grid(True, axis="x")
ax.set_ylabel("")
sns.despine(trim=True, left=True)

Sentimental Score of Fake News' Title

In [None]:
sns.set_theme(style="ticks")

# Create one wide axes to draw on
f, ax = plt.subplots(figsize=(17, 6))

# Keep only the rows with label 0 (the fake-news subset, per the plot title)
data = df_cleaned[df_cleaned['label'] == 0]
sns.set(font_scale=2)

# Horizontal box per subject; whis=[0, 100] runs the whiskers from the 0th to
# the 100th percentile
sns.boxplot(
    data=data, x="title_sen", y="subject",
    whis=[0, 100], width=.6, palette="vlag", ax=ax,
)
ax.set_title('Sentimental Score of Fake News Titles')

# Draw each individual title score on top of the boxes
sns.stripplot(
    data=data, x="title_sen", y="subject",
    size=4, color=".3", linewidth=0, ax=ax,
)

# Vertical grid lines, no y-axis label, trimmed spines
ax.grid(True, axis="x")
ax.set_ylabel("")
sns.despine(trim=True, left=True)

Sentimental Score of True News' Text

In [None]:
sns.set_theme(style="ticks")

# Create one wide axes to draw on
f, ax = plt.subplots(figsize=(17, 6))

# Keep only the rows with label 1 (the true-news subset, per the plot title)
data = df_cleaned[df_cleaned['label'] == 1]
sns.set(font_scale=2)

# Horizontal box per subject; whis=[0, 100] runs the whiskers from the 0th to
# the 100th percentile
sns.boxplot(
    data=data, x="txt_sen", y="subject",
    whis=[0, 100], width=.6, palette="vlag", ax=ax,
)
ax.set_title('Sentimental Score of True News Articles')

# Draw each individual article score on top of the boxes
sns.stripplot(
    data=data, x="txt_sen", y="subject",
    size=4, color=".3", linewidth=0, ax=ax,
)

# Vertical grid lines, no y-axis label, trimmed spines
ax.grid(True, axis="x")
ax.set_ylabel("")
sns.despine(trim=True, left=True)

Sentimental Score of True News' Title

In [None]:
sns.set_theme(style="ticks")

# Create one wide axes to draw on
f, ax = plt.subplots(figsize=(17, 6))

# Keep only the rows with label 1 (the true-news subset, per the plot title)
data=df_cleaned[df_cleaned['label']==1]
sns.set(font_scale=2)

# Box per subject of the TITLE score. The plot is titled "True News Titles",
# so it must read title_sen (as the fake-news title plot does), not the
# combined "mix" score.
sns.boxplot(x="title_sen", y="subject", data=data,
            whis=[0, 100], width=.6, palette="vlag", ax=ax).set(title='Sentimental Score of True News Titles')

# Add in points to show each observation
sns.stripplot(x="title_sen", y="subject", data=data,
              size=4, color=".3", linewidth=0, ax=ax)

# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="")
sns.despine(trim=True, left=True)

## Groupby label and month to compare the mean of sentimental score on Title and Text of Fake and Real News for each month

In [None]:
# Preview the frame again now that the score, date and month columns exist.
df_cleaned.head()

In [None]:
# Re-apply the seaborn theme with font_scale=1 (earlier cells set it to 2).
sns.set_theme(font_scale=1)

In [None]:
# Monthly mean of the title and text scores for label 0 (fake news).
# The two columns are selected with a list: indexing a GroupBy with a bare
# tuple of names (["a", "b"]) was deprecated and is rejected by pandas 2.x.
(
    df_cleaned[df_cleaned['label'] == 0]
    .groupby('month')[["title_sen", "txt_sen"]]
    .mean()
    .plot(figsize=(10, 6), title='Mean Sentimental Score by Month on Fake News')
)

In [None]:
# Monthly mean of the title and text scores for label 1 (true news).
# The two columns are selected with a list: indexing a GroupBy with a bare
# tuple of names (["a", "b"]) was deprecated and is rejected by pandas 2.x.
(
    df_cleaned[df_cleaned['label'] == 1]
    .groupby('month')[["title_sen", "txt_sen"]]
    .mean()
    .plot(figsize=(10, 6), title='Mean Sentimental Score by Month on True News')
)

In [None]:
# Preview the scored frame (same view as the earlier head() cell).
df_cleaned.head()

In [None]:
sns.set_theme(style="white", context="talk")
# NOTE(review): sns.set() is an alias of set_theme() and re-applies the default
# for every argument that is not passed, so this call also replaces the
# style/context chosen on the previous line. Both calls are kept so the figure
# styling stays as it was.
sns.set(font_scale=1)

fig, ax = plt.subplots(figsize=(10, 5))

title_color, text_color = '#7c94b6', '#ff9f9b'

# Reshape to long form so title and text scores are drawn as side-by-side bars.
# Two separate barplot calls on the same axes put both series at the same x
# positions, and the second (opaque) series hides the first wherever its bar
# is longer.
fake_scores_long = (
    df_cleaned.loc[df_cleaned['label'] == 0, ['month', 'title_sen', 'txt_sen']]
    .rename(columns={'title_sen': 'Title', 'txt_sen': 'Text'})
    .melt(id_vars='month', var_name='part', value_name='score')
)
sns.barplot(x='month', y='score', hue='part', hue_order=['Title', 'Text'],
            palette={'Title': title_color, 'Text': text_color},
            data=fake_scores_long, ax=ax)

# custom legend (replaces the one seaborn generates for the hue variable)
title_patch = mpatches.Patch(color=title_color, label='Title')
text_patch = mpatches.Patch(color=text_color, label='Text')
ax.legend(handles=[title_patch, text_patch])

# labels for x & y axis
ax.set_xlabel('Months')
ax.set_ylabel('Sentimental Compound Score')

# title of plot
ax.set_title('Sentimental Compound Score of Title & Text on Fake News')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))

title_color, text_color = '#7c94b6', '#ff9f9b'

# Reshape to long form so title and text scores are drawn as side-by-side bars.
# Two separate barplot calls on the same axes put both series at the same x
# positions, and the second (opaque) series hides the first wherever its bar
# is longer.
true_scores_long = (
    df_cleaned.loc[df_cleaned['label'] == 1, ['month', 'title_sen', 'txt_sen']]
    .rename(columns={'title_sen': 'Title', 'txt_sen': 'Text'})
    .melt(id_vars='month', var_name='part', value_name='score')
)
sns.barplot(x='month', y='score', hue='part', hue_order=['Title', 'Text'],
            palette={'Title': title_color, 'Text': text_color},
            data=true_scores_long, ax=ax)

# custom legend (replaces the one seaborn generates for the hue variable)
title_patch = mpatches.Patch(color=title_color, label='Title')
text_patch = mpatches.Patch(color=text_color, label='Text')
ax.legend(handles=[title_patch, text_patch])

# labels for x & y axis
ax.set_xlabel('Months')
ax.set_ylabel('Sentimental Compound Score')

# title of plot
ax.set_title('Sentimental Compound Score of Title & Text on True News')
plt.show()

In [None]:
# f, (ax1, ax2) = plt.subplots(2, 1, figsize=(7, 5), sharex=True)

# sns.barplot(x='month', y='title_sen', data=df_cleaned, color='red', alpha=.8, ax=ax1)
# ax1.axhline(0, clip_on=False)
# ax1.set_ylabel("Title")

# sns.barplot(x='month', y='txt_sen', data=df_cleaned, color='blue', alpha=.8, ax=ax2)
# ax2.axhline(0, clip_on=False)
# ax2.set_ylabel("Text")

# # Finalize the plot
# sns.despine(bottom=True)
# plt.setp(f.axes, yticks=[])
# plt.tight_layout(h_pad=2)


# # title of plot
# plt.title('Sentimental Compound Score of Title & Text')
# plt.show()

In [None]:
# Per-label summary table: mean, standard deviation, maximum and minimum of
# each of the three score columns.
score_columns = ["title_sen", "txt_sen", 'mix']
summary_stats = ['mean', 'std', 'max', 'min']
df_cleaned.groupby('label')[score_columns].agg(summary_stats)

## Sentiment Analysis on Web-Scraped Data

In [None]:
# Read the fake-news CSV, using its first column as the row index.
df_fake = pd.read_csv('csv/fake_news.csv', index_col=[0])

In [None]:
# Score the fake-news titles and bodies. Missing values are filled with ""
# first, in line with how the text columns of df_cleaned and df_true are
# prepared, so a NaN cell is scored as empty text instead of being passed to
# the scorer as a float.
df_fake['title_sentiment'] = df_fake['title'].fillna("").apply(sentiment_scores)
df_fake['text_sentiment'] = df_fake['text'].fillna("").apply(sentiment_scores)

In [None]:
# Read the true-news CSV, using its first column as the row index.
# NOTE(review): 'unicode_escape' is a Python codec that interprets backslash
# escape sequences rather than a regular file encoding — confirm it is the
# intended way to decode this file.
df_true = pd.read_csv('csv/true_news.csv', encoding='unicode_escape', index_col=[0])

In [None]:
# Assign the filled column back instead of calling fillna(inplace=True) on the
# attribute-accessed Series: the in-place form operates on an intermediate
# object and is not guaranteed to update df_true (pandas warns about this
# pattern under copy-on-write).
df_true['text'] = df_true['text'].fillna("")

In [None]:
# Compound sentiment score for every true-news title and body.
df_true['title_sentiment'] = df_true['title'].map(sentiment_scores)
df_true['text_sentiment'] = df_true['text'].map(sentiment_scores)

In [None]:
# Preview the true-news frame with its two new sentiment columns.
df_true.head()

## Boxplot of Fake News

In [None]:
sns.set_theme(style="ticks")

# Single wide axes for the horizontal box plot
f, ax = plt.subplots(figsize=(17, 6))

# Fake-news articles with their sentiment scores
data = df_fake

# One box per subject; whis=[0, 100] puts the whiskers at the 0th and 100th
# percentiles. ax is passed explicitly so the plot does not depend on which
# axes happens to be current.
sns.boxplot(x="title_sentiment", y="subject", data=data,
            whis=[0, 100], width=.6, palette="vlag", ax=ax)

# Add in points to show each observation
sns.stripplot(x="title_sentiment", y="subject", data=data,
              size=4, color=".3", linewidth=0, ax=ax)

# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="")
ax.set_title("Sentiment Analysis on Fake News Titles", fontsize=20)
sns.despine(trim=True, left=True)

In [None]:
sns.set_theme(style="ticks")

# Single wide axes for the horizontal box plot
f, ax = plt.subplots(figsize=(17, 6))

# Fake-news articles with their sentiment scores
data = df_fake

# One box per subject; whis=[0, 100] puts the whiskers at the 0th and 100th
# percentiles. ax is passed explicitly so the plot does not depend on which
# axes happens to be current.
sns.boxplot(x="text_sentiment", y="subject", data=data,
            whis=[0, 100], width=.6, palette="vlag", ax=ax)

# Add in points to show each observation
sns.stripplot(x="text_sentiment", y="subject", data=data,
              size=4, color=".3", linewidth=0, ax=ax)

# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="")
ax.set_title("Sentiment Analysis on Fake News Text", fontsize=20)
sns.despine(trim=True, left=True)

## Boxplot of True News

In [None]:
sns.set_theme(style="ticks")

# Single wide axes for the horizontal box plot
f, ax = plt.subplots(figsize=(17, 6))

# True-news articles with their sentiment scores
data = df_true

# One box per subject; whis=[0, 100] puts the whiskers at the 0th and 100th
# percentiles. ax is passed explicitly so the plot does not depend on which
# axes happens to be current.
sns.boxplot(x="title_sentiment", y="subject", data=data,
            whis=[0, 100], width=.6, palette="vlag", ax=ax)

# Add in points to show each observation
sns.stripplot(x="title_sentiment", y="subject", data=data,
              size=4, color=".3", linewidth=0, ax=ax)

# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="")
ax.set_title("Sentiment Analysis on True News Titles", fontsize=20)
sns.despine(trim=True, left=True)

In [None]:
sns.set_theme(style="ticks")

# Single wide axes for the horizontal box plot
f, ax = plt.subplots(figsize=(17, 6))

# True-news articles with their sentiment scores
data = df_true

# One box per subject; whis=[0, 100] puts the whiskers at the 0th and 100th
# percentiles. ax is passed explicitly so the plot does not depend on which
# axes happens to be current.
sns.boxplot(x="text_sentiment", y="subject", data=data,
            whis=[0, 100], width=.6, palette="vlag", ax=ax)

# Add in points to show each observation
sns.stripplot(x="text_sentiment", y="subject", data=data,
              size=4, color=".3", linewidth=0, ax=ax)

# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="")
ax.set_title("Sentiment Analysis on True News Text", fontsize=20)
sns.despine(trim=True, left=True)

In [None]:
# Stack the true-news rows on top of the fake-news rows and renumber the index
# from zero. The plots that follow read subject, label and the two sentiment
# columns from the combined frame.
data_n = pd.concat(objs=[df_true, df_fake], ignore_index=True)

In [None]:
sns.set_theme(style="ticks")

# Single axes for the grouped horizontal box plot
f, ax = plt.subplots(figsize=(10, 6))

# Combined true + fake articles
data = data_n

# One pair of boxes per subject, split by label; whis=[0, 100] puts the
# whiskers at the 0th and 100th percentiles. ax is passed explicitly so the
# plot does not depend on which axes happens to be current.
sns.boxplot(x="text_sentiment", y="subject", data=data,
            whis=[0, 100], width=.6, hue='label', ax=ax)


# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="")
ax.set_title("Sentiment Analysis on Text", fontsize=20)
# Place the legend just outside the right edge of the axes; the trailing
# semicolon keeps the Legend repr out of the cell output.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left');

In [None]:
sns.set_theme(style="ticks")

# Single axes for the grouped horizontal box plot
f, ax = plt.subplots(figsize=(10, 6))

# Combined true + fake articles
data = data_n

# One pair of boxes per subject, split by label; whis=[0, 100] puts the
# whiskers at the 0th and 100th percentiles. ax is passed explicitly so the
# plot does not depend on which axes happens to be current.
sns.boxplot(x="title_sentiment", y="subject", data=data,
            whis=[0, 100], width=.6, hue='label', ax=ax)


# Tweak the visual presentation
ax.xaxis.grid(True)
ax.set(ylabel="")
ax.set_title("Sentiment Analysis on Title", fontsize=20)
# Place the legend just outside the right edge of the axes; the trailing
# semicolon keeps the Legend repr out of the cell output.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left');