In [None]:
import sys
import datetime
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np
import configparser

# Load database credentials from the local config.ini file.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so check that list here instead of failing a
# few lines later with an opaque KeyError on the missing 'Database' section.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini not found or unreadable in the working directory")

# Keyword arguments for psycopg2.connect, taken from the [Database] section.
db_section = config['Database']
database_config = {
    key: db_section[key]
    for key in ('database', 'user', 'password', 'host')
}

# Open the PostgreSQL connection used by the pd.read_sql calls in this notebook.
conn = psycopg2.connect(**database_config)

# Ten subreddits with the most rows in comments_for_reddits, largest first.
subreddit_count_query = """
    SELECT subreddit, COUNT(*) as count
    FROM comments_for_reddits
    GROUP BY subreddit
    ORDER BY count DESC
    LIMIT 10;
"""

# Load the ranking into a DataFrame with 'subreddit' and 'count' columns.
df_additional = pd.read_sql(sql=subreddit_count_query, con=conn)

# Draw the ranking as a bar chart through the axes-level API.
subreddit_fig, subreddit_ax = plt.subplots(figsize=(12, 8))
subreddit_ax.bar(
    df_additional['subreddit'],
    df_additional['count'],
    color='blue',
)
subreddit_ax.set_title('Top 10 Subreddits by Comment Count')
subreddit_ax.set_xlabel('Subreddit')
subreddit_ax.set_ylabel('Comment Count')
# Slant the subreddit names so long labels do not overlap.
plt.xticks(rotation=45, ha='right')
plt.show()


# Daily comment volume for the politics table over a fixed date window.
query = """
    SELECT created_utc FROM politics
    WHERE created_utc >= '2023-11-01' AND created_utc < '2023-11-14'
    ORDER BY created_utc;
    """
df = pd.read_sql(query, conn)

# Count comments per calendar day. Grouping on the normalized (midnight)
# timestamp assigns every row to its own day, compares full dates rather than
# only the day-of-month, and yields empty axes instead of raising when the
# query returns no rows.
created = pd.to_datetime(df['created_utc'])
daily_counts = created.groupby(created.dt.normalize()).size()

# Only measured days are plotted; no extrapolated point is appended.
x_axis = daily_counts.index.tolist()
y_axis = daily_counts.tolist()

# NOTE: this changes matplotlib's process-wide default figure size, so any
# figure created later without an explicit figsize inherits 30x30 inches.
plt.rcParams["figure.figsize"] = [30, 30]
fig, ax = plt.subplots()
ax.plot(x_axis, y_axis, "-o")
# Cap the number of date ticks and render them as ISO dates.
ax.xaxis.set_major_locator(plt.MaxNLocator(15))
ax.xaxis.set_major_formatter(DateFormatter("%Y-%m-%d"))

ax.set_title("Comments Received From Reddit API For Politics", fontsize=30)
ax.set_xlabel("Date", fontsize=30)
plt.xticks(fontsize=20, fontweight='bold', rotation=45)
plt.yticks(fontsize=25)
ax.set_ylabel("Num of Comments Received", fontsize=30)
# No extension given, so matplotlib saves using its default output format.
plt.savefig("custom_graph")
plt.show()

# Row-count queries, one per source table.
reddit_query = "SELECT COUNT(*) as count FROM comments_for_reddits;"
chan_query = "SELECT COUNT(*) as count FROM thread;"
youtube_query = "SELECT COUNT(*) as count FROM yt_comments;"

# Each query yields a single row; pull its 'count' value out as a scalar.
reddit_count, chan_count, youtube_count = (
    pd.read_sql(sql, conn)['count'][0]
    for sql in (reddit_query, chan_query, youtube_query)
)

# Bar chart comparing the three totals side by side.
labels = ['Reddit', '4chan', 'YouTube']
counts = [reddit_count, chan_count, youtube_count]

api_fig, api_ax = plt.subplots(figsize=(10, 6))
api_ax.bar(labels, counts, color=['blue', 'green', 'red'])
api_ax.set_title('Data Count from Different APIs')
api_ax.set_xlabel('APIs')
api_ax.set_ylabel('Count')
plt.show()





In [None]:



# For each analysis table, count rows per sentiment label and per hate-speech
# flag. Every query is kept next to the DataFrame it fills.

# Table an_re_all (Reddit)
sentiment_query_reddit = "SELECT sentiment, COUNT(*) as count FROM an_re_all GROUP BY sentiment;"
sentiment_reddit_df = pd.read_sql(sentiment_query_reddit, conn)
hate_speech_query_reddit = "SELECT is_hate_speech, COUNT(*) as count FROM an_re_all GROUP BY is_hate_speech;"
hate_speech_reddit_df = pd.read_sql(hate_speech_query_reddit, conn)

# Table an_4chan (4chan)
sentiment_query_4chan = "SELECT sentiment, COUNT(*) as count FROM an_4chan GROUP BY sentiment;"
sentiment_4chan_df = pd.read_sql(sentiment_query_4chan, conn)
hate_speech_query_4chan = "SELECT is_hate_speech, COUNT(*) as count FROM an_4chan GROUP BY is_hate_speech;"
hate_speech_4chan_df = pd.read_sql(hate_speech_query_4chan, conn)

# Table an_yt (YouTube)
sentiment_query_yt = "SELECT sentiment, COUNT(*) as count FROM an_yt GROUP BY sentiment;"
sentiment_yt_df = pd.read_sql(sentiment_query_yt, conn)
hate_speech_query_yt = "SELECT is_hate_speech, COUNT(*) as count FROM an_yt GROUP BY is_hate_speech;"
hate_speech_yt_df = pd.read_sql(hate_speech_query_yt, conn)

# Table an_politics (politics)
sentiment_query_politics = "SELECT sentiment, COUNT(*) as count FROM an_politics GROUP BY sentiment;"
sentiment_politics_df = pd.read_sql(sentiment_query_politics, conn)
hate_speech_query_politics = "SELECT is_hate_speech, COUNT(*) as count FROM an_politics GROUP BY is_hate_speech;"
hate_speech_politics_df = pd.read_sql(hate_speech_query_politics, conn)

# Per-comment scores joined to the politics comment timestamps for the fixed
# date window; the GROUP BY lists every selected column and uses no aggregate.
politics_analysis_query = "SELECT s.hate_speech_confidence, s.sentiment_score, p.created_utc FROM an_r_poli_score s join politics p on s.comment_id=p.comment_id WHERE created_utc >= '2023-11-01' AND created_utc < '2023-11-14' GROUP BY p.created_utc,s.hate_speech_confidence,s.sentiment_score;"
politics_scores_df = pd.read_sql(politics_analysis_query, conn)


def _plot_label_counts(counts_df, label_column, title, xlabel, color):
    """Draw one bar chart of per-label row counts on its own figure.

    Parameters
    ----------
    counts_df : pandas.DataFrame
        Frame holding the column named by ``label_column`` and a 'count' column.
    label_column : str
        Column whose values become the bar labels on the x-axis.
    title : str
        Figure title.
    xlabel : str
        X-axis label.
    color : str
        Bar color.
    """
    # DataFrame.plot opens a brand-new figure unless it is handed an Axes, so a
    # preceding plt.figure(figsize=...) would only leave an empty figure behind
    # and its size would be ignored. Create the axes here and pass them in.
    fig, ax = plt.subplots(figsize=(10, 6))
    counts_df.plot(
        kind='bar',
        x=label_column,
        y='count',
        color=color,
        fontsize=22,
        width=0.3,
        ax=ax,
    )
    ax.set_title(title, fontsize=30)
    ax.set_ylabel('Count', fontsize=25)
    ax.set_xlabel(xlabel, fontsize=25)
    plt.show()


# One sentiment chart followed by one hate-speech chart per source.
for platform, sentiment_counts, hate_speech_counts in (
    ('Reddit', sentiment_reddit_df, hate_speech_reddit_df),
    ('Politics', sentiment_politics_df, hate_speech_politics_df),
    ('4chan', sentiment_4chan_df, hate_speech_4chan_df),
    ('Youtube', sentiment_yt_df, hate_speech_yt_df),
):
    _plot_label_counts(
        sentiment_counts,
        'sentiment',
        f'Sentiment Analysis for {platform}',
        'Sentiment',
        'green',
    )
    _plot_label_counts(
        hate_speech_counts,
        'is_hate_speech',
        f'Hate Speech Analysis for {platform}',
        'Hate Speech',
        'red',
    )


# politics_analysis_query was already loaded into politics_scores_df earlier in
# this cell. Take an independent copy instead of re-running the identical
# query, so this name still holds the values as the database returned them
# (before the datetime conversion below).
hate_speech_scores_df = politics_scores_df.copy()

# Ensure 'created_utc' is a pandas datetime column.
politics_scores_df['created_utc'] = pd.to_datetime(politics_scores_df['created_utc'])

# Evenly spaced positions on [0, 2*pi], one per row of politics_scores_df.
# These are placeholder x-coordinates, not the real timestamps. No figure is
# opened here: nothing in the active code draws, and an unused plt.figure()
# would only render as an empty figure.
time_array = np.linspace(0, 2 * np.pi, len(politics_scores_df))

# Disabled (kept for reference): separate scatter plots for Hate Speech Confidence and Sentiment Score
# plt.figure(figsize=(12, 8))

# # Plot Hate Speech Confidence
# plt.scatter(time_array, politics_scores_df['hate_speech_confidence'], label='Hate Speech Confidence', color='red')
# plt.title('Hate Speech Confidence Over Time')
# plt.xlabel('Time')
# plt.ylabel('Hate Speech Confidence')
# plt.legend()
# plt.savefig("hate_speech_confidence_plot.png")
# plt.show()

# # Plot Sentiment Score
# plt.figure(figsize=(12, 8))
# plt.scatter(time_array, politics_scores_df['sentiment_score'], label='Sentiment Score', color='green')
# plt.title('Sentiment Score Over Time')
# plt.xlabel('Time')
# plt.ylabel('Sentiment Score')
# plt.legend()
# plt.savefig("sentiment_score_plot.png")
# plt.show()