In [None]:
!pip install psycopg2
!pip install plotly
!pip install kaleido
!pip install calmap

In [None]:
from urllib.parse import urlparse
import psycopg2
import psycopg2.extras
from pprint import pprint
import pandas as pd

import plotly.io as pio
import plotly.express as px

import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a readable message when the connection URL is missing.
# Without this guard, urlparse(None) yields empty fields and psycopg2 reports
# an unrelated-looking connection error.
postgres_url = os.environ.get("postgres_url")
if not postgres_url:
    raise RuntimeError(
        "Environment variable 'postgres_url' is not set; expected something like "
        "postgresql://user:password@host:5432/dbname"
    )

# Split the URL into the individual keyword arguments psycopg2 expects.
url = urlparse(postgres_url)
connection = psycopg2.connect(
    host=url.hostname,
    port=url.port,
    database=url.path[1:],  # drop the leading "/" of the URL path
    user=url.username,
    password=url.password,
)
# RealDictCursor makes every fetched row a dict keyed by column name, which the
# later cells rely on (e.g. guild["guild_name"]).
cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

# Example Test Bar Chart

In [None]:
# Make "png" the default Plotly renderer (a module-level setting, so it also
# applies to the figures shown in later cells), then smoke-test it with a
# three-bar chart.
pio.renderers.default = "png"
fig = px.bar(x=list("abc"), y=[1, 3, 2])
fig.show()

# Earliest message per Guild

In [None]:
# Earliest message of every guild, together with its guild and channel names.
#
# The inner query finds each guild's minimum timestamp. The join back to
# messages_t must match on BOTH guild_id and timestamp: matching on the
# timestamp alone would also pull in messages from *other* guilds that happen
# to share that timestamp and report them under the wrong guild.
# Columns are table-qualified so nothing depends on implicit name resolution;
# the output column names are unchanged.
query = """
select
    guilds_t.guild_name,
    time_sorted_messages_t.guild_id,
    channels_t.channel_name,
    messages_t.channel_id,
    messages_t.content,
    time_sorted_messages_t.real_timestamp
from messages_t
join (
    select guild_id, min(real_timestamp) as real_timestamp
    from messages_t
    group by guild_id
) as time_sorted_messages_t
    on time_sorted_messages_t.guild_id = messages_t.guild_id
    and time_sorted_messages_t.real_timestamp = messages_t.real_timestamp
join guilds_t on time_sorted_messages_t.guild_id = guilds_t.id
join channels_t on messages_t.channel_id = channels_t.id
order by time_sorted_messages_t.real_timestamp asc;
"""

In [None]:
# Run the earliest-message query and collect every row (one dict per row).
cursor.execute(query)
ealiest_message_per_guild_results = cursor.fetchall()

# Tabulate the row dicts; dict keys become the DataFrame columns.
df = pd.DataFrame(ealiest_message_per_guild_results)

In [None]:
# Display the earliest-message table built in the previous cell.
df

# Number of Messages Per Guild

In [None]:
# Total number of messages per guild, joined with the guild name.
# The ascending sort on message_count matters beyond display: a later cell
# picks the biggest guild with results_0[-1].
query = """
select guilds_t.id, guilds_t.guild_name , message_count_t.message_count
from ( select messages_t.guild_id, count(messages_t.id) as message_count
from messages_t
group by messages_t.guild_id ) as message_count_t
join guilds_t on message_count_t.guild_id = guilds_t.id
order by message_count ASC;
"""
cursor.execute(query)
# One dict per guild with keys: id, guild_name, message_count.
results_0 = cursor.fetchall()

In [None]:
# Split the per-guild rows into parallel name / count sequences for plotting.
x_axis = [row["guild_name"] for row in results_0]
y_axis = [row["message_count"] for row in results_0]

# Bar chart: one bar per guild, in the order returned by the query.
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Number of Messages Per Guild",
    labels={"x": "Guild Names", "y": "Total Number of Messages"},
)
fig.show()

# Number of Authors per Guild

In [None]:
# Number of distinct authors per guild, joined with the guild name and sorted
# from the fewest authors to the most.
query = """
select 
    guilds_t.id, 
    guilds_t.guild_name, 
    guild_author_count_t.author_count 
FROM 
(
    select distinct guild_id, COUNT(distinct(author)) as author_count
    from messages_t mt 
    group by guild_id
) as guild_author_count_t
join guilds_t on guild_author_count_t.guild_id = guilds_t.id
order by guild_author_count_t.author_count asc;
"""
cursor.execute(query)
# One dict per guild with keys: id, guild_name, author_count.
author_per_guild = cursor.fetchall()
# Preview the first three rows as a sanity check.
pprint(author_per_guild[:3])

In [None]:
# Parallel sequences of guild names and their distinct-author counts.
x_axis = [row["guild_name"] for row in author_per_guild]
y_axis = [row["author_count"] for row in author_per_guild]

# Bar chart: one bar per guild, in the order returned by the query.
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Number of Authors Per Guild",
    labels={"x": "Guild Names", "y": "Total Number of Authors"},
)
fig.show()

# What percentage of Authors in each Guild have posted fewer than 5 messages?

In [None]:
# Share of each guild's authors who posted strictly fewer than
# `min_message_count` messages.
#
# Query structure:
#   * author_count_t              - distinct authors per guild (the denominator)
#   * author_messages_threshold_t - authors per guild under the threshold
#                                   (the numerator)
# The author totals are the LEFT side of a LEFT JOIN so that a guild in which
# no author falls under the threshold is reported as 0% instead of silently
# disappearing from the result (which is what an inner join does).
# NULLIF guards the division for a guild whose author count is 0; its
# percentage is then NULL rather than a division-by-zero error.
min_message_count = 5
query = f"""
select
    guild_id,
    guild_name,
    users_more_x_messages,
    author_raw_count,
    CAST(users_more_x_messages AS FLOAT) / NULLIF(CAST(author_raw_count AS FLOAT), 0) * 100 as author_num_percentage
from
(
    SELECT
        author_count_t.guild_id as guild_id,
        author_count_t.guild_name as guild_name,
        COALESCE(author_messages_threshold_t.users_more_x_messages, 0) as users_more_x_messages,
        author_count_t.author_raw_count as author_raw_count
    FROM
    (
        select
            guilds_t.id as guild_id,
            guilds_t.guild_name,
            guild_author_count_t.author_raw_count
        FROM (
            select guild_id, COUNT(distinct(author)) as author_raw_count
            from messages_t mt
            group by guild_id
        ) as guild_author_count_t
        join guilds_t on guild_author_count_t.guild_id = guilds_t.id
    ) as author_count_t
    LEFT JOIN
    (
        select
            guild_message_count_t.guild_id as guild_id,
            guilds_t.guild_name,
            count(*) as users_more_x_messages
        from
        (
            select * FROM
                (
                    select
                        guild_id,
                        author,
                        count(content) as msg_count
                    from messages_t
                    group by guild_id, author
                ) as raw_author_message_count
            where msg_count < {min_message_count}
        ) as guild_message_count_t
        join guilds_t on guild_message_count_t.guild_id = guilds_t.id
        group by guild_message_count_t.guild_id, guilds_t.guild_name
    ) as author_messages_threshold_t
    ON author_messages_threshold_t.guild_id = author_count_t.guild_id
) as raw_author_message_count_t
order by author_num_percentage asc;
"""
cursor.execute(query)
# One dict per guild; the chart cell reads guild_name and author_num_percentage.
min_5_messages_percentage = cursor.fetchall()
# Preview the first three rows as a sanity check.
pprint(min_5_messages_percentage[:3])

In [None]:
# Parallel sequences of guild names and the percentage of their authors that
# fall under the message threshold.
x_axis = [guild["guild_name"] for guild in min_5_messages_percentage]
y_axis = [guild["author_num_percentage"] for guild in min_5_messages_percentage]

# Bar chart: one bar per guild, in the order returned by the query.
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Percentage of Authors with less than 5 Messages",
    labels={
        "x": "Guild Names",
        # Axis label previously read "Less then"; corrected to "Less than".
        "y": "Percentage of Users with Less than 5 Messages",
    },
)
fig.show()

# What percentage of Authors in each Guild have posted more than 20 messages?

In [None]:
# Share of each guild's authors who posted strictly more than
# `min_message_count` messages.
#
# Query structure:
#   * author_count_t              - distinct authors per guild (the denominator)
#   * author_messages_threshold_t - authors per guild above the threshold
#                                   (the numerator)
# The author totals are the LEFT side of a LEFT JOIN so that a guild in which
# no author exceeds the threshold is reported as 0% instead of silently
# disappearing from the result (which is what an inner join does).
# NULLIF guards the division for a guild whose author count is 0; its
# percentage is then NULL rather than a division-by-zero error.
min_message_count = 20
query = f"""
select
    guild_id,
    guild_name,
    users_more_x_messages,
    author_raw_count,
    CAST(users_more_x_messages AS FLOAT) / NULLIF(CAST(author_raw_count AS FLOAT), 0) * 100 as author_num_percentage
from
(
    SELECT
        author_count_t.guild_id as guild_id,
        author_count_t.guild_name as guild_name,
        COALESCE(author_messages_threshold_t.users_more_x_messages, 0) as users_more_x_messages,
        author_count_t.author_raw_count as author_raw_count
    FROM
    (
        select
            guilds_t.id as guild_id,
            guilds_t.guild_name,
            guild_author_count_t.author_raw_count
        FROM (
            select guild_id, COUNT(distinct(author)) as author_raw_count
            from messages_t mt
            group by guild_id
        ) as guild_author_count_t
        join guilds_t on guild_author_count_t.guild_id = guilds_t.id
    ) as author_count_t
    LEFT JOIN
    (
        select
            guild_message_count_t.guild_id as guild_id,
            guilds_t.guild_name,
            count(*) as users_more_x_messages
        from
        (
            select * FROM
                (
                    select
                        guild_id,
                        author,
                        count(content) as msg_count
                    from messages_t
                    group by guild_id, author
                ) as raw_author_message_count
            where msg_count > {min_message_count}
        ) as guild_message_count_t
        join guilds_t on guild_message_count_t.guild_id = guilds_t.id
        group by guild_message_count_t.guild_id, guilds_t.guild_name
    ) as author_messages_threshold_t
    ON author_messages_threshold_t.guild_id = author_count_t.guild_id
) as raw_author_message_count_t
order by author_num_percentage asc;
"""
cursor.execute(query)
# One dict per guild; the chart cell reads guild_name and author_num_percentage.
author_more_20_message_percentage = cursor.fetchall()
# Preview THIS query's rows (the original previewed author_per_guild, a
# leftover from the earlier authors-per-guild cell).
pprint(author_more_20_message_percentage[:3])

In [None]:
# Parallel sequences of guild names and the percentage of their authors that
# exceed the message threshold.
x_axis = [row["guild_name"] for row in author_more_20_message_percentage]
y_axis = [row["author_num_percentage"] for row in author_more_20_message_percentage]

# Bar chart: one bar per guild, in the order returned by the query.
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Percentage of Authors with More than 20 Messages",
    labels={
        "x": "Guild Names",
        "y": "Percentage of Users with More than 20 Messages",
    },
)
fig.show()

# What is the most, and second most, active month for each Discord Guild?

In [None]:
query = """
select distinct guilds_t.id , guilds_t.guild_name, month_timestamp, msg_count from (
	select
		distinct DATE_TRUNC('month', real_timestamp)
			         AS  month_timestamp,
	    COUNT(guild_id) AS msg_count,
	    guild_id 
	FROM messages_t
	GROUP BY guild_id, month_timestamp
) as month_messages_t
join guilds_t on month_messages_t.guild_id = guilds_t.id
order by guilds_t.id, month_timestamp;
"""
cursor.execute(query)
active_months = cursor.fetchall()
active_months_df = pd.DataFrame.from_dict(active_months)

In [None]:
# One line per guild showing its message count for each month.
fig = px.line(
    active_months_df,
    x="month_timestamp",
    y="msg_count",
    color="guild_name",
    title="Message Count per Month per Discord Guild",
    # When plotting from DataFrame columns, `labels` must be keyed by the
    # column names; the previous 'x'/'y' keys matched nothing and were ignored,
    # leaving raw column names on the axes. "Guild Names" describes the colour
    # legend here, not the (time) x-axis.
    labels={
        "month_timestamp": "Month",
        "msg_count": "Number of Messages That Month",
        "guild_name": "Guild Names",
    },
    width=1920,
    height=1080,
)
fig.show()

# Calendar heatmaps of daily message activity for the largest Guild

In [None]:
# results_0 is sorted by message_count ascending (see the messages-per-guild
# query), so its last row is the guild with the most messages.
biggest_guild = results_0[-1]
# Per-day message counts for that single guild. Note the column is still
# aliased `month_timestamp` although it is truncated to the DAY here; later
# cells read it under that name.
query = """
select distinct guilds_t.id , guilds_t.guild_name, month_timestamp, msg_count from (
	select
		distinct DATE_TRUNC('day', real_timestamp)
			         AS  month_timestamp,
	    COUNT(guild_id) AS msg_count,
	    guild_id 
	FROM messages_t
	GROUP BY guild_id, month_timestamp
) as month_messages_t
join guilds_t on month_messages_t.guild_id = guilds_t.id
where guilds_t.id = '%s'
order by guilds_t.id, month_timestamp;
"""
# Substitute the guild id into the SQL text via Python string formatting.
# NOTE(review): the id comes from our own query result, but binding it through
# cursor.execute(query, params) would avoid hand-quoting — consider switching.
query = query % str(biggest_guild["id"])

In [None]:
# Run the per-day query prepared in the previous cell.
cursor.execute(query)
# Echo the final SQL text (guild id already substituted) for inspection.
print(query)
# NOTE: despite the variable name, these rows are per-DAY message counts for
# the single guild selected by the query's WHERE clause.
total_num_messages_per_guild = cursor.fetchall()

In [None]:
# Imports for the calendar-heatmap section (pandas is already imported in the
# setup cell; re-importing is harmless).
import pandas as pd
import calmap
# Seed NumPy's global RNG with a fixed integer (the sum of the code points of
# 'calmap') so np.random calls in later cells give the same values every run.
import numpy as np; np.random.seed(sum(map(ord, 'calmap')))

In [None]:
# Build the date-indexed series consumed by the calendar heatmaps below.
# Each input row is one day for the selected guild, with the day stored under
# `month_timestamp` and that day's message total under `msg_count`.
df = pd.DataFrame(total_num_messages_per_guild)

# Min-max normalise the daily message COUNT to the range [0, 1].
# (The previous version normalised the timestamp column itself, so the plotted
# values encoded "how late the date is" rather than activity.)
min_value = df["msg_count"].min()
max_value = df["msg_count"].max()
count_range = max_value - min_value
if count_range == 0:
    # Every day has the same count (or there is only one day): avoid 0/0 and
    # treat them all as equally "full".
    df["normalized_message_count"] = 1.0
else:
    df["normalized_message_count"] = (df["msg_count"] - min_value) / count_range

# Index the values by the real day each row belongs to.
# (The previous version indexed them by 2005 randomly sampled, sorted days,
# which both misplaced every value on the calendar and raised a length
# mismatch unless the query happened to return exactly 2005 rows.)
day_index = pd.DatetimeIndex(pd.to_datetime(df["month_timestamp"]))
if day_index.tz is not None:
    # Drop timezone info but keep the wall-clock day as returned by the
    # database. NOTE(review): calmap is assumed to want a tz-naive index —
    # confirm against the installed version.
    day_index = day_index.tz_localize(None)

my_events = pd.Series(df["normalized_message_count"].to_numpy(), index=day_index)

In [None]:
# Calendar heatmap of `my_events` for the year 2019.
calmap.yearplot(my_events, year=2019)

In [None]:
# Calendar heatmap of `my_events` for the year 2020.
calmap.yearplot(my_events, year=2020)

In [None]:
# Styling for the calendar plot of `my_events`, collected in one place so the
# call itself stays short.
calendar_style = {
    "monthticks": 3,
    "daylabels": "MTWTFSS",
    "dayticks": [0, 2, 4, 6],
    "cmap": "YlGn",
    "fillcolor": "grey",
    "linewidth": 0,
    "fig_kws": {"figsize": (20, 10)},
    "yearlabels": True,
    "yearascending": True,
}
calmap.calendarplot(my_events, **calendar_style)