In [None]:
# !pip install psycopg2
# !pip install plotly
# !pip install kaleido
# !pip install calmap
# !pip install "plotly>=5" "ipywidgets>=7.6"

In [None]:
from urllib.parse import urlparse
import psycopg2
import psycopg2.extras
from pprint import pprint
import pandas as pd

import plotly.io as pio
import plotly.express as px

import os
from dotenv import load_dotenv
load_dotenv()

# Read the PostgreSQL connection URL from the environment (populated from .env
# by load_dotenv above). Fail fast with an explicit message: without this check
# a missing variable only surfaces later as a confusing connection error.
db_url = os.environ.get("db_url")
if not db_url:
    raise RuntimeError(
        "Environment variable 'db_url' is not set; expected a PostgreSQL URL "
        "such as postgresql://user:password@host:5432/database"
    )

url = urlparse(db_url)
connection = psycopg2.connect(
    host=url.hostname,
    port=url.port,
    database=url.path[1:],  # drop the leading "/" of the URL path
    user=url.username,
    password=url.password
)
# RealDictCursor returns each row as a dict keyed by column name, which the
# later cells rely on (e.g. guild["guild_name"]).
cursor = connection.cursor(cursor_factory = psycopg2.extras.RealDictCursor)

# Earliest message per Guild

In [None]:
# Earliest message of every guild, with its guild and channel names.
# The per-guild minimum timestamp is joined back on BOTH guild_id and
# msg_timestamp; joining on the timestamp alone would also return messages from
# other guilds that happen to share that exact timestamp.
query = """
select
    guilds_t.guild_name,
    time_sorted_messages_t.guild_id,
    channels_t.channel_name,
    messages_t.channel_id,
    messages_t.content,
    time_sorted_messages_t.msg_timestamp
from messages_t
join (
    SELECT guild_id, MIN(msg_timestamp) as msg_timestamp
    FROM messages_t
    GROUP BY guild_id
) as time_sorted_messages_t
    on time_sorted_messages_t.guild_id = messages_t.guild_id
    and time_sorted_messages_t.msg_timestamp = messages_t.msg_timestamp
join guilds_t on time_sorted_messages_t.guild_id = guilds_t.id
join channels_t on messages_t.channel_id = channels_t.id
order by msg_timestamp ASC;
"""

In [None]:
# Run the earliest-message query defined in the previous cell and load the
# returned rows (dicts, because the cursor is a RealDictCursor) into a DataFrame.
cursor.execute(query)
# NOTE(review): "ealiest" is a typo for "earliest"; name kept as-is.
ealiest_message_per_guild_results = cursor.fetchall()
df = pd.DataFrame.from_dict(ealiest_message_per_guild_results)

In [None]:
# Display the earliest-message rows loaded in the previous cell.
df

# Number of Messages Per Guild

In [None]:
# Total number of messages per guild, joined to the guild name and ordered from
# the least to the most active guild. The rows are stored in `results_0`, which
# the next cell plots.
query = """
select guilds_t.id, guilds_t.guild_name , message_count_t.message_count
from ( select messages_t.guild_id, count(messages_t.id) as message_count
from messages_t
group by messages_t.guild_id ) as message_count_t
join guilds_t on message_count_t.guild_id = guilds_t.id
order by message_count ASC;
"""
cursor.execute(query)
results_0 = cursor.fetchall()
# Uncomment to peek at the first three rows:
# pprint(results_0[:3])

In [None]:
# Split the per-guild rows in `results_0` into parallel axis lists and draw a
# bar chart of message totals.
x_axis = [row["guild_name"] for row in results_0]
y_axis = [row["message_count"] for row in results_0]
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Number of Messages Per Guild",
    labels={'x': "Guild Names", 'y': 'Total Number of Messages'},
)
fig.show()

# Number of Authors per Guild

In [None]:
# Number of distinct message authors per guild, ordered ascending.
# The subquery is grouped by guild_id, so each guild already appears exactly
# once; the former SELECT DISTINCT on it was a redundant de-duplication pass.
query = """
select
    guilds_t.id,
    guilds_t.guild_name,
    guild_author_count_t.author_count
FROM
(
    select guild_id, COUNT(distinct(author_id)) as author_count
    from messages_t mt
    group by guild_id
) as guild_author_count_t
join guilds_t on guild_author_count_t.guild_id = guilds_t.id
order by guild_author_count_t.author_count asc;
"""
cursor.execute(query)
author_per_guild = cursor.fetchall()
# Quick sanity check of the first few rows.
pprint(author_per_guild[:3])

In [None]:
# Build the axis lists from `author_per_guild` and chart the author totals.
x_axis = [row["guild_name"] for row in author_per_guild]
y_axis = [row["author_count"] for row in author_per_guild]
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Number of Authors Per Guild",
    labels={'x': "Guild Names", 'y': 'Total Number of Authors'},
)
fig.show()

# Number of Channels per Guild

In [None]:
# Number of distinct channels that contain messages, per guild, ascending.
# The subquery is grouped by guild_id, so the former SELECT DISTINCT on it was
# redundant and has been dropped.
# NOTE: this rebinds `results_0`, which earlier held the messages-per-guild rows.
query = """
select
    guilds_t.id,
    guilds_t.guild_name,
    guild_channel_count_t.channel_count
FROM
(
    select
        guild_id,
        COUNT(distinct(channel_id)) as channel_count
    from messages_t mt
    group by guild_id
) as guild_channel_count_t
join guilds_t on guild_channel_count_t.guild_id = guilds_t.id
order by guild_channel_count_t.channel_count asc;
"""
cursor.execute(query)
results_0 = cursor.fetchall()
# Quick sanity check of the first few rows.
pprint(results_0[:3])

In [None]:
# Plot the channel counts fetched by the previous cell into `results_0`.
# (This cell previously iterated `author_per_guild` and read "author_count",
# so the "Channels" chart silently re-plotted the author data.)
x_axis = [guild["guild_name"] for guild in results_0]
y_axis = [guild["channel_count"] for guild in results_0]
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title = "Number of Channels Per Guild",
    labels = {'x':"Guild Names",
              'y':'Total Number of Channels'}
)
fig.show()

# What percentage of Authors in each Guild have posted fewer than 5 messages?

In [None]:
# Authors with fewer than this many messages count towards the percentage.
min_message_count = 5

# Per guild: how many authors posted fewer than `min_message_count` messages,
# how many authors there are in total, and the resulting percentage.
#
# Both counts come from one pass over the per-(guild, author) message counts
# using an aggregate FILTER. The previous version inner-joined a pre-filtered
# subquery to the total, which dropped every guild with zero matching authors
# instead of reporting 0%.
#
# The result keys are unchanged. `users_more_x_messages` keeps its historical
# name even though this cell counts authors *below* the threshold.
query = f"""
select
    guilds_t.id as guild_id,
    guilds_t.guild_name,
    count(*) filter (where author_msg_count_t.msg_count < {min_message_count}) as users_more_x_messages,
    count(author_msg_count_t.author_id) as author_raw_count,
    cast(count(*) filter (where author_msg_count_t.msg_count < {min_message_count}) as float)
        / cast(count(author_msg_count_t.author_id) as float) * 100 as author_num_percentage
from (
    select
        guild_id,
        author_id,
        count(content) as msg_count
    from messages_t
    group by guild_id, author_id
) as author_msg_count_t
join guilds_t on author_msg_count_t.guild_id = guilds_t.id
group by guilds_t.id, guilds_t.guild_name
order by author_num_percentage asc;
"""
cursor.execute(query)
min_5_messages_percentage = cursor.fetchall()
# Uncomment to peek at the first three rows:
# pprint(min_5_messages_percentage[:3])

In [None]:
# Chart the share of low-activity authors per guild from
# `min_5_messages_percentage`.
x_axis = [row["guild_name"] for row in min_5_messages_percentage]
y_axis = [row["author_num_percentage"] for row in min_5_messages_percentage]
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Percentage of Authors with less than 5 Messages",
    labels={'x': "Guild Names", 'y': 'Percentage of Users with Less then 5 Messages'},
)
fig.show()

# What percentage of Authors in each Guild have posted more than 20 messages

In [None]:
# Authors with more than this many messages count towards the percentage.
min_message_count = 20

# Per guild: how many authors posted more than `min_message_count` messages,
# how many authors there are in total, and the resulting percentage.
#
# Both counts come from one pass over the per-(guild, author) message counts
# using an aggregate FILTER. The previous version inner-joined a pre-filtered
# subquery to the total, which dropped every guild with zero matching authors
# instead of reporting 0%. The result keys are unchanged.
query = f"""
select
    guilds_t.id as guild_id,
    guilds_t.guild_name,
    count(*) filter (where author_msg_count_t.msg_count > {min_message_count}) as users_more_x_messages,
    count(author_msg_count_t.author_id) as author_raw_count,
    cast(count(*) filter (where author_msg_count_t.msg_count > {min_message_count}) as float)
        / cast(count(author_msg_count_t.author_id) as float) * 100 as author_num_percentage
from (
    select
        guild_id,
        author_id,
        count(content) as msg_count
    from messages_t
    group by guild_id, author_id
) as author_msg_count_t
join guilds_t on author_msg_count_t.guild_id = guilds_t.id
group by guilds_t.id, guilds_t.guild_name
order by author_num_percentage asc;
"""
cursor.execute(query)
author_more_20_message_percentage = cursor.fetchall()
# Uncomment to peek at the first three rows:
# pprint(author_more_20_message_percentage[:3])

In [None]:
# Chart the share of high-activity authors per guild from
# `author_more_20_message_percentage`.
x_axis = [row["guild_name"] for row in author_more_20_message_percentage]
y_axis = [row["author_num_percentage"] for row in author_more_20_message_percentage]
fig = px.bar(
    x=x_axis,
    y=y_axis,
    title="Percentage of Authors with More than 20 Messages",
    labels={'x': "Guild Names", 'y': 'Percentage of Users with More than 20 Messages'},
)
fig.show()

# What is the most, and second most, active month for each Discord Guild?

In [None]:
# Message count per calendar month for every guild, ordered by guild then month.
# The inner query is grouped by (guild_id, month), so its rows are already
# unique; the redundant inner DISTINCT has been removed. The outer DISTINCT is
# kept in case guilds_t contains repeated ids.
query = """
select distinct
    guilds_t.id,
    guilds_t.guild_name,
    month_timestamp,
    msg_count
from (
    select
        DATE_TRUNC('month', msg_timestamp) AS month_timestamp,
        COUNT(guild_id) AS msg_count,
        guild_id
    FROM messages_t
    GROUP BY guild_id, DATE_TRUNC('month', msg_timestamp)
) as month_messages_t
join guilds_t on month_messages_t.guild_id = guilds_t.id
order by guilds_t.id, month_timestamp;
"""
cursor.execute(query)
active_months = cursor.fetchall()
active_months_df = pd.DataFrame.from_dict(active_months)

In [None]:
# One line per guild showing its monthly message count.
# When a DataFrame is passed, Plotly Express looks up `labels` by column name,
# so the axis labels must be keyed by "month_timestamp"/"msg_count"; the former
# 'x'/'y' keys were ignored (and 'x' was mislabelled "Guild Names").
fig = px.line(
    active_months_df,
    x="month_timestamp",
    y="msg_count",
    color='guild_name',
    title = "Message Count per Month per Discord Guild",
    labels = {'month_timestamp': "Month",
              'msg_count': 'Number of Messages That Month'},
    width=1920,
    height=1080
)
fig.show()

# What is the most, and second most, active month for each Discord Guild? (message counts normalized within each Guild)

In [None]:
import pandas as pd

# Create a sample DataFrame to demonstrate per-group min-max normalization
data = {
    'Label': ['A', 'B', 'A', 'B', 'A', 'B'],
    'Value': [10, 20, 30, 40, 50, 60],
}

df = pd.DataFrame(data)


def normalize_data(df, label, value):
    """Return a copy of ``df`` with a min-max ``Normalized`` column per group.

    Args:
        df: input DataFrame; it is not modified.
        label: name of the column whose values define the groups.
        value: name of the numeric column to scale within each group.

    Returns:
        A new DataFrame with an added ``Normalized`` column holding
        ``(x - min) / (max - min)`` computed inside each ``label`` group.
        Groups whose values are all equal get 0.0 instead of the NaN that
        0/0 would produce.
    """
    def _min_max(group):
        span = group.max() - group.min()
        if span == 0:
            # Constant group: every value is both the minimum and the maximum.
            return pd.Series(0.0, index=group.index)
        return (group - group.min()) / span

    # assign() builds a new frame, so the caller's "original" stays untouched
    # and can still be printed as such below.
    return df.assign(Normalized=df.groupby(label)[value].transform(_min_max))


# Normalize the data while preserving labels
normalized_df = normalize_data(df, 'Label', 'Value')

print("Original DataFrame:")
print(df)

print("\nNormalized DataFrame:")
print(normalized_df)

In [None]:
# Display the monthly message counts per guild.
active_months_df

In [None]:
def normalize_data(df):
    """Min-max scale ``msg_count`` to [0, 1] within each ``guild_name`` group.

    NOTE: this redefines the three-argument ``normalize_data`` from the sample
    cell above with a different signature.

    Args:
        df: DataFrame with ``guild_name`` and ``msg_count`` columns.

    Returns:
        The same ``df`` object, with a ``normalized_value`` column added in
        place (a later plotting cell reads that column from the frame passed
        in). Guilds whose monthly counts are all equal get 0.0 rather than the
        NaN that 0/0 would produce, so they are not dropped from the plot.
    """
    def _min_max(counts):
        span = counts.max() - counts.min()
        if span == 0:
            return pd.Series(0.0, index=counts.index)
        return (counts - counts.min()) / span

    df['normalized_value'] = df.groupby('guild_name')['msg_count'].transform(_min_max)
    return df
# Add the per-guild `normalized_value` column to the monthly activity data.
normalized_monthly_activity_df = normalize_data(active_months_df)

In [None]:
# Display the monthly activity data including the normalized column.
normalized_monthly_activity_df

In [None]:
# One line per guild showing its monthly message count scaled to [0, 1].
# `labels` is keyed by DataFrame column name; the former 'x'/'y' keys were
# ignored by Plotly Express when plotting from a DataFrame.
fig = px.line(
    normalized_monthly_activity_df,
    x="month_timestamp",
    y="normalized_value",
    color='guild_name',
    title = "Message Count Normalized",
    labels = {'month_timestamp': "Month",
              'normalized_value': 'Number of Messages Normalized'},
    width=1920,
    height=1080
)
# Switch to static PNG rendering; this is a global setting and also applies to
# every figure shown after this cell.
pio.renderers.default = "png"
fig.show()

# What is the name of the most active channel in each discord guild?

In [None]:
# For every guild, the channel(s) holding the highest message count.
# The per-guild maximum is joined back on BOTH guild_id and the count; joining
# on the count alone also returned channels of other guilds whose message count
# happened to equal some guild's maximum. Ties inside a guild still yield
# several rows for that guild.
query = """
select
    channels_t.channel_name,
    guilds_t.guild_name,
    channel_message_count_t.message_count,
    channel_message_count_t.guild_id,
    channel_message_count_t.channel_id
from (
    select guild_id, channel_id, COUNT(id) as message_count from messages_t
    group by guild_id, channel_id
) as channel_message_count_t
join (
    select guild_id, max(message_count) as max_messages from
    (
        select guild_id, channel_id, COUNT(id) as message_count from messages_t
        group by guild_id, channel_id
    ) as count_channel_messages_t
    group by guild_id
) as channel_message_max_t
    on channel_message_max_t.guild_id = channel_message_count_t.guild_id
    and channel_message_max_t.max_messages = channel_message_count_t.message_count
join guilds_t on channel_message_count_t.guild_id = guilds_t.id
join channels_t on channel_message_count_t.channel_id = channels_t.id
order by channel_message_count_t.message_count desc;
"""
cursor.execute(query)
most_active_channel_per_guild = cursor.fetchall()
most_active_channel_per_guild_df = pd.DataFrame.from_dict(most_active_channel_per_guild)

In [None]:
# Display the most active channel rows per guild.
most_active_channel_per_guild_df

In [None]:

# Horizontal bars: one per "channel - guild" pair, sized by message count.
# The title previously read "Percentage of Authors with More than 20 Messages",
# copied from an unrelated chart.
fig = px.bar(
    y=most_active_channel_per_guild_df["channel_name"] + " - " + most_active_channel_per_guild_df["guild_name"],
    x=most_active_channel_per_guild_df["message_count"],
    title = "Most Active Channel per Guild",
    labels = {'y':"Channel and Guild Names",
              'x':'Message Count'}
)
# Global switch to static PNG rendering (also affects later figures).
pio.renderers.default = "png"
fig.show()

# What is the average message count for most active 30 days of a Discord Guild?

In [None]:
# How many of a guild's busiest days are averaged.
busiest_day_limit = 30

# For ONE guild: average daily message count over its `day_limit` busiest days,
# plus the time span between the earliest and latest of those days.
# The guild id and the day limit are bound by psycopg2 (named %(...)s
# placeholders) rather than formatted into the SQL text.
average_message_count_per_days_query = """
select
    guilds_t.id,
    guilds_t.guild_name,
    avg_msg_count,
    total_num_days
from (
    select
        guild_id,
        avg(msg_count) as avg_msg_count,
        max(day_timestamp) - min(day_timestamp) as total_num_days
    from (
        select
            guild_id,
            DATE_TRUNC('day', msg_timestamp) as day_timestamp,
            COUNT(guild_id) as msg_count
        from messages_t
        where guild_id = %(guild_id)s
        group by guild_id, DATE_TRUNC('day', msg_timestamp)
        order by msg_count desc
        limit %(day_limit)s
    ) as busiest_days_t
    group by guild_id
) as guild_day_stats_t
join guilds_t on guild_day_stats_t.guild_id = guilds_t.id;
"""

query = """
SELECT distinct id from guilds_t;
"""
cursor.execute(query)
guilds_id_list = cursor.fetchall()

# Run the query once per guild and collect every row before building the frame.
# The previous version built the first guild's query but never executed it (and
# interpolated the whole row dict instead of its id), so the first guild was
# always missing from the result; it also grew the DataFrame with pd.concat
# inside the loop.
average_message_count_per_days = []
for guild_row in guilds_id_list:
    cursor.execute(
        average_message_count_per_days_query,
        {"guild_id": guild_row["id"], "day_limit": busiest_day_limit},
    )
    average_message_count_per_days.extend(cursor.fetchall())

unioned_average_message_count_per_days_df = pd.DataFrame(
    average_message_count_per_days,
    columns=["id", "guild_name", "avg_msg_count", "total_num_days"],
)

# Convert the interval to numbers via total_seconds(), which does not depend on
# the timedelta storage unit pandas happens to infer. Missing spans become 0.
day_span_seconds = (
    pd.to_timedelta(unioned_average_message_count_per_days_df['total_num_days'])
    .dt.total_seconds()
    .fillna(0)
)
unioned_average_message_count_per_days_df['nano_second_difference'] = (
    (day_span_seconds * 1_000_000_000).round().astype("int64")
)
unioned_average_message_count_per_days_df['days'] = unioned_average_message_count_per_days_df['nano_second_difference'] /  (60 * 60 * 24 * 1000 * 1000 * 1000)

In [None]:
# Display the per-guild averages over the busiest days.
unioned_average_message_count_per_days_df

In [None]:
import plotly.graph_objects as go

# Grouped bars per guild: average daily message count next to the day span.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        name='avg_msg_count',
        x=unioned_average_message_count_per_days_df["guild_name"],
        y=unioned_average_message_count_per_days_df["avg_msg_count"],
    )
)
fig.add_trace(
    go.Bar(
        name='total_num_days',
        x=unioned_average_message_count_per_days_df["guild_name"],
        y=unioned_average_message_count_per_days_df["days"],
    )
)
# Global switch to static PNG rendering (also affects later figures).
pio.renderers.default = "png"
fig.update_layout(barmode='group')
fig.show()

# What percentage of days actually have messages?

In [None]:
# For ONE guild: first and last day with messages, number of days that have at
# least one message, length of that period in days, and the percentage of days
# in the period that have messages.
#
# The period is counted inclusively (latest - earliest + 1). The former
# exclusive difference made the percentage exceed 100 for guilds active every
# day and divided by zero for guilds with messages on a single day.
# `total_num_of_days` uses the same inclusive span so the columns agree.
# The guild id is bound by psycopg2 instead of being formatted into the SQL.
percentage_of_days_query = """
select
    guilds_t.id,
    guilds_t.guild_name,
    min(day_timestamp)::DATE as earliest_date,
    max(day_timestamp)::DATE as latest_date,
    count(*) as days_with_messages,
    max(day_timestamp)::DATE - min(day_timestamp)::DATE + 1 as total_num_of_days,
    cast(count(*) as FLOAT)
        / cast(max(day_timestamp)::DATE - min(day_timestamp)::DATE + 1 as FLOAT) * 100 as percentage_of_days
from (
    select
        guild_id,
        DATE_TRUNC('day', msg_timestamp) as day_timestamp
    from messages_t
    where guild_id = %(guild_id)s
    group by guild_id, DATE_TRUNC('day', msg_timestamp)
) as active_days_t
join guilds_t on active_days_t.guild_id = guilds_t.id
group by guilds_t.id, guilds_t.guild_name;
"""

query = """
SELECT distinct id from guilds_t;
"""
cursor.execute(query)
guilds_id_list = cursor.fetchall()

# Run the query once per guild and collect all rows before building the frame.
# The previous version never executed the first guild's query (and interpolated
# the whole row dict instead of its id), so the first guild was always missing.
percentage_of_days_with_messages = []
for guild_row in guilds_id_list:
    cursor.execute(percentage_of_days_query, {"guild_id": guild_row["id"]})
    percentage_of_days_with_messages.extend(cursor.fetchall())

unioned_percentage_of_days_with_messages_df = pd.DataFrame(
    percentage_of_days_with_messages,
    columns=[
        "id",
        "guild_name",
        "earliest_date",
        "latest_date",
        "days_with_messages",
        "total_num_of_days",
        "percentage_of_days",
    ],
).sort_values(by='percentage_of_days', ascending=True)

In [None]:
# Display the per-guild share of days that have messages.
unioned_percentage_of_days_with_messages_df

In [None]:
import plotly.graph_objects as go

# Horizontal grouped bars per guild on two x axes: percentage of days with
# messages (top axis) and length of the observed period in days (bottom axis).
# The second trace and its axis were labelled "total_msg_count" although they
# plot the `total_num_of_days` column.
fig = go.Figure(
    data=[
        go.Bar(name='percentage_of_days',  y=unioned_percentage_of_days_with_messages_df["guild_name"], xaxis='x2'  , x = unioned_percentage_of_days_with_messages_df["percentage_of_days"], offsetgroup=1, orientation='h'),
        go.Bar(name='total_num_of_days',   y=unioned_percentage_of_days_with_messages_df["guild_name"], xaxis='x' , x = unioned_percentage_of_days_with_messages_df["total_num_of_days"], offsetgroup=2, orientation='h')
    ],
    layout={
        'xaxis': {'title': 'total_num_of_days'},
        'xaxis2': {'title': 'percentage_of_days', 'overlaying': 'x', 'side': 'top'}
    }

)
# Global switch to static PNG rendering (also affects later figures).
pio.renderers.default = "png"
fig.update_layout(barmode='group')
fig.show()

# What is the average half life of top 30% of users

In [None]:
# Authors need more than this many messages to be considered.
min_message_length = 2
# Share (in percent) of the longest-lived eligible authors that is averaged.
top_author_percent = 30

# For ONE guild: average "half life" (time between an author's first and last
# message) of the top `top_percent` % of eligible authors, ranked by that span.
#
# Fixes relative to the previous query text:
#   * authors are grouped by author_id (the selected column), not `author`;
#   * the LIMIT that sizes the top-N is computed from the SAME guild; it
#     used to be computed from whichever guild guilds_t returned first;
#   * values are bound by psycopg2 instead of being formatted into the SQL.
author_half_life_query = """
SELECT
    guilds_t.guild_name,
    guilds_t.id,
    EXTRACT(DAY FROM average_author_half_life) as average_author_half_life,
    average_author_half_life as average_author_half_life_timestamp
from (
    select
        guild_id,
        avg(author_half_life) as average_author_half_life
    from (
        select
            guild_id,
            max_msg_timestamp - min_msg_timestamp as author_half_life
        from (
            select
                guild_id,
                author_id,
                count(content) as msg_count,
                max(msg_timestamp) as max_msg_timestamp,
                min(msg_timestamp) as min_msg_timestamp
            from messages_t
            where guild_id = %(guild_id)s
            group by guild_id, author_id
        ) as author_activity_t
        where msg_count > %(min_messages)s
        order by author_half_life desc
        limit (
            select cast(cast(count(*) as float) / 100 * %(top_percent)s as Integer)
            from (
                select author_id
                from messages_t
                where guild_id = %(guild_id)s
                group by guild_id, author_id
                having count(content) > %(min_messages)s
            ) as eligible_authors_t
        )
    ) as top_authors_t
    group by guild_id
) as average_author_half_life_t
join guilds_t on guilds_t.id = average_author_half_life_t.guild_id;
"""

query = """
SELECT distinct id from guilds_t;
"""
cursor.execute(query)
guilds_id_list = cursor.fetchall()

# Run the query once per guild and collect all rows before building the frame.
# The previous version never executed the first guild's query, so that guild
# was always missing. Guilds whose top-N rounds down to zero authors return no
# row and are therefore absent from the result.
author_half_life_rows = []
for guild_row in guilds_id_list:
    cursor.execute(
        author_half_life_query,
        {
            "guild_id": guild_row["id"],
            "min_messages": min_message_length,
            "top_percent": top_author_percent,
        },
    )
    author_half_life_rows.extend(cursor.fetchall())

# NOTE: the result is stored under the name the following display and plotting
# cells read, even though it now holds half-life data rather than
# percentage-of-days data.
unioned_percentage_of_days_with_messages_df = pd.DataFrame(
    author_half_life_rows,
    columns=[
        "guild_name",
        "id",
        "average_author_half_life",
        "average_author_half_life_timestamp",
    ],
).sort_values(by='average_author_half_life', ascending=True)

In [None]:
# Display the per-guild average author half life computed in the previous cell
# (the variable name is reused from the percentage-of-days section).
unioned_percentage_of_days_with_messages_df

In [None]:
# Bar chart of the average author half life (in days) per guild.
# The title previously read "Percentage of Authors with More than 20 Messages",
# copied from an unrelated chart.
fig = px.bar(
    x=unioned_percentage_of_days_with_messages_df["guild_name"],
    y=unioned_percentage_of_days_with_messages_df["average_author_half_life"],
    title = "Average Author Half Life per Guild (days)",
    labels = {'x':"Guild Names",
              'y':'Author Half Life'}
)
fig.show()

In [None]:
# # Heatmap of a particular Guild



# import pandas as pd
# import calmap
# import numpy as np; np.random.seed(sum(map(ord, 'calmap')))
# df = pd.DataFrame.from_dict(total_num_messages_per_guild)
# min_value = df['month_timestamp'].min()
# max_value = df['month_timestamp'].max()
# df['normalized_message_count'] = (df['month_timestamp'] - min_value) / (max_value - min_value)
# all_days = pd.date_range('03/05/2018', periods=2005, freq='D')
# days = np.random.choice(all_days, 2005)
# events = pd.Series(np.random.randn(len(days)), index=days)
# days.sort()
# my_events = pd.Series(list( df['normalized_message_count'] ), index = days)
# calmap.yearplot(my_events, year=2019)
# calmap.calendarplot(
#     my_events,
#     monthticks=3,
#     daylabels='MTWTFSS',
#     dayticks=[0, 2, 4, 6],
#     cmap='YlGn',
#     fillcolor='grey',
#     linewidth=0,
#     fig_kws=dict(figsize=(20, 10)),
#     yearlabels=True,
#     yearascending=True
# )