In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this silences every warning for the rest of the kernel session,
# including pandas' SettingWithCopyWarning.
warnings.filterwarnings("ignore")

import os
# Switch the working directory to the thesis checkout; presumably required so that
# the `scripts` package imported below can be resolved — confirm.
os.chdir('/home/ubuntu/Masters_Thesis')

# Star import: plotting helpers that are called later in this notebook but not defined
# in it (e.g. create_unique_user_barplot, make_lineplot_with_bins_by_date) presumably
# come from this module — verify.
from scripts.utilities.visualisation import *

from matplotlib import font_manager
# Register every font file found under the project's Fonts directory with matplotlib.
font_dirs = ['/home/ubuntu/Masters_Thesis/Fonts']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
for font_file in font_files:
    font_manager.fontManager.addfont(font_file)
# The style sheet is referenced by a remote URL rather than a local file.
plt.style.use('https://raw.githubusercontent.com/benckj/mpl_style/main/uzh.mplstyle')

In [None]:
# Columns dropped from both exports right after loading.
unused_columns = ["Unnamed: 0", "user_location", "user_description"]

# Tweets with bot accounts still included.
data_unprocessed_with_bots = pd.read_csv(
    "/mnt/dataset1/raw_data/Bitcoin_twitter_data_english_non_textprocessed.csv",
    lineterminator='\n',
).drop(columns=unused_columns)

# Tweets from the bot-removed export (per the file name).
data_unprocessed_wo_bots = pd.read_csv(
    "/mnt/dataset1/raw_data/Bitcoin_twitter_data_english_non_textprocessed_botremovedtweets.csv",
    lineterminator='\n',
).drop(columns=unused_columns)

In [None]:
# Both frames get identical timestamp handling.
for tweets in (data_unprocessed_with_bots, data_unprocessed_wo_bots):
    tweets['date'] = pd.to_datetime(tweets['date'])
    tweets['user_created'] = pd.to_datetime(tweets['user_created'])
    # Account age at tweet time, in whole days.
    account_age = tweets['date'] - tweets['user_created']
    tweets['user_age'] = account_age.dt.days

Tweet Count

In [None]:
# NOTE(review): both calls below are commented out, so this cell currently renders nothing.
# # With bots
# create_tweet_count_by_date_plot(data_unprocessed_with_bots, "W", bot_removed = False)

# # Without bots
# create_tweet_count_by_date_plot(data_unprocessed_wo_bots, "W", bot_removed = True)

User Based Analysis

1. Unique Users

In [None]:
# Same bar plot for both datasets: bots included first, then the bot-removed data.
for frame, bots_removed in ((data_unprocessed_with_bots, False), (data_unprocessed_wo_bots, True)):
    create_unique_user_barplot(frame, "W", bot_removed=bots_removed)

Percentile bins – user-level data and influence metric

In [None]:
# Per-tweet user attributes used by the user-level plots below.
user_columns = ["date", "user_name", "user_age", "user_followers", "user_friends", "user_favourites", "user_verified"]


def _user_influence(users):
    """Return the influence score for every row of ``users``.

    score = (followers + 1) / (ln(friends + 1) + 1) * (favourites + 1) * (verified + 1)

    The +1 terms keep the score defined and non-zero when a count is 0.
    """
    reach = (users['user_followers'] + 1) / (np.log(users['user_friends'] + 1) + 1)
    return reach * (users['user_favourites'] + 1) * (users['user_verified'] + 1)


# .copy() makes each subset an independent frame, so adding 'user_influence'
# below is a plain column assignment rather than a write to a slice of the
# source frame (which pandas flags with SettingWithCopyWarning).
user_data_withbots = data_unprocessed_with_bots[user_columns].copy()
user_data_wobots = data_unprocessed_wo_bots[user_columns].copy()

# create user influence metric
user_data_withbots['user_influence'] = _user_influence(user_data_withbots)
user_data_wobots['user_influence'] = _user_influence(user_data_wobots)

In [None]:
# Mean of each user attribute per account name.
# NOTE(review): no later cell visible in this notebook reads these averaged frames.
averaged_columns = ['user_followers', 'user_friends', 'user_favourites', 'user_influence']
user_data_withbots_averaged = user_data_withbots.groupby('user_name')[averaged_columns].mean()
user_data_wobots_averaged = user_data_wobots.groupby('user_name')[averaged_columns].mean()

2. Percentile bin plots

In [None]:
def create_user_bins_plot(data, metric, bin_count, bot_removed):
    """Bar-plot the number of unique users in each percentile bin of ``metric``.

    Bin edges are the ``bin_count + 1`` evenly spaced percentiles (0..100) of
    ``data[metric]``. Every row is assigned to a bin, the distinct
    ``user_name`` values per bin are counted, and the counts are drawn with
    ``plt.bar``. Because binning is per row, a user whose rows fall into
    different bins is counted once in each of them.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``user_name`` column and the ``metric`` column.
    metric : str
        Name of the numeric column to bin.
    bin_count : int
        Requested number of percentile bins. Fewer bins are drawn when
        several percentiles coincide (heavily tied data).
    bot_removed : bool
        Only selects the wording of the plot title.

    Raises
    ------
    ValueError
        If ``metric`` has no non-missing values.
    """
    # Work on an independent copy so adding the 'bin' column never writes to a
    # slice of the caller's frame. Rows without a metric value cannot be binned.
    data_fn = data[['user_name', metric]].dropna(subset=[metric]).copy()
    if data_fn.empty:
        raise ValueError(f"No non-missing values in column '{metric}' to bin")

    percentiles = np.percentile(data_fn[metric], np.linspace(0, 100, bin_count + 1))
    # Tied data can yield repeated percentiles and pd.cut rejects duplicate
    # edges, so collapse them (np.unique also returns them sorted).
    edges = np.unique(percentiles)

    if len(edges) < 2:
        # Every value is identical: one degenerate bin holding all rows.
        data_fn['bin'] = 0
        bin_labels = [f'{int(edges[0])}-{int(edges[0])}']
    else:
        # include_lowest=True keeps rows equal to the minimum (the 0th
        # percentile) in the first bin instead of turning them into NaN,
        # which would silently drop them from the counts.
        data_fn['bin'] = pd.cut(data_fn[metric], bins=edges, labels=False, include_lowest=True)
        bin_labels = [f'{int(lo)}-{int(hi)}' for lo, hi in zip(edges[:-1], edges[1:])]

    # Count the number of unique users in each bin. Reindexing makes empty
    # bins show up as 0 so bars and tick labels always have the same length.
    positions = range(len(bin_labels))
    unique_users_per_bin = (
        data_fn.groupby('bin')['user_name']
        .nunique()
        .reindex(positions, fill_value=0)
    )

    # Plot the table
    plt.bar(positions, unique_users_per_bin)
    plt.xlabel(f'Percentile Bins of average count of {metric}')
    plt.ylabel('Number of Users')
    plt.xticks(positions, bin_labels, rotation=45)
    if bot_removed:
        plt.title(f'Number of Unique Users in Percentile Bins of {metric} after removing Bots')
    else:
        plt.title(f'Number of Unique Users in Percentile Bins of {metric} without removing Bots')
    plt.show()

In [None]:
# Same chart for both datasets: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    create_user_bins_plot(frame, "user_followers", bin_count=10, bot_removed=bots_removed)

In [None]:
# Same chart for both datasets: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    create_user_bins_plot(frame, "user_friends", bin_count=10, bot_removed=bots_removed)

In [None]:
# Same chart for both datasets: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    create_user_bins_plot(frame, "user_favourites", bin_count=10, bot_removed=bots_removed)

In [None]:
# Same chart for both datasets: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    create_user_bins_plot(frame, "user_age", bin_count=10, bot_removed=bots_removed)

In [None]:
# Same chart for both datasets: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    create_user_bins_plot(frame, "user_influence", bin_count=10, bot_removed=bots_removed)

3. Time Series graphs

In [None]:
# Unbinned time series (bin_count=None): bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_followers", bin_count=None, bot_removed=bots_removed)

In [None]:
# Time series with 10 bins: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_followers", bin_count=10, bot_removed=bots_removed)

In [None]:
# Unbinned time series (bin_count=None): bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_friends", bin_count=None, bot_removed=bots_removed)

In [None]:
# Time series with 10 bins: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_friends", bin_count=10, bot_removed=bots_removed)

In [None]:
# Unbinned time series (bin_count=None): bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_favourites", bin_count=None, bot_removed=bots_removed)

In [None]:
# Time series with 10 bins: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_favourites", bin_count=10, bot_removed=bots_removed)

In [None]:
# Unbinned time series (bin_count=None): bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_influence", bin_count=None, bot_removed=bots_removed)

In [None]:
# Time series with 8 bins: bots included first, then the bot-removed data.
for frame, bots_removed in ((user_data_withbots, False), (user_data_wobots, True)):
    make_lineplot_with_bins_by_date(frame, "user_influence", bin_count=8, bot_removed=bots_removed)