# The Effects of Social Media Bots on the Cryptomarket: Engagement Rate

*By Daniel Deutsch*

In [1]:
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Suppress every warning so library noise does not clutter the cell outputs
warnings.filterwarnings('ignore')

# Plot appearance: ggplot base style, wide default figure, custom colour cycle
# and a light background for the axes
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 6)
plt.rcParams['axes.prop_cycle'] = plt.cycler(
    color=['#4C72B0', '#C44E52', '#55A868', '#8172B2', '#CCB974', '#64B5CD']
)
plt.rcParams['axes.facecolor'] = '#EAEAF2'

# Date constants (presumably the bounds of the analysis window; they are not
# referenced by the cells shown in this notebook section -- TODO confirm usage)
START_DATE, END_DATE = datetime(2019, 6, 1), datetime(2022, 6, 1)

## Calculating Engagement Rate

### Read Datasets

In [3]:
# Classified twits: the first CSV column is used as the index and 'date' is
# parsed as a datetime column
df_twits = pd.read_csv(
    "./datasets/classified/twits.csv.gz",
    index_col=0,
    parse_dates=['date'],
    low_memory=False,
)

# User attributes: the first CSV column is used as the index, 'join_date' is
# parsed as a datetime column and every column label gets the 'user.' prefix
df_users = pd.read_csv(
    "./datasets/enhanced/users2.csv.gz",
    index_col=0,
    parse_dates=['join_date'],
    low_memory=False,
).add_prefix('user.')

### Data Processing

In [4]:
# Attaches the user attributes to every twit (left join: all twits are kept).
# validate='many_to_one' makes pandas raise a MergeError when 'user.id' is not
# unique in df_users; such duplicates would otherwise silently duplicate twits
# and distort the hourly twit counts and engagement-rate means computed later.
df_twits = pd.merge(df_twits, df_users, on='user.id', how='left', validate='many_to_one')

# Final label: the original label where available, otherwise the predicted one
df_twits['label_final'] = df_twits['label'].combine_first(df_twits['label_pred'])

# Removes the timezone information so 'date' holds timezone-naive timestamps
# (tz_localize(None) drops the timezone and keeps the local wall-clock time)
df_twits['date'] = df_twits['date'].dt.tz_localize(None)

### Calculates the Engagement Rate

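\begin{align*}
    \text{Engagement Rate} & = \frac{\text{likes} + \text{reshares}}{\max(\text{followers},\, 0) + 1}
\end{align*}

Negative follower counts are clipped to zero, and one is added to the denominator so that the rate stays defined for users without followers.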

In [5]:
# Engagement rate per twit: (likes + reshares) divided by the author's follower
# count, where negative counts are clipped to 0 and 1 is added to the denominator
df_twits['er'] = (
    df_twits['n_likes']
    .add(df_twits['n_reshares'])
    .div(df_twits['user.followers'].clip(lower=0).add(1))
)

### Aggregates Engagement Rate

In [6]:
def _hourly_er(twits, mask, **named_aggs):
    """Aggregate the engagement rate of the selected twits per asset and hour.

    Parameters
    ----------
    twits : pd.DataFrame
        Frame providing the 'base_asset', 'date' and 'er' columns.
    mask : pd.Series
        Boolean selector choosing the rows of ``twits`` to aggregate.
    **named_aggs
        Named aggregations applied to 'er', given as ``output_column='func'``
        (e.g. ``er_bull='mean'``).

    Returns
    -------
    pd.DataFrame
        One column per named aggregation, indexed by
        ('base_asset', 'date' floored to the hour).
    """
    # Filter once and reuse the subset for both the grouping key and the values
    selected = twits[mask]
    hour = selected['date'].dt.floor('h')
    return selected.groupby(['base_asset', hour])['er'].agg(**named_aggs)


# Defines masks (composite masks are built from the four base comparisons)
mask_bull = df_twits['label_final'] == 'Bullish'
mask_bear = df_twits['label_final'] == 'Bearish'
mask_human = df_twits['user.type'] == 'Human'
mask_bot = df_twits['user.type'] == 'Bot'
mask_bull_human = mask_bull & mask_human
mask_bull_bot = mask_bull & mask_bot
mask_bear_human = mask_bear & mask_human
mask_bear_bot = mask_bear & mask_bot

# Obtains features dataframes
df_bull = _hourly_er(df_twits, mask_bull, er_bull='mean')
df_bull_human = _hourly_er(df_twits, mask_bull_human, n_twits_bull_human='size', er_bull_human='mean')
df_bull_bot = _hourly_er(df_twits, mask_bull_bot, n_twits_bull_bot='size', er_bull_bot='mean')
df_bear = _hourly_er(df_twits, mask_bear, er_bear='mean')
df_bear_human = _hourly_er(df_twits, mask_bear_human, n_twits_bear_human='size', er_bear_human='mean')
df_bear_bot = _hourly_er(df_twits, mask_bear_bot, n_twits_bear_bot='size', er_bear_bot='mean')

# Obtains final dataframe: outer-joins all feature frames on (base_asset, date),
# then moves the assets into the columns so every row is one hour and the
# columns are (base_asset, feature) pairs in sorted order
df_er = df_bull
for df_part in (df_bull_human, df_bull_bot, df_bear, df_bear_human, df_bear_bot):
    df_er = pd.merge(df_er, df_part, left_index=True, right_index=True, how='outer')
df_er = df_er.unstack('base_asset').swaplevel(axis=1).sort_index(axis=1)

# Deletes unused dataframes (the loop variable is included because it still
# references the last feature frame)
del df_twits, df_users, df_bull, df_bull_human, df_bull_bot, df_bear, df_bear_human, df_bear_bot, df_part

# Saves the final dataframe
df_er.to_csv("./datasets/engagement_rate.csv.gz")