In [1]:
import beneath
import pandas as pd
import plotly.express as px
from datetime import datetime

In [2]:
BLACKLIST = {
    "FOMO",
    "DD",
    "EOD",
    "TA",
    "PT",
    "RSI",
    "HUGE",
    "ATH",
    "USA",
    "AI",
    "IMO",
    "AM",
    "UK",
    "BIG",
    "SO",
    "OR",
    "FOR",
    "ALL",
    "IT",
    "BE",
    "ARE",
    "NOW",
    "ON",
    "ME",
    "CAN",
    "VERY",
    "SI",
    "TV",
    "BY",
    "NEW",
    "OUT",
    "LOVE",
    "GO",
    "PM",
    "NEXT",
    "ANY",
    "ET",
    "HAS",
    "ONE",
    "PLAY",
    "LOW",
    "III",
    "CASH",
    "RNG",
    "GOOD",
    "REAL",
    "SEE",
    "RE"
}

In [71]:
MODERATORS = {
    'OPINION_IS_UNPOPULAR',
    'CHAINSAW_VASECTOMY',
    'WallStreetBot',
    'bawse1',
    'zjz',
    'VisualMod',
    'premier_',
    'notmikjaash',
    'WaterCups69',
    'XvGTM17',
    'AutoModerator'
}

In [18]:
pd.set_option('max_colwidth', 100)

# June report

## Posts

In [33]:
posts = await beneath.query_warehouse("""
with
    posts as (
        select 
            created_on, 
            id, 
            author, 
            title, 
            text, 
            flair, 
            permalink,
            length(text) as post_length,
            array_length(regexp_extract_all(title, r"\\x{1F680}")) + array_length(regexp_extract_all(text, r"\\x{1F680}")) as num_rockets,
            array_length(regexp_extract_all(title, r"\\x{1F48E}")) + array_length(regexp_extract_all(text, r"\\x{1F48E}")) as num_diamonds,
        from `examples/reddit/r-wallstreetbets-posts`
        where timestamp_trunc(created_on, month) = "2021-06-01"
    ),
    comments as (
        select 
            created_on, 
            id, 
            post_id, 
            author, 
            text,
            length(text) as comment_length,
            array_length(regexp_extract_all(c.text, r"\\x{1F680}")) as num_rockets,
            array_length(regexp_extract_all(c.text, r"\\x{1F48E}")) as num_diamonds,
        from `examples/reddit/r-wallstreetbets-comments` c
        where timestamp_trunc(created_on, month) = "2021-06-01"
    )
select 
    p.created_on, 
    p.title, 
    p.text, 
    p.author, 
    p.flair, 
    p.permalink,
    p.post_length,
    count(c.id) as num_comments,
    sum(c.comment_length) as sum_comments_length, 
    sum(c.comment_length)/count(c.id) as avg_comment_length,
    p.num_rockets + sum(c.num_rockets) as num_rockets,
    p.num_diamonds + sum(c.num_diamonds) as num_diamonds,
    count(distinct c.author) as nunique_commenters,
from posts p
join comments c on p.id = c.post_id
group by p.created_on, p.title, p.text, p.author, p.flair, p.permalink, p.post_length, p.num_rockets, p.num_diamonds
""")

Post with most comments

In [34]:
posts.sort_values('num_comments', ascending=False).head(3)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
371,2021-06-11 20:00:17+00:00,"Weekend Discussion Thread for the Weekend of June 11, 2021",Your weekend discussion thread. Please keep the shitposting to a maximum!\n\nFollow [@Official_W...,OPINION_IS_UNPOPULAR,Weekend Discussion,/r/wallstreetbets/comments/nxosm3/weekend_discussion_thread_for_the_weekend_of_june/,181,63166,4528066,71.685179,3028,440,4668,2021-07-13 14:27:25.308000+00:00
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
210,2021-06-02 10:00:15+00:00,"Daily Discussion Thread for June 02, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nqi9f6/daily_discussion_thread_for_june_02_2021/,2359,40962,2273866,55.511596,5528,437,8586,2021-07-13 14:27:25.307000+00:00


Longest discussion

In [35]:
posts.sort_values('sum_comments_length', ascending=False).head(3)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
371,2021-06-11 20:00:17+00:00,"Weekend Discussion Thread for the Weekend of June 11, 2021",Your weekend discussion thread. Please keep the shitposting to a maximum!\n\nFollow [@Official_W...,OPINION_IS_UNPOPULAR,Weekend Discussion,/r/wallstreetbets/comments/nxosm3/weekend_discussion_thread_for_the_weekend_of_june/,181,63166,4528066,71.685179,3028,440,4668,2021-07-13 14:27:25.308000+00:00
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
2674,2021-06-04 20:00:15+00:00,"Weekend Discussion Thread for the Weekend of June 04, 2021",Your weekend discussion thread. Please keep the shitposting to a maximum!\n\nFollow [@Official_W...,OPINION_IS_UNPOPULAR,Weekend Discussion,/r/wallstreetbets/comments/nse1dg/weekend_discussion_thread_for_the_weekend_of_june/,181,37246,2797369,75.105219,1263,239,4761,2021-07-13 14:27:28.740000+00:00


Post with the highest avg comment length (at least 5 comments)

In [26]:
posts[posts['num_comments'] >= 5].sort_values('avg_comment_length', ascending=False).head(3)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,nunique_commenters,@meta.timestamp
24581,2021-06-17 18:25:55+00:00,$DKNG YOLOO. Still holding. Still bullish. ðŸ’ŽðŸ™Œ -> ðŸš€ðŸš€,,arjunav,YOLO,/r/wallstreetbets/comments/o241a9/dkng_yoloo_still_holding_still_bullish/,0,5,6974,1394.8,2,5,2021-07-13 14:10:38.334000+00:00
28707,2021-06-08 21:00:43+00:00,Sorrento Therapeutics Bull Case = EZ Squeeze.,"""Wall Street analysts also predicted that in 2021, the companyâ€™s y-o-y revenues would reach $1.1...",Siphen_,Discussion,/r/wallstreetbets/comments/nvecr9/sorrento_therapeutics_bull_case_ez_squeeze/,5008,13,9433,725.615385,0,8,2021-07-13 14:10:42.956000+00:00
26653,2021-06-21 20:07:22+00:00,Be objective and look data and take your seat,Times like these illustrate why having a cool head in a crisis is so important. The ability to b...,Kimaxw,DD,/r/wallstreetbets/comments/o546n6/be_objective_and_look_data_and_take_your_seat/,3226,8,5414,676.75,3,6,2021-07-13 14:10:40.689000+00:00


Post with the most rockets (including comments)

In [39]:
posts.sort_values('num_rockets', ascending=False).head(3)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
210,2021-06-02 10:00:15+00:00,"Daily Discussion Thread for June 02, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nqi9f6/daily_discussion_thread_for_june_02_2021/,2359,40962,2273866,55.511596,5528,437,8586,2021-07-13 14:27:25.307000+00:00
0,2021-06-08 11:07:59+00:00,"Daily Popular Ticker Thread for June 08, 2021 - BB | AMC | CLOV",Apologies for the delay. The global Fastly outage knocked Reddit offline.,OPINION_IS_UNPOPULAR,,/r/wallstreetbets/comments/nv1ig9/daily_popular_ticker_thread_for_june_08_2021_bb/,73,26140,1572924,60.173068,5487,481,6103,2021-07-13 14:27:25.305000+00:00


Post with the most diamonds (including comments)

In [38]:
posts.sort_values('num_diamonds', ascending=False).head(3)

Unnamed: 0,created_on,title,text,author,flair,permalink,post_length,num_comments,sum_comments_length,avg_comment_length,num_rockets,num_diamonds,nunique_commenters,@meta.timestamp
3334,2021-06-11 10:00:19+00:00,"Daily Popular Tickers Thread for June 11, 2021 - AMC | BB | CLOV",\nYour daily hype thread. Please keep the shitposting to a maximum.\n\n^Navigate ^WSB |^We ^reco...,VisualMod,,/r/wallstreetbets/comments/nxcb5h/daily_popular_tickers_thread_for_june_11_2021_amc/,1782,9492,670418,70.629794,3163,1167,2523,2021-07-13 14:27:30.164000+00:00
311,2021-06-03 10:00:13+00:00,"Daily Discussion Thread for June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nr9r9t/daily_discussion_thread_for_june_03_2021/,2359,56245,3356092,59.669162,6961,830,10522,2021-07-13 14:27:25.308000+00:00
950,2021-06-02 20:00:20+00:00,"What Are Your Moves Tomorrow, June 03, 2021",Your daily trading discussion thread. Please keep the shitposting to a minimum. \n\n^Navigate ^W...,OPINION_IS_UNPOPULAR,Daily Discussion,/r/wallstreetbets/comments/nquubf/what_are_your_moves_tomorrow_june_03_2021/,2357,35642,2113514,59.298412,4250,706,8936,2021-07-13 14:27:25.313000+00:00


## Authors

In [54]:
authors = await beneath.query_warehouse("""
with
    posts_enhanced as (
        select *,
            array_length(split(title, " ")) as num_words_title,
            array_length(split(text, " ")) as num_words_body,
            array_length(regexp_extract_all(title, r"\\x{1F680}")) as num_rockets_title,
            array_length(regexp_extract_all(text, r"\\x{1F680}")) as num_rockets_body,
            array_length(regexp_extract_all(title, r"\\x{1F48E}")) as num_diamonds_title,
            array_length(regexp_extract_all(text, r"\\x{1F48E}")) as num_diamonds_body
        from `examples/reddit/r-wallstreetbets-posts`
        where timestamp_trunc(created_on, month) = "2021-06-01"
    ),
    comments_enhanced as (
        select *,
            array_length(split(text, " ")) as num_words,
            array_length(regexp_extract_all(text, r"\\x{1F680}")) as num_rockets,
            array_length(regexp_extract_all(text, r"\\x{1F48E}")) as num_diamonds
        from `examples/reddit/r-wallstreetbets-comments`
        where timestamp_trunc(created_on, month) = "2021-06-01"
    ),
    author_posts_stats as (
        select 
            author, 
            count(*) as num_posts,
            sum(num_words_title) + sum(num_words_body) as num_words,
            sum(num_rockets_title) + sum(num_rockets_body) as num_rockets,
            sum(num_diamonds_title) + sum(num_diamonds_body) as num_diamonds,
        from posts_enhanced
        group by author
    ),
    author_comments_stats as (
        select
            author,
            count(*) as num_comments,
            sum(num_words) as num_words,
            sum(num_rockets) as num_rockets,
            sum(num_diamonds) as num_diamonds
        from comments_enhanced
        group by author
    )
select 
    coalesce(p.author, c.author) as author,
    ifnull(p.num_posts, 0) as num_posts,
    ifnull(c.num_comments, 0) as num_comments,
    ifnull(p.num_words, 0) + ifnull(c.num_words, 0) as num_words,
    ifnull(p.num_rockets, 0) + ifnull(c.num_rockets, 0) as num_rockets,
    ifnull(p.num_diamonds, 0) + ifnull(c.num_diamonds, 0) as num_diamonds,
from author_posts_stats p
full join author_comments_stats c on p.author = c.author
""")

In [72]:
authors_no_mods = authors.loc[[author not in MODERATORS for author in authors['author']]]

Author with the most posts

In [73]:
authors_no_mods.sort_values('num_posts', ascending=False).head(3)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
15518,BackgroundProgram389,45,323,2280,0,0,2021-07-14 10:15:56.625000+00:00
7314,DerekZ1985,37,393,6352,0,0,2021-07-14 10:15:53.283000+00:00
3098,Z3r0Confidence,37,276,12328,49,0,2021-07-14 10:15:50.287000+00:00


Author with the most comments

In [74]:
authors_no_mods.sort_values('num_comments', ascending=False).head(3)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
1607,MoonArmy1977,0,3458,43605,25,32,2021-07-14 10:15:48.965000+00:00
5051,Dinosaur_Eats_Pizza,1,3034,34533,82,0,2021-07-14 10:15:52.077000+00:00
13272,toydan,1,2811,45763,0,0,2021-07-14 10:15:55.755000+00:00


Author who wrote the most words

In [75]:
authors_no_mods.sort_values('num_words', ascending=False).head(3)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
13272,toydan,1,2811,45763,0,0,2021-07-14 10:15:55.755000+00:00
1607,MoonArmy1977,0,3458,43605,25,32,2021-07-14 10:15:48.965000+00:00
5051,Dinosaur_Eats_Pizza,1,3034,34533,82,0,2021-07-14 10:15:52.077000+00:00


Author who posted the most rockets

In [76]:
authors_no_mods.sort_values('num_rockets', ascending=False).head(3)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
6907,mateace,0,27,295,2717,0,2021-07-14 10:15:52.885000+00:00
11097,unipaulie,2,377,3032,1760,23,2021-07-14 10:15:54.937000+00:00
4766,LordTender,0,372,2931,1039,0,2021-07-14 10:15:51.527000+00:00


Author who posted the most diamonds

In [77]:
authors_no_mods.sort_values('num_diamonds', ascending=False).head(3)

Unnamed: 0,author,num_posts,num_comments,num_words,num_rockets,num_diamonds,@meta.timestamp
15373,Sc0ttyMinz,1,335,2026,590,710,2021-07-14 10:15:56.624000+00:00
8323,Tazzer57,0,875,8127,926,385,2021-07-14 10:15:53.750000+00:00
107348,FriendlyhoodKomrad,3,2,246,0,330,2021-07-14 10:16:33.776000+00:00


Author that spurred the most discussion

In [90]:
posts.loc[[author not in MODERATORS for author in posts['author']]] \
    .groupby('author')['num_comments'] \
    .sum() \
    .reset_index() \
    .sort_values('num_comments', ascending=False) \
    .rename(columns={'num_comments': 'num_comments_on_posts'}) \
    .head(3)

Unnamed: 0,author,num_comments_on_posts
17874,pittluke,8297
19883,yolocallking,5311
13198,Your_Boy_Roy_,4566


## Stock mentions

In [137]:
mentions = await beneath.query_warehouse("""
with
    stock_mentions_posts as (
        select 
            symbol, 
            timestamp_trunc(timestamp, day) as day, 
            count(*) as num_mentions
        from `examples/wallstreetbets-analytics/r-wallstreetbets-posts-stock-mentions`
        group by symbol, timestamp_trunc(timestamp, day)
    ),
    stock_mentions_comments as (
        select 
            symbol, 
            timestamp_trunc(timestamp, day) as day, 
            count(*) as num_mentions
        from `examples/wallstreetbets-analytics/r-wallstreetbets-comments-stock-mentions`
        group by symbol, timestamp_trunc(timestamp, day)
    )
select 
    coalesce(p.symbol, c.symbol) as symbol,
    coalesce(p.day, c.day) as day,
    ifnull(p.num_mentions, 0) + ifnull(c.num_mentions,0) as num_mentions
from stock_mentions_posts p
full join stock_mentions_comments c on p.symbol = c.symbol and p.day = c.day
order by symbol, day
""")

In [138]:
mentions = mentions[~mentions['symbol'].isin(BLACKLIST)]

Keep track of the top symbols

In [156]:
top_symbols_alltime_df = mentions \
    .groupby('symbol')['num_mentions'] \
    .sum() \
    .reset_index() \
    .sort_values('num_mentions', ascending=False)
top_symbols_alltime = top_symbols_alltime_df['symbol'][:100]
top_symbols_june_df = mentions.loc[mentions['day'].dt.month == 6] \
    .groupby('symbol')['num_mentions'] \
    .sum() \
    .reset_index() \
    .sort_values('num_mentions', ascending=False)
top_symbols_june = top_symbols_june_df['symbol'][:100]

In [157]:
fig = px.bar(top_symbols_june_df[0:10], x="num_mentions", y="symbol", text="num_mentions", color='symbol', orientation='h', title='Most mentioned symbols in June', labels={'num_mentions': 'Mentions', 'symbol': 'Symbol'})
fig.update_layout(showlegend=False)
fig.show()

In [162]:
fig = px.line(mentions.loc[(mentions['symbol'].isin(top_symbols_june[:10])) & (mentions['day'].dt.month == 6)], 
              x="day", y="num_mentions", line_group="symbol", color="symbol",
              title="Mentions by day", labels={'num_mentions': 'mentions'})
fig.show()

Get the fraction of all mentions in the day

In [217]:
total_daily_mentions = mentions.groupby('day')['num_mentions'].sum().reset_index()
tmp = mentions \
    .merge(total_daily_mentions, on="day", suffixes=('_stock', '_total')) \
    .sort_values(['symbol','day'])
tmp['fraction_of_mentions'] = tmp['num_mentions_stock'] / tmp['num_mentions_total']
tmp['fraction_of_mentions_MA'] = tmp.groupby('symbol')['fraction_of_mentions'].transform(lambda x: x.rolling(window=7, min_periods=1).mean())

In [212]:
fig = px.line(tmp[tmp['symbol'].isin(top_symbols_june[0:10])].sort_values('day', ascending=False), 
              x="day", y="fraction_of_mentions", line_group="symbol", color="symbol",
              title="Fraction of mentions (daily)")
fig.show()

In [210]:
fig = px.line(tmp[tmp['symbol'].isin(top_symbols_june[0:10])].sort_values('day', ascending=False), 
              x="day", y="fraction_of_mentions_MA", line_group="symbol", color="symbol",
              title="Fraction of mentions (7 day rolling average)")
fig.show()

In [211]:
fig = px.line(tmp_wk[tmp_wk['symbol'].isin(top_symbols[0:10])].sort_values('week', ascending=False), 
              x="week", y="mention_perc", line_group="symbol", color="symbol",
              title="Fraction of mentions (weekly)")
fig.show()

In [230]:
symbol_peaks = tmp.loc[tmp.groupby(['symbol'])["fraction_of_mentions_MA"].idxmax()][['symbol', 'day']] \
    .rename(columns={'day': 'date_of_peak_popularity'})
tmp2 = tmp.merge(symbol_peaks, on='symbol')
tmp2['days_from_peak'] = (tmp2['day'] - tmp2['date_of_peak_popularity']).dt.days
tmp2.head()

Unnamed: 0,symbol,day,num_mentions_stock,@meta.timestamp,num_mentions_total,fraction_of_mentions,fraction_of_mentions_MA,date_of_peak_popularity,days_from_peak
0,A,2021-03-10 00:00:00+00:00,1,2021-07-14 14:10:31.929000+00:00,36129,2.8e-05,2.8e-05,2021-05-11 00:00:00+00:00,-62
1,A,2021-03-16 00:00:00+00:00,1,2021-07-14 14:10:31.929000+00:00,19681,5.1e-05,3.9e-05,2021-05-11 00:00:00+00:00,-56
2,A,2021-03-19 00:00:00+00:00,1,2021-07-14 14:10:31.929000+00:00,16490,6.1e-05,4.6e-05,2021-05-11 00:00:00+00:00,-53
3,A,2021-03-22 00:00:00+00:00,2,2021-07-14 14:10:31.929000+00:00,14202,0.000141,7e-05,2021-05-11 00:00:00+00:00,-50
4,A,2021-03-23 00:00:00+00:00,1,2021-07-14 14:10:31.929000+00:00,18362,5.4e-05,6.7e-05,2021-05-11 00:00:00+00:00,-49


In [234]:
fig = px.line(tmp2[tmp2['symbol'].isin(top_symbols_alltime[0:10])].sort_values('day', ascending=False), 
              x="days_from_peak", y="num_mentions_stock", line_group="symbol", color="symbol",
              title="Days from peak")
fig.show()