# Retrieving and Emailing the Top 20 "WallStreetBet" Mentions

In [1]:
import os
import re
import pandas as pd
from dotenv import load_dotenv
load_dotenv()
import praw # reddit API
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

## Initiate the Reddit API object:

In [2]:
client_id = os.getenv('client_id')
client_secret = os.getenv('client_secret')
reddit = praw.Reddit(
    client_id = client_id,
    client_secret = client_secret,
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
)

## Get content from Reddit/wallstreetbets:

In [3]:
# Initialize empty list to hold content:
wsb_content = []

# Loop through 500 most recent posts and append to wsb_content list:
for post in reddit.subreddit('wallstreetbets').hot(limit=500):
    content = {
        "title" : post.title,
        "text" : post.selftext
    }
    wsb_content.append(content)
    
# Convert 'wsb_content' list to a DataFrame:
content_df = pd.DataFrame(wsb_content)

In [4]:
# Match any character not inside the defined '[]':
regex = re.compile('[^a-zA-Z ]')

# Create empty dictionary to store words:
words_dict = {}

# Loop through posts to analyze word frequency:
for (index, row) in content_df.iterrows():
    # Get title:
    title = row['title']
    title = regex.sub('', title)
    title_words = title.split(' ')
    
    # Get content:
    content = row['text']
    content = regex.sub('',content)
    content_words = content.split(' ')
    
    # Combine title and content:
    words = title_words + content_words
    
    # Loop through each word to count frequency (excluding words defined in the exclusion list):
    for x in words:
        if x in ['A', 'B', 'GO', 'ARE', 'ON', 'IT', 'ALL', 'NEXT', 'PUMP', 'AT', 'NOW', 'FOR', 'TD', 'CEO', 'AM', 'K', 'BIG', 'BY', 'LOVE', 'CAN', 'BE', 'SO', 'OUT', 'STAY', 'OR', 'NEW','RH','EDIT','ONE','ANY']:
            pass
        elif x in word_dict:
            words_dict[x] += 1
        else:
            words_dict[x] = 1
            
# Convert 'word_dict' to DataFrame:
words_df = pd.DataFrame.from_dict(list(word_dict.items())).rename(columns={0:"Ticker",1:"Frequency"})

In [5]:
# Read tickers.csv into a DataFrame:
tickers_df = pd.read_csv('stock_tickers.csv')#.rename(columns={"Symbol":"Term","Name":"Company_Name"})

In [6]:
# Merge tickers_df and Reddit scraped words:
top_mentions_df = pd.merge(ticker_df,words_df,on='Ticker')

In [7]:
# Sort by Frequency descending and reset index:
top_mentions_df = top_mentions_df.sort_values(by='Frequency',ascending=False)
top_mentions_df = top_mentions_df.reset_index(drop=True)
# Display top 20:
top_mentions_df.head(20)

Unnamed: 0,Term,Company_Name,Frequency
0,GME,GameStop Corporation Common Stock,141
1,C,Citigroup Inc. Common Stock,65
2,BCX,BlackRock Resources Common Shares of Beneficia...,52
3,DD,DuPont de Nemours Inc. Common Stock,42
4,TSM,Taiwan Semiconductor Manufacturing Company Ltd.,41
5,AMC,AMC Entertainment Holdings Inc. Class A Common...,34
6,RKT,Rocket Companies Inc. Class A Common Stock,28
7,UWMC,UWM Holdings Corporation Class A Common Stock,24
8,D,Dominion Energy Inc. Common Stock,19
9,EOD,Wells Fargo Global Dividend Opportunity Fund,14


In [8]:
# Select and organize top 20 most discussed stocks into readable mail content:
mail_content = (('Top 20 Mentions on WallStreetBets\n')+(f'1){top_mentions_df.iloc[[0],[0]]},{top_mentions_df.iloc[[0],[1]]} - {top_mentions_df.iloc[[0],[2]]} mentions\n')+(f'2){top_mentions_df.iloc[[1],[0]]},{top_mentions_df.iloc[[1],[1]]} - {top_mentions_df.iloc[[1],[2]]} mentions\n')+(f'3){top_mentions_df.iloc[[2],[0]]},{top_mentions_df.iloc[[2],[1]]} - {top_mentions_df.iloc[[2],[2]]} mentions\n')+(f'4){top_mentions_df.iloc[[3],[0]]},{top_mentions_df.iloc[[3],[1]]} - {top_mentions_df.iloc[[3],[2]]} mentions\n')+(f'5){top_mentions_df.iloc[[4],[0]]},{top_mentions_df.iloc[[4],[1]]} - {top_mentions_df.iloc[[4],[2]]} mentions\n')+(f'6){top_mentions_df.iloc[[5],[0]]},{top_mentions_df.iloc[[5],[1]]} - {top_mentions_df.iloc[[5],[2]]} mentions\n')+(f'7){top_mentions_df.iloc[[6],[0]]},{top_mentions_df.iloc[[6],[1]]} - {top_mentions_df.iloc[[6],[2]]} mentions\n')+(f'8){top_mentions_df.iloc[[7],[0]]},{top_mentions_df.iloc[[7],[1]]} - {top_mentions_df.iloc[[7],[2]]} mentions\n')+(f'9){top_mentions_df.iloc[[8],[0]]},{top_mentions_df.iloc[[8],[1]]} - {top_mentions_df.iloc[[8],[2]]} mentions\n')+(f'10){top_mentions_df.iloc[[9],[0]]},{top_mentions_df.iloc[[9],[1]]} - {top_mentions_df.iloc[[9],[2]]} mentions'))

In [None]:
sender_address = os.getenv('SENDER_ADDRESS')
sender_pass = os.getenv('SENDER_PASSWORD')
receiver_address = os.getenv('RECEIVER_ADDRESS')

# Setup MIME:
message = MIMEMultipart()
message['From'] = sender_address
message['To'] = receiver_address
message['Subject'] = 'Top 20 Reddit Stock Mentions'

# Create the email body and attachments:
message.attach(MIMEText(mail_content,'plain'))

# Create SMTP session using gmail and port 587:
session = smtplib.SMTP('smtp.gmail.com',587)

# Initiate TLS security:
session.starttls()

# Login the sender address:
session.login(sender_address,sender_pass)

# Convert email message to string:
text = message.as_string()

# Send the email:
session.sendmail(sender_address,receiver_address,text)

# End TLS session:
session.quit()