In [None]:
#@title Pip Installs
!pip install praw
!pip install textblob
!python -m textblob.download_corpora
!pip install vaderSentiment

In [None]:
#@title Imports
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import praw
import requests
import seaborn as sns
import yfinance as yf
from bs4 import BeautifulSoup
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Tickers
We analyze the stocks in the S&P 500. To begin, we initialize a list of ticker symbols to iterate over.

In [None]:
# Initialize tickers of S&P 500 stocks (hardcoded, comma-separated snapshot)
tickers_text = 'MMM,AOS,ABT,ABBV,ACN,AYI,ADBE,AAP,AMD,AES,AMG,AFL,A,APD,AKAM,ALK,ALB,ARE,ALGN,ALLE,LNT,ALL,GOOGL,GOOG,MO,AMZN,AEE,AAL,AEP,AXP,AIG,AMT,AWK,AMP,AME,AMGN,APH,ADI,AON,APA,AIV,AAPL,AMAT,APTV,ADM,AJG,AIZ,T,ADSK,ADP,AZO,AVB,AVY,BAC,BAX,BBT,BDX,BBY,BIIB,BLK,HRB,BA,BWA,BXP,BSX,BHF,BMY,AVGO,CHRW,CA,CDNS,CPB,COF,CAH,KMX,CCL,CAT,CBOE,CNC,CNP,CF,SCHW,CHTR,CVX,CMG,CB,CHD,CI,CINF,CTAS,CSCO,C,CFG,CME,CMS,KO,CTSH,CL,CMCSA,CMA,CAG,COP,ED,STZ,GLW,COST,COTY,CCI,CSX,CMI,CVS,DHI,DHR,DRI,DVA,DE,DAL,XRAY,DVN,DLR,DG,DLTR,D,DOV,DTE,DUK,DXC,EMN,ETN,EBAY,ECL,EIX,EW,EA,EMR,ETR,EOG,EQT,EFX,EQIX,EQR,ESS,EL,ES,EXC,EXPE,EXPD,EXR,XOM,FFIV,FAST,FRT,FDX,FIS,FITB,FE,FLS,FMC,FL,F,FTV,BEN,FCX,GRMN,IT,GD,GE,GIS,GM,GPC,GILD,GPN,GS,GT,GWW,HAL,HBI,HOG,HIG,HAS,HCA,HP,HSIC,HPE,HLT,HOLX,HD,HON,HRL,HST,HPQ,HUM,HBAN,HII,IDXX,INFO,ITW,ILMN,INCY,IR,INTC,ICE,IBM,IP,IPG,IFF,INTU,ISRG,IVZ,IQV,IRM,JBHT,SJM,JNJ,JCI,JPM,K,KEY,KMB,KIM,KMI,KLAC,KSS,KHC,KR,LB,LH,LRCX,LEG,LEN,LLY,LNC,LKQ,LMT,L,LOW,LYB,MTB,MAC,M,MPC,MAR,MMC,MLM,MAS,MA,MAT,MKC,MCD,MCK,MDT,MRK,MET,MTD,MGM,MCHP,MU,MSFT,MAA,MHK,TAP,MDLZ,MNST,MCO,MS,MSI,NDAQ,NOV,NAVI,NTAP,NFLX,NWL,NEM,NWSA,NWS,NEE,NKE,NI,NSC,NTRS,NOC,NCLH,NRG,NUE,NVDA,ORLY,OXY,OMC,OKE,ORCL,PCAR,PKG,PH,PAYX,PYPL,PNR,PEP,PRGO,PFE,PCG,PM,PSX,PNW,PNC,RL,PPG,PPL,PX,PFG,PG,PGR,PLD,PRU,PEG,PSA,PHM,PVH,QRVO,QCOM,PWR,DGX,RRC,RJF,O,REG,REGN,RF,RSG,RMD,RHI,ROK,ROP,ROST,RCL,SPGI,CRM,SBAC,SLB,STX,SEE,SRE,SHW,SIG,SPG,SWKS,SLG,SNA,SO,LUV,SWK,SBUX,STT,SYK,STI,SYF,SNPS,SYY,TROW,TPR,TGT,TEL,FTI,TXN,TXT,BK,CLX,COO,HSY,MOS,TRV,DIS,TMO,TJX,TSCO,TDG,TRIP,FOXA,FOX,TSN,USB,UDR,ULTA,UAA,UA,UNP,UAL,UNH,UPS,URI,UHS,UNM,VFC,VLO,VTR,VRSN,VRSK,VZ,VRTX,V,VNO,VMC,WMT,WBA,WM,WAT,WEC,WFC,WDC,WU,WY,WHR,WMB,WYNN,XEL,XRX,XYL,YUM,ZBH,ZION,ZTS'

# Convert into a list for ease of iteration.
# Symbols are stripped of surrounding whitespace, blanks are dropped, and
# duplicates are removed while keeping first-seen order (dict keys preserve
# insertion order), so each symbol is processed at most once downstream.
tickers = list(dict.fromkeys(
    symbol.strip() for symbol in tickers_text.split(',') if symbol.strip()
))

# Pulling YFinance Data
Using the yfinance API, we can access information about any stock. From this data, we pull the metrics and figures relevant to our analysis. Additionally, we extract price data and calculate moving averages. All of this information is stored in `stock_data`, which is then transformed into the `stock_df` data frame.

In [None]:
# yfinance info

# Rolling-window lengths (in rows of the price history) for the two moving averages
short_window = 30
long_window = 90

# One record (dict) per successfully fetched ticker
stock_data = []

# Iterate across all tickers and extract data
for ticker in tickers:

  # Fetching is best-effort: a failure for one ticker is reported and the loop
  # moves on to the next ticker instead of aborting the whole run.
  try:
    ticker_data = yf.Ticker(ticker)

    # Pull stock company info
    ticker_info = ticker_data.info
    name = ticker_info.get('shortName')
    current_price = ticker_info.get('currentPrice')
    sector = ticker_info.get('sector')
    market_cap = ticker_info.get('marketCap')
    roe = ticker_info.get('returnOnEquity')
    roa = ticker_info.get('returnOnAssets')

    # Make ROE and ROA percentages (left as None when the metric is missing)
    roe = roe * 100 if roe is not None else None
    roa = roa * 100 if roa is not None else None

    # Moving Average Calculator

    # Pull one year of closing prices, reusing the Ticker object created above
    prices = ticker_data.history(period="1y")['Close']

    # Without any prices there is no "latest" moving average to read, so skip
    # the ticker with an explicit message rather than an opaque indexing error.
    if prices.empty:
      print(f'{ticker} skipped: no price history returned')
      continue

    # Calculate moving averages
    short_term_ma = prices.rolling(window=short_window).mean()
    long_term_ma = prices.rolling(window=long_window).mean()

    # Find current moving average values (NaN when the history is shorter than the window)
    current_short_ma = short_term_ma.iloc[-1]
    current_long_ma = long_term_ma.iloc[-1]

    # MA Momentum: percentage by which the short MA sits above/below the long MA
    ma_momentum = ((current_short_ma - current_long_ma)/current_long_ma) * 100

    # Append data to list
    stock_data.append({
      'Ticker': ticker,
      'Name': name,
      'Current Price': current_price,
      'Sector': sector,
      'Market Cap': market_cap,
      'ROE': roe,
      'ROA': roa,
      'Current Short MA': current_short_ma,
      'Current Long MA' :  current_long_ma,
      'MA Momentum' : ma_momentum,
      'Prices': prices,
      'Short Term MA': short_term_ma,
      'Long Term MA': long_term_ma,
    })

    # Report success only once every fetch above has actually completed
    print(f'{ticker} data fetch successful')
  except Exception as e:
    print(f'{ticker} data fetch failed: {e}')

In [None]:
# Build the stock table from the collected per-ticker records and key it by
# ticker symbol in a single chained expression
stock_df = pd.DataFrame(data=stock_data).set_index(keys='Ticker')

# Display the resulting frame
stock_df

# Sentiment Analysis
Using PRAW to scrape subreddits and VADER Sentiment to compute sentiment scores, we collect how many times each stock is mentioned and the average sentiment expressed about it. This data is aggregated into the `sentiment_df` data frame.

In [None]:
# Initialize Sentiment Intensity Analyzer from Vader Sentiment.
# A single module-level instance is created here; get_sentiment() reads the
# 'compound' value from its polarity_scores() output.
analyzer = SentimentIntensityAnalyzer()

# Sentiment score of a single piece of text
def get_sentiment(text):
    """Return the 'compound' polarity score of ``text``.

    Anything that is not a string, and any string that is empty or only
    whitespace, scores 0 without consulting the analyzer.
    """
    has_content = isinstance(text, str) and text.strip() != ''
    if has_content:
        return analyzer.polarity_scores(text)['compound']
    return 0

# Define function to combine the sentiment of title and body text
def overall_sentiment(row):
    """Combine a post's title and body sentiment into one score.

    ``row`` must support ``row['title_sentiment']`` and ``row['body_sentiment']``.
    A score of 0 is treated as "no sentiment available" for that part of the post:

    * both parts scored  -> their mean
    * one part scored    -> that part's score (so a post with only a scored
                            title or only a scored body is not discarded)
    * neither scored     -> None
    """
    scored = [
        score
        for score in (row['title_sentiment'], row['body_sentiment'])
        if score != 0
    ]
    if not scored:
        return None
    return sum(scored) / len(scored)

# Subreddits to scrape, and ticker symbols that are excluded from the sentiment
# results (treated as false positives)
subredditlist = ['wallstreetbets', 'stockmarket', 'stocks']
false_tickers = {'DD','IT','ALL','A','DAY','YOU','BE','ON','IN','FOR','GO','UP','SO','AI','AR','D','MGM','MFA','MA','PDT','S','T','AS','FT','IP','K', 'USA', 'M', 'B'}

all_frames = []

# Reddit API credentials are read from the environment so that no secret is
# stored in the notebook.
# NOTE(review): credentials that were previously committed in this cell should
# be revoked/rotated in the Reddit app settings.
reddit_client_id = os.environ.get('REDDIT_CLIENT_ID')
reddit_client_secret = os.environ.get('REDDIT_CLIENT_SECRET')
if not reddit_client_id or not reddit_client_secret:
    raise RuntimeError(
        'Set the REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET environment '
        'variables before running this cell'
    )

# One client serves every subreddit, so it is created once, outside the loop
subscraper = praw.Reddit(
    client_id=reddit_client_id,
    client_secret=reddit_client_secret,
    user_agent=os.environ.get('REDDIT_USER_AGENT', 'sub-activity-check by u/Flaky-Pattern4439'),
    check_for_async=False
)

# Iterate across subreddits to scrape sentiment
for item in subredditlist:
    sub = subscraper.subreddit(item)

    # Top posts of the past month from the subreddit (at most 500)
    posts = list(sub.top(time_filter='month', limit=500))

    # Keep only the fields needed for matching and scoring
    postinfo = [
        {
            'title': post.title,
            'body': post.selftext,
            'permalink': f'https://www.reddit.com{post.permalink}',
        }
        for post in posts
    ]

    # With no posts there are no 'title'/'body' columns to score; skip the subreddit
    if not postinfo:
        print(f'No posts returned for r/{item}; skipping')
        continue

    # Create a dataframe of post data
    postsdf = pd.DataFrame(postinfo)
    # Running Vader sentiment analyzer on title and body text
    postsdf['title_sentiment'] = postsdf['title'].apply(get_sentiment)
    postsdf['body_sentiment']  = postsdf['body'].apply(get_sentiment)
    # Combining sentiment of title and body
    postsdf['overall_sentiment'] = postsdf.apply(overall_sentiment, axis=1)

    # Per-ticker running totals of mentions and summed sentiment for this subreddit
    ticker_stats = {t: {'mentions': 0, 'sentiment_sum': 0.0} for t in tickers}

    # Iterates through rows in the posts dataframe
    for _, row in postsdf.iterrows():
        # Pad with spaces so a ticker at the very start or end of the text can match
        text = f" {row['title']} {row['body']} "
        sent = row['overall_sentiment']

        # A post mentions a ticker when the symbol (optionally prefixed with '$')
        # appears surrounded by single spaces. Each mentioning post adds 1 to
        # mentions, and adds its sentiment to sentiment_sum when one is available.
        # NOTE(review): symbols adjacent to punctuation or newlines are not matched.
        for ticker in tickers:
            if f' {ticker} ' in text or f' ${ticker} ' in text:
                ticker_stats[ticker]['mentions'] += 1
                if pd.notna(sent):
                    ticker_stats[ticker]['sentiment_sum'] += float(sent)

    # Keep only tickers that were mentioned at least once
    rows = [
        {'ticker': ticker, 'mentions': stats['mentions'], 'sentiment_sum': stats['sentiment_sum']}
        for ticker, stats in ticker_stats.items()
        if stats['mentions'] > 0
    ]

    # Converting rows to dataframe (explicit columns keep the schema when rows is empty)
    result_df = pd.DataFrame(rows, columns=['ticker','mentions','sentiment_sum'])

    # Removing false tickers
    clean_df = result_df[~result_df['ticker'].isin(false_tickers)].reset_index(drop=True)
    all_frames.append(clean_df)

# Merging all dataframes in all_frames into one frame; pd.concat rejects an empty
# list, so fall back to an empty frame with the same columns
if all_frames:
    combined = pd.concat(all_frames, ignore_index=True)
else:
    combined = pd.DataFrame(columns=['ticker','mentions','sentiment_sum'])

# Summing total mentions and total sentiment across subreddits
totals = (combined.groupby('ticker', as_index=False).agg(mentions=('mentions','sum'),sentiment_sum=('sentiment_sum','sum')))
# Average sentiment = total sentiment / total mentions (mentioning posts without
# a sentiment score contribute 0 to the total)
totals['avg_sentiment'] = totals['sentiment_sum'] / totals['mentions']

# Tickers, mentions, and average sentiment, ordered by descending mentions and
# then descending average sentiment, indexed by ticker
sentiment_df = (
    totals
    .sort_values(['mentions','avg_sentiment'], ascending=[False, False])
    .reset_index(drop=True)[['ticker','mentions','avg_sentiment']]
    .set_index('ticker')
)
sentiment_df

# Data Cleaning and Consolidation
Next, we join our two data frames, `stock_df` and `sentiment_df`, to consolidate all of our relevant information. This requires some cleanup of NaN values and column names. Finally, we compute z-scores for the relevant columns: ROE, ROA, MA momentum, mentions, and average sentiment.

In [None]:
# Attach the Reddit sentiment columns to the stock table. Both frames are indexed
# by ticker; a left join keeps every stock row, and stocks without a matching
# sentiment row get NaN in the added columns.
stock_df = stock_df.join(other=sentiment_df, how='left')

# Display the merged frame
stock_df

In [None]:
# Rename the sentiment columns to match the naming style of the other columns
stock_df = stock_df.rename(columns={'avg_sentiment': 'Average Sentiment', 'mentions': 'Mentions'})

# Missing sentiment figures and missing return metrics are replaced with 0.
# Values are coerced to numeric first so the statistics below always operate on
# a numeric column.
for column in ['Mentions', 'Average Sentiment', 'ROE', 'ROA']:
    stock_df[column] = pd.to_numeric(stock_df[column], errors='coerce').fillna(0)


def _zscore(values):
    """Return ``(values - mean) / std`` for a numeric Series.

    When the standard deviation is zero or undefined (all values equal, or fewer
    than two values) every non-missing entry gets 0.0 instead of NaN/inf, since
    no entry deviates from the mean. Missing entries stay missing.
    """
    spread = values.std()
    if pd.isna(spread) or spread == 0:
        return values * 0.0
    return (values - values.mean()) / spread


# Normalize Data for ROE, ROA, MA Momentum, Mentions, and Sentiment
for column in ['ROE', 'ROA', 'MA Momentum', 'Mentions', 'Average Sentiment']:
    stock_df[f'{column} (Z-Score)'] = _zscore(stock_df[column])

stock_df.sort_values('Mentions', ascending=False)

# Scoring
With key information about each stock in hand, we weight the relative importance of each factor and combine the z-scores into a composite score that indicates whether a stock should be bought or sold. We then show the top 3 and bottom 3 stocks.

In [None]:
# Composite score: weighted combination of the z-scored factors

# Relative weighting of each term in the formula
sentiment_weight = 0.25
ma_momentum_weight = 0.5
roe_weight = 0.125
roa_weight = 0.125

# Individual weighted terms.
# NOTE(review): the sentiment term multiplies two z-scores, so a stock that is
# below average on both mentions and sentiment gets a positive contribution -
# confirm this is intended.
sentiment_term = sentiment_weight * stock_df['Mentions (Z-Score)'] * stock_df['Average Sentiment (Z-Score)']
momentum_term = ma_momentum_weight * stock_df['MA Momentum (Z-Score)']
roe_term = roe_weight * stock_df['ROE (Z-Score)']
roa_term = roa_weight * stock_df['ROA (Z-Score)']

# Sum the terms into the composite rating score
stock_df['Composite Score'] = sentiment_term + momentum_term + roe_term + roa_term

# Column layout of the final table (columns not listed here are dropped)
column_order = [
    'Name', 'Composite Score', 'Sector', 'Market Cap',
    'ROE', 'ROA', 'Current Short MA',
    'Current Long MA', 'MA Momentum',
    'Mentions', 'Average Sentiment', 'Prices',
    'Short Term MA', 'Long Term MA', 'ROE (Z-Score)',
    'ROA (Z-Score)', 'MA Momentum (Z-Score)',
    'Mentions (Z-Score)', 'Average Sentiment (Z-Score)',
]
stock_df = stock_df[column_order].copy()

# Highest composite score first
stock_df = stock_df.sort_values('Composite Score', ascending=False)
stock_df


In [None]:
# Three strongest stocks: the first three rows of stock_df in its current order
stock_df.iloc[:3]

In [None]:
# Show 3 Weakest Stocks
# Rows without a Composite Score (NaN) are dropped first: a descending sort
# places NaN rows last, so they would otherwise be shown as the "weakest" stocks.
stock_df.dropna(subset=['Composite Score']).tail(3)

# Visualization
Here we visualize our results and compare scores across different sectors and companies.

In [None]:
# Graph composite scores based on groupby sector

# Mean of every numeric column per sector; 'Sector' becomes the index
stock_score_by_sector = stock_df.groupby('Sector').mean(numeric_only=True)

# Set up plot on an explicit Axes. reset_index() turns 'Sector' back into a
# regular column so x='Sector' is an ordinary column reference.
fig, ax = plt.subplots(figsize=(18, 6))
sns.barplot(
    data=stock_score_by_sector.reset_index(),
    x='Sector',
    y='Composite Score',
    ax=ax,
)
ax.set_title('Composite Score by Sector')
ax.set_xlabel('Sector')
ax.set_ylabel('Average Composite Score')
ax.tick_params(axis='x', labelrotation=45)
# Zero line separates sectors scoring above and below zero on average
ax.axhline(0, color='black');

In [None]:
# User-Input Look Up Graph for Moving Averages and Prices

# Preventing code from pausing when plot is rendered
plt.ion()

# Initialize user input
user_input = ''

# Keep prompting until the user types EXIT
while True:
  print('Type EXIT to Stop')

  # Prompt User; surrounding whitespace is ignored and input is case-insensitive
  user_input = input('Please Enter a Valid Ticker in the S&P 500: ').strip().upper()

  # End on EXIT
  if user_input == 'EXIT':
    break

  # Validate against the rows actually present in stock_df: a ticker can be in
  # the tickers list and still have no row if its data fetch failed.
  if user_input not in stock_df.index:
    if user_input in tickers:
      print('No data available for this ticker')
    else:
      print('Invalid Ticker')
    continue

  # Look the row up once and reuse it for all three series
  ticker_row = stock_df.loc[user_input]

  # Set up Plots
  fig, ax = plt.subplots(1,2, figsize=(16,4))

  # Plot Price
  sns.lineplot(data = ticker_row['Prices'], ax=ax[0])
  ax[0].set_xlabel('Date')
  ax[0].set_ylabel('Price')
  ax[0].set_title(f'{user_input} Price Over Time')
  ax[0].tick_params(axis='x', labelrotation=45)

  # Plot Moving Averages
  sns.lineplot(data = ticker_row['Short Term MA'], ax=ax[1], color='Red', label='Short Term MA')
  sns.lineplot(data = ticker_row['Long Term MA'], ax=ax[1], color='Green', label='Long Term MA')
  ax[1].set_xlabel('Date')
  ax[1].set_ylabel('Price')
  ax[1].set_title(f'{user_input} Moving Average Over Time')
  ax[1].tick_params(axis='x', labelrotation=45)
  ax[1].legend()

  # Show plot, pause to render, close fig window so loop can restart, whitespace between plot and next input query
  plt.draw()
  plt.pause(0.001)
  plt.close(fig)
  print('\n')