In [1]:
# Basic Python

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, date
from tqdm import tqdm
import requests

# Quant Libraries

import yfinance as yf # yahoo finance PyPi for getting stock data
import quandl # provides additional stock data but it might be deprecated
import nsepy # this gets derivatives data
import pyfolio as pf # provides performance and risk analysis of financial portfolios
import mplfinance as mpf # works in combination with matplotlib and pandas to plot finance data
import pandas_ta as ta # pandas but it also has TA indicators
import numpy_financial as npf # numpy with additional financial functions
import pandas_datareader as pdr # helps you get specific data very quickly
import statsmodels # provides a lot of different statistics models like linear regression, time series analysis, and bayesian analysis
import pynance as pn # grabs info from stock market derivatives
# import zipline # useful for backtesting and live trading algorithmic trading systems, it isn't working with all of the other dependencies yet
from yahoo_fin import stock_info as si # this is a useful tool for getting stock information

# ML Models

import sklearn # big SciPy machine learning package
import torch # big neural network Python package developed by Facebook
import tensorflow as tf # big neural network Python package developed by Google
plt.style.use('seaborn-v0_8') # this changes the matplotlib style to something nicer that we can use for financial purposes
from sklearn.model_selection import GridSearchCV # this is a useful tool for finding the best parameters for a model

# Stats

from scipy.stats import t



In [2]:
def calculate_rsi(data, window=14):
    # Calculate relative strength index (RSI)
    temp = data['Adj Close']
    print(temp)
    delta = data['Adj Close'].diff(1)
    print(delta)
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=window).mean()
    avg_loss = loss.rolling(window=window).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def rsi_2(df):
    delta = df['Close'].diff(1)
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=14, min_periods=14).mean()
    avg_loss = loss.rolling(window=14, min_periods=14).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

def get_pe_ratio(ticker):
    pe_ratio = si.get_quote_table(ticker)["PE Ratio (TTM)"]
    return pe_ratio

In [3]:
sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()

for i in range(len(sp500_tickers)):
   sp500_tickers[i] = sp500_tickers[i].replace(".", "-")

sp500_tickers.remove("KVUE")
sp500_tickers.remove("NDAQ")

rsi_values = []
pe_ratios = []

print(sp500_tickers)

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BK', 'BBWI', 'BAX', 'BDX', 'BRK-B', 'BBY', 'BIO', 'TECH', 'BIIB', 'BLK', 'BX', 'BA', 'BKNG', 'BWA', 'BXP', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF-B', 'BLDR', 'BG', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'COR', 'CNC', 'CNP', 'CDAY', 'CF', 'CHRW', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CMA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CCI', 'CSX', 'CMI', 'C

In [10]:
# start_url = 'https://www.alphavantage.co/query?function=EARNINGS&symbol='
# end_url = '&apikey=79X1YY4SIL4SUCQ8'

pe = {}
rsi = {}
for tick in sp500_tickers:

    # print(tick)

    found_rsi = True
    found_pe = True

    try:
        temp_pe = yf.Ticker(tick).info['trailingPE']
        prices = yf.download(tick, start="2023-12-11", end="2024-01-03", progress=False)
        rsi[tick] = calculate_rsi(prices).iloc[-1]
    except:
        print("No data found for for: " + tick)
        found_rsi = False
        found_pe = False

    rsi[tick] = calculate_rsi(prices).iloc[-1] if found_rsi else None
    pe[tick] = temp_pe if found_rsi else None


No data found for for: MMM
Date
2023-12-11    78.040001
2023-12-12    79.480003
2023-12-13    79.529999
2023-12-14    80.589996
2023-12-15    79.199997
2023-12-18    79.389999
2023-12-19    80.900002
2023-12-20    80.230003
2023-12-21    81.559998
2023-12-22    81.730003
2023-12-26    81.849998
2023-12-27    82.160004
2023-12-28    82.239998
2023-12-29    82.440002
2024-01-02    81.440002
Name: Adj Close, dtype: float64
Date
2023-12-11         NaN
2023-12-12    1.440002
2023-12-13    0.049995
2023-12-14    1.059998
2023-12-15   -1.389999
2023-12-18    0.190002
2023-12-19    1.510002
2023-12-20   -0.669998
2023-12-21    1.329994
2023-12-22    0.170006
2023-12-26    0.119995
2023-12-27    0.310005
2023-12-28    0.079994
2023-12-29    0.200005
2024-01-02   -1.000000
Name: Adj Close, dtype: float64
Date
2023-12-11    78.040001
2023-12-12    79.480003
2023-12-13    79.529999
2023-12-14    80.589996
2023-12-15    79.199997
2023-12-18    79.389999
2023-12-19    80.900002
2023-12-20    80.2300

In [11]:
print(pe)
print(rsi)

{'MMM': None, 'AOS': 40.258705, 'ABT': 34.3589, 'ABBV': 45.041092, 'ACN': 34.358334, 'ADBE': 51.808437, 'AMD': 1611.3636, 'AES': None, 'AFL': 11.6863, 'A': 31.262531, 'APD': 25.451895, 'ABNB': 18.224115, 'AKAM': 37.17825, 'ALB': 4.248139, 'ARE': 90.108696, 'ALGN': 55.87315, 'ALLE': 19.757912, 'LNT': 17.893772, 'ALL': None, 'GOOGL': 29.154215, 'GOOG': 29.518232, 'MO': 8.204082, 'AMZN': 83.3089, 'AMCR': 14.454545, 'AEE': 15.761906, 'AAL': 12.504132, 'AEP': 17.85321, 'AXP': 17.952763, 'AIG': 12.071179, 'AMT': 131.52666, 'AWK': 25.446281, 'AMP': 18.763182, 'AME': 29.56962, 'AMGN': 22.174252, 'APH': 32.382637, 'ADI': 29.5625, 'ANSS': 60.297638, 'AON': 22.94073, 'APA': 6.4091816, 'AAPL': 31.338762, 'AMAT': 20.57953, 'APTV': 10.71482, 'ACGL': 10.309678, 'ADM': 7.239221, 'ANET': 44.066666, 'AJG': 52.60271, 'AIZ': 17.398571, 'T': 8.77665, 'ATO': 18.66995, 'ADSK': 59.545883, 'ADP': 28.007116, 'AZO': 20.243484, 'AVB': 26.93475, 'AVY': 33.781513, 'AXON': 128.24489, 'BKR': 15.958116, 'BALL': 29.812

In [20]:
pe_lst = list(pe.values())
rsi_lst = list(rsi.values())

empty = []

np_pe = np.array(empty)
np_rsi = np.array(empty)
print(np_pe)

for i in tqdm(range(len(pe))):
    if pe_lst[i] is not None:
        np_pe = np.append(np_pe, pe_lst[i])
        np_rsi = np.append(np_rsi, rsi_lst[i])


[]


100%|██████████| 501/501 [00:00<00:00, 93910.72it/s]


In [21]:
print(np_pe)

[4.02587050e+01 3.43589000e+01 4.50410920e+01 3.43583340e+01
 5.18084370e+01 1.61136360e+03 1.16863000e+01 3.12625310e+01
 2.54518950e+01 1.82241150e+01 3.71782500e+01 4.24813900e+00
 9.01086960e+01 5.58731500e+01 1.97579120e+01 1.78937720e+01
 2.91542150e+01 2.95182320e+01 8.20408200e+00 8.33089000e+01
 1.44545450e+01 1.57619060e+01 1.25041320e+01 1.78532100e+01
 1.79527630e+01 1.20711790e+01 1.31526660e+02 2.54462810e+01
 1.87631820e+01 2.95696200e+01 2.21742520e+01 3.23826370e+01
 2.95625000e+01 6.02976380e+01 2.29407300e+01 6.40918160e+00
 3.13387620e+01 2.05795300e+01 1.07148200e+01 1.03096780e+01
 7.23922100e+00 4.40666660e+01 5.26027100e+01 1.73985710e+01
 8.77665000e+00 1.86699500e+01 5.95458830e+01 2.80071160e+01
 2.02434840e+01 2.69347500e+01 3.37815130e+01 1.28244890e+02
 1.59581160e+01 2.98125020e+01 1.08538960e+01 1.38325000e+01
 1.35159230e+01 8.43260900e+01 4.63111530e+01 1.09769300e+01
 1.29222790e+01 4.77828940e+01 2.43359830e+01 2.15521490e+01
 6.77010900e+01 2.488053

In [22]:
correlation_coefficient = np.corrcoef(np_pe, np_rsi)[0, 1]

print(f"Correlation Coefficient: {correlation_coefficient}")

Correlation Coefficient: 0.02510880763044771
