Now that we can compute the correlation between two stocks, we want to explore the S&P 500 and find which stocks are most highly correlated with each other.

Doing this makes room for a trading strategy called "pairs trading": within a set of correlated stocks, we short the stock that is going up in price and go long the stock that is holding steady.

##### Imports

In [65]:
import yfinance
import numpy as np
import pandas as pd
import pandas_datareader.data as web

##### Getting stock data

In [70]:
def get_stocks_data(stock1, stock2):
    """Build a two-column table of adjusted closing prices.

    Args:
        stock1: First ticker symbol; becomes a column name.
        stock2: Second ticker symbol; becomes a column name.

    Returns:
        A DataFrame holding the 'Adj Close' series of each ticker, one
        column per ticker, as fetched through ``web.get_data_yahoo``.
    """
    # Fetch both histories first, then pick out the adjusted close of each.
    histories = {}
    for symbol in [stock1, stock2]:
        histories[symbol] = web.get_data_yahoo(symbol)

    adjusted_closes = {}
    for symbol, history in histories.items():
        adjusted_closes[symbol] = history['Adj Close']
    return pd.DataFrame(adjusted_closes)

def get_stock_data(ticker):
    """Fetch one ticker's adjusted closing prices as a single-column table.

    Args:
        ticker: Ticker symbol; also used as the column name.

    Returns:
        A DataFrame with one column, named after ``ticker``, holding the
        'Adj Close' series returned by ``web.get_data_yahoo``.
    """
    history = web.get_data_yahoo(ticker)
    adjusted_close = history['Adj Close']
    return pd.DataFrame({ticker: adjusted_close})

# Sanity check: build the AAPL/MSFT price table both ways (one combined call
# vs. two single-ticker frames joined on their index) and print the first
# rows of each so they can be compared by eye.
print(get_stocks_data('AAPL', 'MSFT').head())
print(get_stock_data('AAPL').join(get_stock_data('MSFT'), how='outer').head())

                 AAPL       MSFT
Date                            
2017-11-09  41.661293  79.015144
2017-11-10  41.523415  78.808441
2017-11-13  41.357010  78.864807
2017-11-14  40.731789  78.977570
2017-11-15  40.194527  78.363708
                 AAPL       MSFT
Date                            
2017-11-09  41.661297  79.015160
2017-11-10  41.523415  78.808426
2017-11-13  41.357006  78.864815
2017-11-14  40.731781  78.977570
2017-11-15  40.194527  78.363739


In [71]:
def get_a_list_of_sandp_500_tickers():
    """Read the S&P 500 constituent symbols from Wikipedia.

    Returns:
        A list with the values of the 'Symbol' column of the first HTML
        table found on the Wikipedia "List of S&P 500 companies" page.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    # The constituents table is the first table on the page.
    constituents = pd.read_html(url)[0]
    return constituents['Symbol'].tolist()

##### Create correlation system

In [72]:
def get_total_correlation(stock1, stock2):
    """Correlate the row-to-row percentage changes of two tickers.

    Args:
        stock1: First ticker symbol.
        stock2: Second ticker symbol.

    Returns:
        The off-diagonal entry (row 0, column 1) of the correlation matrix
        computed over the percentage changes of both price columns.
    """
    prices = get_stocks_data(stock1, stock2)
    # The first row has no predecessor, so its change is NaN and is dropped.
    returns = prices.pct_change().dropna()
    correlation_matrix = returns.corr()
    return correlation_matrix.iloc[0, 1]

def get_total_correlation_SDAG(stock1data, stock2data):
    """Correlate the percentage changes of two already-fetched price tables.

    Same calculation as ``get_total_correlation``, but it takes the price
    data as arguments instead of fetching it.

    Args:
        stock1data: DataFrame of prices for the first stock.
        stock2data: DataFrame of prices for the second stock.
            NOTE(review): the result reads row 0 / column 1 of the
            correlation matrix, so each frame is presumably expected to
            carry exactly one column - confirm against callers.

    Returns:
        The correlation between the first and second column of the joined
        tables' row-to-row percentage changes.
    """
    # Outer join keeps every index label that appears in either table.
    combined = stock1data.join(stock2data, how='outer')
    returns = combined.pct_change().dropna()
    return returns.corr().iloc[0, 1]

# Sanity check: the ticker-based helper and the DataFrame-based helper are
# both run on GOOG/MSFT so their printed correlations can be compared.
print(get_total_correlation('GOOG', 'MSFT'))
print(get_total_correlation_SDAG(get_stock_data('GOOG'), get_stock_data('MSFT')))


0.7991325282352403
0.7991339545506133


Iterate through each stock in the S&P 500 and pair it with its most highly correlated stock.

In [80]:
# Symbols to scan; fetched once here and read by get_pair_for_stock.
list_of_sandp_500_tickers = get_a_list_of_sandp_500_tickers()
# Accumulates one [ticker, best_match, correlation] entry per
# get_pair_for_stock call.
list_of_pairs = []

In [81]:
def get_pair_for_stock(ticker1):
    """Find the S&P 500 ticker most positively correlated with ``ticker1``.

    Every symbol in the module-level ``list_of_sandp_500_tickers`` is
    compared against ``ticker1`` with ``get_total_correlation_SDAG``. The
    best match is appended to the module-level ``list_of_pairs`` and is
    also returned.

    Symbols containing a ``.`` (e.g. ``BRK.B``) are never fetched, neither
    as ``ticker1`` nor as a candidate partner.
    NOTE(review): presumably the data source does not accept that symbol
    format - confirm.

    Args:
        ticker1: Symbol to find a partner for.

    Returns:
        A list ``[ticker1, best_ticker, correlation]``. If ``ticker1``
        contains a ``.`` or no candidate has a correlation above zero, the
        placeholder ``[ticker1, 'SAMPLE', 0.0]`` is returned instead.
    """
    if '.' in ticker1:
        # No price data is fetched for such symbols, so there is nothing to
        # compare. Record the placeholder once and stop here; continuing
        # into the scan would use `stock1data` before it is assigned.
        placeholder = [ticker1, 'SAMPLE', 0.0]
        list_of_pairs.append(placeholder)
        return placeholder

    stock1data = get_stock_data(ticker1)
    # Starting from 0.0 means only strictly positive correlations can win.
    max_correlated = [ticker1, 'SAMPLE', 0.0]

    print(ticker1)
    print("_________________________")
    for ticker2 in list_of_sandp_500_tickers:
        print(ticker2)
        if ticker2 == ticker1 or '.' in ticker2:
            continue
        # Each candidate's history is fetched again on every call.
        stock2data = get_stock_data(ticker2)
        total_corr = get_total_correlation_SDAG(stock1data, stock2data)
        if total_corr > max_correlated[2]:
            max_correlated = [ticker1, ticker2, total_corr]

    list_of_pairs.append(max_correlated)
    return max_correlated

for ticker in list_of_sandp_500_tickers:
    get_pair_for_stock(ticker)
    # Stop after the first ticker, so only one pair search runs here.
    # NOTE(review): presumably a demo shortcut, since each search fetches
    # the history of every other symbol - remove to scan the whole list.
    break
            

MMM
_________________________
MMM
AOS
ABT
ABBV
ABMD
ACN
ATVI
ADM
ADBE
ADP
AAP
AES
AFL
A
APD
AKAM
ALK
ALB
ARE
ALGN
ALLE
LNT
ALL
GOOGL
GOOG
MO
AMZN
AMCR
AMD
AEE
AAL
AEP
AXP
AIG
AMT
AWK
AMP
ABC
AME
AMGN
APH
ADI
ANSS
AON
APA
AAPL
AMAT
APTV
ACGL
ANET
AJG
AIZ
T
ATO
ADSK
AZO
AVB
AVY
BKR
BALL
BAC
BBWI
BAX
BDX
WRB
BRK.B
BBY
BIO
TECH
BIIB
BLK
BK
BA
BKNG
BWA
BXP
BSX
BMY
AVGO
BR
BRO
BF.B
CHRW
CDNS
CZR
CPT
CPB
COF
CAH
KMX
CCL
CARR
CTLT
CAT
CBOE
CBRE
CDW
CE
CNC
CNP
CDAY
CF
CRL
SCHW
CHTR
CVX
CMG
CB
CHD
CI
CINF
CTAS
CSCO
C
CFG
CLX
CME
CMS
KO
CTSH
CL
CMCSA
CMA
CAG
COP
ED
STZ
CEG
COO
CPRT
GLW
CTVA
CSGP
COST
CTRA
CCI
CSX
CMI
CVS
DHI
DHR
DRI
DVA
DE
DAL
XRAY
DVN
DXCM
FANG
DLR
DFS
DISH
DIS
DG
DLTR
D
DPZ
DOV
DOW
DTE
DUK
DD
DXC
EMN
ETN
EBAY
ECL
EIX
EW
EA
ELV
LLY
EMR
ENPH
ETR
EOG
EPAM
EQT
EFX
EQIX
EQR
ESS
EL
ETSY
RE
EVRG
ES
