In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from sklearn import linear_model

## Cointegration test 

In [None]:
def coInt(df, stock1, stock2):
    """Return the p-value of the Engle-Granger cointegration test for a pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one column per ticker.
    stock1, stock2 : str
        Names of the two columns of ``df`` to test against each other.

    Returns
    -------
    float
        The p-value returned by ``statsmodels.tsa.stattools.coint``.
    """
    # Test the columns that were actually requested. Hard-coding
    # df_tech['AAPL'] / df_tech['GOOG'] here would give every pair, in every
    # industry, the same p-value and would tie this helper to a global frame.
    # A separate OLS fit is not needed: its residuals were never used.
    _, pvalue, _ = coint(df[stock1], df[stock2])
    return pvalue

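Daily closing prices for four tech stocks (AAPL, MSFT, META, GOOG) are downloaded with yfinance for 2023-01-01 to 2023-04-30, converted to day-over-day price changes, and stored in a single DataFrame. The same steps are repeated for the healthcare and energy industries in later sections.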

In [None]:
# Tech Industries

# One yfinance request per ticker, all over the same date window.
df_aapl = yf.download("AAPL", start="2023-01-01", end="2023-04-30")
df_msft = yf.download("MSFT", start="2023-01-01", end="2023-04-30")
df_meta = yf.download("META", start="2023-01-01", end="2023-04-30")
df_goog = yf.download("GOOG", start="2023-01-01", end="2023-04-30")

# Put the closes side by side, take day-over-day differences, then drop any
# row that contains NaN or +/-inf.
df_tech = (
    pd.concat(
        [df_aapl["Close"], df_msft["Close"], df_meta["Close"], df_goog["Close"]],
        axis=1,
    )
    .diff()
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
df_tech.columns = ['AAPL', 'MSFT', 'META', 'GOOG']

df_tech

In [None]:
# Pairwise features for the tech tickers. Correlation, beta and the
# cointegration p-value are one number per pair, broadcast to every row of
# df_tech; the ratio is computed row by row.
tech_tickers = ['AAPL', 'MSFT', 'META', 'GOOG']
tech_pairs = [
    (first, second)
    for pos, first in enumerate(tech_tickers)
    for second in tech_tickers[pos + 1:]
]

# Correlations of All Possible Pairings
for first, second in tech_pairs:
    df_tech[f'r_{first}_{second}'] = df_tech[first].corr(df_tech[second])

# Price Ratios of All Possible Pairings
# NOTE(review): df_tech holds differenced closes (see the .diff() in the
# loading cell), so these are ratios of daily changes and become inf/NaN when
# the denominator is 0 -- confirm this is intended.
for first, second in tech_pairs:
    df_tech[f'ratio_{first}_{second}'] = df_tech[first] / df_tech[second]

# Betas of All Possible Pairings
# Series.cov and Series.var both default to ddof=1, so this is the OLS slope
# of `first` on `second`. Dividing np.cov (ddof=1) by np.var (ddof=0) would
# inflate every beta by n/(n-1).
for first, second in tech_pairs:
    df_tech[f'b_{first}_{second}'] = (
        df_tech[first].cov(df_tech[second]) / df_tech[second].var()
    )

# Cointegration Test of All Possible Pairings
for first, second in tech_pairs:
    df_tech[f'coInt_{first}_{second}'] = coInt(df_tech, first, second)

df_tech

## OLS Regression - Tech Industry 

In [None]:
# Target is MSFT; regressors are AAPL plus the AAPL/MSFT beta and correlation columns.
y_tech = df_tech['MSFT']
x_tech = df_tech[['AAPL', 'b_AAPL_MSFT', 'r_AAPL_MSFT']]

# scikit-learn fit; fit() hands back the estimator itself.
regr_tech = linear_model.LinearRegression().fit(x_tech, y_tech)

print('Intercept: \n', regr_tech.intercept_)
print('Coefficients: \n', regr_tech.coef_)

# Same regression through statsmodels, for the full summary table.
# NOTE(review): b_AAPL_MSFT and r_AAPL_MSFT are each filled with one scalar in
# the feature cell, so they are constant columns. They look collinear with the
# intercept, and add_constant's default has_constant='skip' presumably leaves
# x_tech without a 'const' column -- confirm.
x_tech = sm.add_constant(x_tech)
model_tech = sm.OLS(y_tech, x_tech).fit()
predictions_tech = model_tech.predict(x_tech)

print_model_tech = model_tech.summary()
print(print_model_tech)

## Trading Strategy - Tech Industry


In [None]:
###Trading Strategy

def price_calc(df, stock1, stock2, regr):
    """Return the fitted linear model's prediction minus the ``stock2`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``stock1``, ``stock2``,
        ``'b_<stock1>_<stock2>'`` and ``'r_<stock1>_<stock2>'``.
    stock1, stock2 : str
        Ticker column names; they also select the beta and correlation columns.
    regr : object
        Fitted estimator exposing ``intercept_`` and a ``coef_`` sequence with
        at least three entries, ordered as (stock1, beta, correlation).

    Returns
    -------
    pandas.Series
        ``prediction - df[stock2]``, one value per row of ``df``.
    """
    b_str = 'b_' + stock1 + '_' + stock2
    r_str = 'r_' + stock1 + '_' + stock2
    coef = regr.coef_
    inte = regr.intercept_
    # A linear model predicts intercept + sum(coef_i * x_i). The coefficients
    # must not be raised to powers, which would not match the fitted model.
    price = (
        inte
        + df[stock1] * coef[0]
        + df[b_str] * coef[1]
        + df[r_str] * coef[2]
    )
    return price - df[stock2]

# Difference between the tech model's computed value and the META column.
# NOTE(review): regr_tech was fit on AAPL -> MSFT (with b_AAPL_MSFT / r_AAPL_MSFT),
# but is applied here to the AAPL/META pair -- confirm this is intended.
price_calc(df_tech, 'AAPL', 'META', regr_tech)

## Healthcare Industry

In [None]:
# Healthcare Industries

# One yfinance request per ticker, all over the same date window.
df_mrk = yf.download("MRK", start="2023-01-01", end="2023-04-30")
df_jnj = yf.download("JNJ", start="2023-01-01", end="2023-04-30")
df_pfe = yf.download("PFE", start="2023-01-01", end="2023-04-30")
df_abt = yf.download("ABT", start="2023-01-01", end="2023-04-30")

# Put the closes side by side, take day-over-day differences, then drop any
# row that contains NaN or +/-inf.
df_health = (
    pd.concat(
        [df_mrk["Close"], df_jnj["Close"], df_pfe["Close"], df_abt["Close"]],
        axis=1,
    )
    .diff()
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
df_health.columns = ['MRK', 'JNJ', 'PFE', 'ABT']

In [None]:
# Pairwise features for the healthcare tickers. Correlation, beta and the
# cointegration p-value are one number per pair, broadcast to every row of
# df_health; the ratio is computed row by row.
health_tickers = ['MRK', 'JNJ', 'PFE', 'ABT']
health_pairs = [
    (first, second)
    for pos, first in enumerate(health_tickers)
    for second in health_tickers[pos + 1:]
]

# Correlations for All Possible Pairings
for first, second in health_pairs:
    df_health[f'r_{first}_{second}'] = df_health[first].corr(df_health[second])

# Price Ratios for All Possible Pairings
# Looping over the pairs guarantees every pair gets a column, including
# ratio_MRK_PFE, which is lost if the MRK/JNJ line is duplicated by hand.
# NOTE(review): df_health holds differenced closes (see the .diff() in the
# loading cell), so these are ratios of daily changes and become inf/NaN when
# the denominator is 0 -- confirm this is intended.
for first, second in health_pairs:
    df_health[f'ratio_{first}_{second}'] = df_health[first] / df_health[second]

# Betas for All Possible Pairings
# Series.cov and Series.var both default to ddof=1, so this is the OLS slope
# of `first` on `second`. Dividing np.cov (ddof=1) by np.var (ddof=0) would
# inflate every beta by n/(n-1).
for first, second in health_pairs:
    df_health[f'b_{first}_{second}'] = (
        df_health[first].cov(df_health[second]) / df_health[second].var()
    )

# Cointegration Test of All Possible Pairings
for first, second in health_pairs:
    df_health[f'coInt_{first}_{second}'] = coInt(df_health, first, second)

df_health

## OLS Regression - Healthcare Industry

In [None]:
# Target is JNJ; regressors are MRK plus the MRK/JNJ beta and correlation columns.
y_health = df_health['JNJ']
x_health = df_health[['MRK', 'b_MRK_JNJ', 'r_MRK_JNJ']]

# scikit-learn fit; fit() hands back the estimator itself.
regr_health = linear_model.LinearRegression().fit(x_health, y_health)

print('Intercept: \n', regr_health.intercept_)
print('Coefficients: \n', regr_health.coef_)

# Same regression through statsmodels, for the full summary table.
# NOTE(review): b_MRK_JNJ and r_MRK_JNJ are each filled with one scalar in the
# feature cell, so they are constant columns. They look collinear with the
# intercept, and add_constant's default has_constant='skip' presumably leaves
# x_health without a 'const' column -- confirm.
x_health = sm.add_constant(x_health)
model_health = sm.OLS(y_health, x_health).fit()
predictions_health = model_health.predict(x_health)

print_model_health = model_health.summary()
print(print_model_health)

## Trading Strategy - Healthcare Industry

In [None]:
# Difference between the healthcare model's computed value and the PFE column.
# NOTE(review): regr_health was fit on MRK -> JNJ (with b_MRK_JNJ / r_MRK_JNJ),
# but is applied here to the MRK/PFE pair -- confirm this is intended.
price_calc(df_health, 'MRK', 'PFE', regr_health)

## Energy Industry

In [None]:
# Energy Industries

# One yfinance request per ticker, all over the same date window.
df_nee = yf.download("NEE", start="2023-01-01", end="2023-04-30")
df_cvx = yf.download("CVX", start="2023-01-01", end="2023-04-30")
df_xom = yf.download("XOM", start="2023-01-01", end="2023-04-30")
df_shel = yf.download("SHEL", start="2023-01-01", end="2023-04-30")

# Put the closes side by side, take day-over-day differences, then drop any
# row that contains NaN or +/-inf.
df_energy = (
    pd.concat(
        [df_nee["Close"], df_cvx["Close"], df_xom["Close"], df_shel["Close"]],
        axis=1,
    )
    .diff()
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
df_energy.columns = ['NEE', 'CVX', 'XOM', 'SHEL']

In [None]:
# Pairwise features for the energy tickers. Correlation, beta and the
# cointegration p-value are one number per pair, broadcast to every row of
# df_energy; the ratio is computed row by row.
energy_tickers = ['NEE', 'CVX', 'XOM', 'SHEL']
energy_pairs = [
    (first, second)
    for pos, first in enumerate(energy_tickers)
    for second in energy_tickers[pos + 1:]
]

# Correlation of All Possible Pairings
for first, second in energy_pairs:
    df_energy[f'r_{first}_{second}'] = df_energy[first].corr(df_energy[second])

# Price Ratio of All Possible Pairings
# NOTE(review): df_energy holds differenced closes (see the .diff() in the
# loading cell), so these are ratios of daily changes and become inf/NaN when
# the denominator is 0 -- confirm this is intended.
for first, second in energy_pairs:
    df_energy[f'ratio_{first}_{second}'] = df_energy[first] / df_energy[second]

# Betas of All Possible Pairings
# Series.cov and Series.var both default to ddof=1, so this is the OLS slope
# of `first` on `second`. Dividing np.cov (ddof=1) by np.var (ddof=0) would
# inflate every beta by n/(n-1).
for first, second in energy_pairs:
    df_energy[f'b_{first}_{second}'] = (
        df_energy[first].cov(df_energy[second]) / df_energy[second].var()
    )

# Cointegration Test of All Possible Pairings
for first, second in energy_pairs:
    df_energy[f'coInt_{first}_{second}'] = coInt(df_energy, first, second)

df_energy

## OLS Regression - Energy Industry

In [None]:
# Target is XOM; regressors are CVX plus the CVX/XOM beta and correlation columns.
y_energy = df_energy['XOM']
x_energy = df_energy[['CVX', 'b_CVX_XOM', 'r_CVX_XOM']]

# scikit-learn fit; fit() hands back the estimator itself.
regr_energy = linear_model.LinearRegression().fit(x_energy, y_energy)

print('Intercept: \n', regr_energy.intercept_)
print('Coefficients: \n', regr_energy.coef_)

# Same regression through statsmodels, for the full summary table.
# NOTE(review): b_CVX_XOM and r_CVX_XOM are each filled with one scalar in the
# feature cell, so they are constant columns. They look collinear with the
# intercept, and add_constant's default has_constant='skip' presumably leaves
# x_energy without a 'const' column -- confirm.
x_energy = sm.add_constant(x_energy)
model_energy = sm.OLS(y_energy, x_energy).fit()
predictions_energy = model_energy.predict(x_energy)

print_model_energy = model_energy.summary()
print(print_model_energy)

## Trading Strategy - Energy Industry

In [None]:
# Difference between the energy model's computed value and the SHEL column.
# NOTE(review): regr_energy was fit on CVX -> XOM (with b_CVX_XOM / r_CVX_XOM),
# but is applied here to the NEE/SHEL pair, which shares neither ticker --
# confirm this is intended.
price_calc(df_energy, 'NEE', 'SHEL', regr_energy)