In [7]:
import pandas as pd
import yfinance as yf

In [8]:


# Tickers whose closing prices are downloaded and compared pairwise
my_symbols = [
    'GOOGL', 'TSLA', 'META', 'AMZN', 'AAPL', 'MSFT',
    'VOD', 'ADBE', 'NVDA', 'CRM', 'EBAY', 'YNDX',
    'TRIP', 'NFLX', 'DBX', 'ETSY', 'PYPL', 'EA',
    'BIDU', 'TMUS', 'SPLK', 'JPM', 'OKTA', 'MDB',
    'GM', 'INTC', 'GT', 'SBUX', 'WIX', 'F',
]

# Function to download stock data
def download_stock(symbol, start="2020-01-01", end="2021-01-03"):
    """Download the closing prices of a single ticker as a Series.

    Parameters
    ----------
    symbol : str
        Ticker passed to ``yf.download``.
    start, end : str
        Date bounds forwarded to ``yf.download``. The defaults are the window
        this function was previously hard-coded to.

    Returns
    -------
    pandas.Series
        The ``'Close'`` data of the downloaded frame.
    """
    data = yf.download(symbol, start=start, end=end)
    close = data['Close']
    # Some yfinance releases return MultiIndex columns (field, ticker) even for
    # a single ticker, which makes data['Close'] a one-column DataFrame.
    # Reduce it to a Series so that a dict of these results can be turned into
    # one DataFrame with a column per symbol.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close

# Fetch the closing prices of every ticker, keyed by its symbol
my_stocks = dict(zip(my_symbols, map(download_stock, my_symbols)))

# Assemble the per-ticker results into one table, one column per symbol
close_prices = pd.DataFrame(data=my_stocks)

# Preview the top five rows
print(close_prices.head(n=5))


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [9]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from statsmodels.api import OLS
from scipy.stats import pearsonr

In [10]:

# Move to log-price space
close_prices_log = np.log(close_prices)

# Positional split: the first 220 rows are used for fitting,
# rows 220 through 251 are held out
train = close_prices_log.head(220)
test = close_prices_log.iloc[slice(220, 252)]

# Initialize lists to store results (one entry per pair, kept in parallel)
left_side = []
right_side = []
correlation = []
beta = []
pvalue = []

# Visit every unordered pair exactly once: each symbol is paired with the
# symbols listed before it, so (A, B) and (B, A) are never both evaluated.
for i, stock_i in enumerate(my_symbols):
    for stock_j in my_symbols[:i]:
        left_side.append(stock_i)
        right_side.append(stock_j)

        # Pearson correlation between the two training series
        corr, _ = pearsonr(train[stock_i], train[stock_j])
        correlation.append(corr)

        # Regress stock_i on stock_j; no constant column is supplied, so the
        # fit has no intercept and the single coefficient is the hedge ratio.
        model = OLS(train[stock_i], train[stock_j]).fit()
        # params is a Series labelled by the regressor's column name, so
        # params[0] only works through pandas' deprecated positional fallback;
        # .iloc[0] selects the first coefficient explicitly.
        beta.append(model.params.iloc[0])

        # Residuals of the regression are the spread between the two series
        spread = model.resid

        # Augmented Dickey-Fuller test on the spread; element 1 is the p-value.
        # NOTE(review): these are standard ADF p-values applied to estimated
        # residuals; statsmodels' coint() may be the more appropriate test —
        # confirm before relying on the 5% cut-off.
        adf_result = adfuller(spread)
        pvalue.append(adf_result[1])

# Create DataFrame with results (one row per pair)
df = pd.DataFrame({
    'left_side': left_side,
    'right_side': right_side,
    'correlation': correlation,
    'beta': beta,
    'pvalue': pvalue
})

# Keep the pairs whose correlation exceeds 0.90 and whose p-value is below
# 0.05, ordered from the highest correlation to the lowest
my_pairs = (
    df.loc[df['correlation'].gt(0.90) & df['pvalue'].lt(0.05)]
    .sort_values(by='correlation', ascending=False)
)

# Show the selected pairs, a blank separator line, then the full table
print(my_pairs, end="\n\n")
print(df)


    left_side right_side  correlation      beta    pvalue
247      OKTA       PYPL     0.966197  1.028991  0.017033
234      OKTA       AMZN     0.961998  1.061598  0.034633
192      SPLK       META     0.937228  0.948744  0.040413
241      OKTA       EBAY     0.935621  1.356657  0.029313
178      TMUS       ADBE     0.935310  0.766591  0.047563
83       NFLX       MSFT     0.930707  1.159167  0.017555
184      TMUS       NFLX     0.920456  0.756831  0.039871
272       MDB       TMUS     0.902173  1.140162  0.039559

    left_side right_side  correlation      beta    pvalue
0        TSLA      GOOGL     0.761775  0.999821  0.493980
1        META      GOOGL     0.927759  1.269064  0.264926
2        META       TSLA     0.871706  1.255867  0.462747
3        AMZN      GOOGL     0.707090  1.135707  0.479601
4        AMZN       TSLA     0.950032  1.125912  0.711337
..        ...        ...          ...       ...       ...
430         F         GM     0.944520  0.567975  0.222203
431         F