In [11]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts

In [12]:
# Read cluster data from CSV files
cluster_1 = pd.read_csv('Cluster_1.csv').columns
cluster_2 = pd.read_csv('Cluster_2.csv').columns
cluster_3 = pd.read_csv('Cluster_3.csv').columns
cluster_4 = pd.read_csv('Cluster_4.csv').columns

# Read historical adjusted close prices for the financial sector
price_data = pd.read_csv('hist adjust close financials.csv', index_col='Date', parse_dates=True)

In [13]:
cluster_2

Index(['ACN', 'CDAY', 'CTSH', 'DXC', 'IT', 'HPE'], dtype='object')

In [29]:
clusters = [cluster_1, cluster_2, cluster_3, cluster_4]
selected_pairs1 = []
selected_pairs2 = []
selected_pairs3 = []
selected_pairs4 = []

In [15]:
def is_cointegrated(df, stock1, stock2, significance_level=0.1):
    y = df[stock1]
    x = df[stock2]
    x = sm.add_constant(x)
    model = sm.OLS(y, x).fit()
    residuals = model.resid
    adf_result = ts.adfuller(residuals)
    p_value = adf_result[1]
    return p_value < significance_level

def calculate_correlation(df, stock1, stock2):
    return df[[stock1, stock2]].pct_change().corr().iloc[0, 1]

In [43]:
#for cluster_num, cluster in enumerate(clusters, start=1):
cluster=clusters[0]
#print(f"Analyzing Cluster 1: {cluster}")
n = len(cluster)
for i in range(n):
    for j in range(i + 1, n):
        stock1 = cluster[i]
        stock2 = cluster[j]
        #print(f"Checking stock pair {stock1}, {stock2}")
        correlation = calculate_correlation(price_data, stock1, stock2)
        cointegrated = is_cointegrated(price_data, stock1, stock2)
        if cointegrated:
            #print(f"Selected pair: {stock1}, {stock2}, correlation: {correlation} AND THEYRE {cointegrated}")
            selected_pairs1.append((stock1, stock2, correlation))
print("\n")

print("Selected pairs for mean reversion trading:", selected_pairs1)




Selected pairs for mean reversion trading: [('ADBE', 'AKAM', 0.43403370985960793), ('APH', 'ADI', 0.6690229507866844), ('APH', 'CDNS', 0.6199248434544392), ('APH', 'CDW', 0.6693047569678652), ('APH', 'ENPH', 0.31489207383678003), ('ADI', 'CDNS', 0.6263922969109393), ('ADI', 'CDW', 0.5707645838936513), ('ADI', 'ENPH', 0.2905308122637178), ('AMAT', 'ENPH', 0.32641556160121865), ('AMAT', 'GEN', 0.29700555083612257), ('CDNS', 'ENPH', 0.2866114679613072), ('CDW', 'ENPH', 0.2822900053266096), ('ADBE', 'AKAM', 0.43403370985960793), ('APH', 'ADI', 0.6690229507866844), ('APH', 'CDNS', 0.6199248434544392), ('APH', 'CDW', 0.6693047569678652), ('APH', 'ENPH', 0.31489207383678003), ('ADI', 'CDNS', 0.6263922969109393), ('ADI', 'CDW', 0.5707645838936513), ('ADI', 'ENPH', 0.2905308122637178), ('AMAT', 'ENPH', 0.32641556160121865), ('AMAT', 'GEN', 0.29700555083612257), ('CDNS', 'ENPH', 0.2866114679613072), ('CDW', 'ENPH', 0.2822900053266096)]


In [41]:
#for cluster_num, cluster in enumerate(clusters, start=1):
cluster=clusters[2]
#print(f"Analyzing Cluster 2: {cluster}")
n = len(cluster)
for i in range(n):
    for j in range(i + 1, n):
        stock1 = cluster[i]
        stock2 = cluster[j]
        #print(f"Checking stock pair {stock1}, {stock2}")
        correlation = calculate_correlation(price_data, stock1, stock2)
        cointegrated2 = is_cointegrated(price_data, stock1, stock2)
        if cointegrated2:
            #print(f"Selected pair: {stock1}, {stock2}, correlation: {correlation} AND THEYRE {cointegrated2}")
            selected_pairs2.append((stock1, stock2, correlation))
print("\n")

print("Selected pairs for mean reversion trading:", selected_pairs2)



Selected pairs for mean reversion trading: []


In [39]:
#CLUSTER 3 HAS ONLY ONE PAIR !!!


#for cluster_num, cluster in enumerate(clusters, start=1):
cluster=clusters[3]
#print(f"Analyzing Cluster 3: {cluster}")
n = len(cluster)
for i in range(n):
    for j in range(i + 1, n):
        stock1 = cluster[i]
        stock2 = cluster[j]
        #print(f"Checking stock pair {stock1}, {stock2}")
        correlation = calculate_correlation(price_data, stock1, stock2)
        cointegrated3 = is_cointegrated(price_data, stock1, stock2)
        if cointegrated3:
            #print(f"Selected pair: {stock1}, {stock2}, correlation: {correlation} AND THEYRE {cointegrated3}")
            selected_pairs3.append((stock1, stock2, correlation))
print("\n")

print("Selected pairs for mean reversion trading:", selected_pairs3)



Selected pairs for mean reversion trading: [('AMD', 'AAPL', 0.43049899515824624), ('AMD', 'EPAM', 0.37269439771011076), ('ANSS', 'ADSK', 0.6918092364804238), ('AAPL', 'AVGO', 0.6179220848051642), ('AAPL', 'FTNT', 0.4828511462495335), ('ADSK', 'GLW', 0.5064173276811162), ('ADSK', 'EPAM', 0.48882153452642907), ('AVGO', 'FTNT', 0.4795144024071683), ('GLW', 'EPAM', 0.40453098343778415), ('GLW', 'FFIV', 0.47195571378974177), ('AMD', 'AAPL', 0.43049899515824624), ('AMD', 'EPAM', 0.37269439771011076), ('ANSS', 'ADSK', 0.6918092364804238), ('AAPL', 'AVGO', 0.6179220848051642), ('AAPL', 'FTNT', 0.4828511462495335), ('ADSK', 'GLW', 0.5064173276811162), ('ADSK', 'EPAM', 0.48882153452642907), ('AVGO', 'FTNT', 0.4795144024071683), ('GLW', 'EPAM', 0.40453098343778415), ('GLW', 'FFIV', 0.47195571378974177), ('AMD', 'AAPL', 0.43049899515824624), ('AMD', 'EPAM', 0.37269439771011076), ('ANSS', 'ADSK', 0.6918092364804238), ('AAPL', 'AVGO', 0.6179220848051642), ('AAPL', 'FTNT', 0.4828511462495335), ('AD

In [40]:
selected_pairs2

[]