In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import coint

In [2]:
def load_stock_data(folder_path):
    all_data = {}
    
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.csv'):
            file_path = os.path.join(folder_path, file_name)
            try:
                data = pd.read_csv(file_path, index_col=0, parse_dates=True, usecols=[0, 4])
                symbol = file_name.replace('.csv', '')
                all_data[symbol] = data.iloc[:, 0]
            except Exception as e:
                print(f"讀取文件 {file_name} 時出現錯誤: {e}")
    
    if all_data:
        all_data_combined = pd.concat(all_data.values(), axis=1, join='outer')
        all_data_combined.columns = all_data.keys()
        
        all_data_combined.ffill(inplace=True)
        all_data_combined.bfill(inplace=True)
        
    else:
        all_data_combined = pd.DataFrame()

    return all_data_combined, list(all_data.keys())

def find_cointegrated_pairs(data, significance_level=0.05):
    n = data.shape[1]
    pvalue_matrix = np.ones((n, n))
    keys = data.columns
    pairs = []
    
    for i in range(n):
        for j in range(i+1, n):
            stock1 = data.iloc[:, i]
            stock2 = data.iloc[:, j]
            
            if len(stock1) > 2 and len(stock2) > 2:
                try:
                    score, pvalue, _ = coint(stock1, stock2)
                    pvalue_matrix[i, j] = pvalue
                    if pvalue < significance_level:
                        pairs.append((keys[i], keys[j], pvalue))
                except ValueError as e:
                    print(f"協整檢驗失敗：{keys[i]} 和 {keys[j]} - {str(e)}")
    
    return pairs, pvalue_matrix

In [3]:
folder_path = '/Users/xinc./Documents/GitHub/Quant/data/stock_price'
stock_data, stock_symbols = load_stock_data(folder_path)

In [4]:
pairs, pvalue_matrix = find_cointegrated_pairs(stock_data)

print("\n協整股票對和p-value:")
for pair in pairs:
    print(f"{pair[0]} 和 {pair[1]} - p-value: {pair[2]:.4f}")

print("\nP-value 矩陣:")
pvalue_df = pd.DataFrame(pvalue_matrix, columns=stock_symbols, index=stock_symbols)
print(pvalue_df)


協整股票對和p-value:
6796 和 1416 - p-value: 0.0469
6796 和 6168 - p-value: 0.0134
6796 和 9924 - p-value: 0.0186
6796 和 2911 - p-value: 0.0000
6796 和 3576 - p-value: 0.0328
6796 和 6901 - p-value: 0.0130
6796 和 3041 - p-value: 0.0045
6796 和 2412 - p-value: 0.0341
6796 和 3043 - p-value: 0.0003
6796 和 3094 - p-value: 0.0001
6796 和 2832 - p-value: 0.0336
6796 和 2614 - p-value: 0.0083
6796 和 1444 - p-value: 0.0252
6796 和 4994 - p-value: 0.0318
6796 和 1340 - p-value: 0.0015
3605 和 1760 - p-value: 0.0049
3605 和 3413 - p-value: 0.0343
3605 和 4426 - p-value: 0.0478
3605 和 1402 - p-value: 0.0007
3605 和 4552 - p-value: 0.0243
3605 和 1615 - p-value: 0.0460
3605 和 6768 - p-value: 0.0102
3605 和 4949 - p-value: 0.0479
3605 和 1414 - p-value: 0.0351
3605 和 9935 - p-value: 0.0404
3605 和 6592 - p-value: 0.0010
3605 和 1216 - p-value: 0.0067
3605 和 9931 - p-value: 0.0366
3605 和 2468 - p-value: 0.0094
3605 和 6168 - p-value: 0.0275
3605 和 2642 - p-value: 0.0449
3605 和 2330 - p-value: 0.0460
3605 和 6235 - p-value: 0