In [None]:
# 패키지 설치
!pip install pickle5 --quiet

In [None]:
# Package imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# pickle5 is only a backport of pickle protocol 5; when it is not installed
# (e.g. on interpreters whose standard library already supports that protocol),
# fall back to the built-in module so the notebook still runs.
try:
    import pickle5 as pickle
except ImportError:
    import pickle

# Apply seaborn's default styling to the matplotlib figures drawn below.
sns.set()

In [None]:
# Load the data.
# NOTE(review): unpickling can execute arbitrary code -- only load
# 'equity_price.pkl' when it comes from a trusted source.
with open('equity_price.pkl', 'rb') as f:
    # Discard every row that contains a missing value right after loading.
    df = pickle.load(f).dropna()

In [None]:
# Display the loaded table (rows containing missing values were already dropped).
df

In [None]:
# Return data: percentage change between consecutive rows; rows with missing
# values (such as the first row, which has no predecessor) are dropped.
rets = df.pct_change().dropna()

# Correlation matrix of the returns
corr = rets.corr()

# Correlation heatmap, with coefficients rounded to one decimal place
corr_fig, corr_ax = plt.subplots(figsize=(20, 12))
corr_ax.set_title('Correlation Matrix')
sns.heatmap(corr.round(1), annot=True, ax=corr_ax)

In [None]:
# Standardise the return data.
# The scaler is fitted exactly once and then applied with `transform`; the
# previous version fitted it and then re-fitted it again via `fit_transform`.
scaler = StandardScaler().fit(rets)
rescaledDataset = pd.DataFrame(
    scaler.transform(rets),
    columns=rets.columns,
    index=rets.index,
)
# Rebind instead of mutating in place so the frame built above is not modified.
rescaledDataset = rescaledDataset.dropna(how='any')

# Example of the standardised data.
# NOTE(review): assumes the data contains an 'AAPL' column -- confirm.
fig, ax = plt.subplots(figsize=(16, 5))
rescaledDataset['AAPL'].plot(ax=ax)
ax.set_title('AAPL Return')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Fit the PCA model on the standardised returns.
# `fit` returns the estimator itself, so `PrincipalComponent` and `pca`
# refer to the same fitted object.
pca = PCA().fit(rescaledDataset)
PrincipalComponent = pca

In [None]:
# Visualise the PCA result
NumEigenvlaues = 20  # number of leading components to show (name kept as-is)

# Explained-variance ratios of the leading components, built once and reused.
leading_ratios = pd.Series(pca.explained_variance_ratio_[:NumEigenvlaues])
Series1 = leading_ratios.sort_values()  # ascending order for the bar chart
Series2 = leading_ratios.cumsum()       # running total for the line chart

fig, axes = plt.subplots(ncols=2, figsize=(14, 4))
ax_ratio, ax_cumulative = axes
Series1.plot.barh(ax=ax_ratio, title='Explained Variance Ratio by Top Factors')
Series2.plot(ax=ax_cumulative, ylim=(0, 1), title='Cumulative Explained Variance')

In [None]:
# Eigen-portfolio weights
NumComponents = 5

# One row per leading principal component, one column per return series.
topPortfolios = pd.DataFrame(pca.components_[:NumComponents], columns=rets.columns)

# Divide each row by its own total so the weights of a portfolio sum to 1.
# NOTE(review): a row whose loadings sum to (nearly) zero yields very large weights.
row_totals = topPortfolios.sum(axis=1)
eigen_portfolios = topPortfolios.div(row_totals, axis=0)

portfolio_labels = [f'Portfolio {i}' for i in range(NumComponents)]
eigen_portfolios.index = portfolio_labels

In [None]:
# Display the eigen-portfolio weight table (one row per portfolio).
eigen_portfolios

In [None]:
# Visualisation: one bar subplot per eigen portfolio, stacked vertically,
# with one bar per column of the weight table.
weights_by_asset = eigen_portfolios.transpose()
weights_by_asset.plot(
    kind='bar',
    subplots=True,
    layout=(int(NumComponents), 1),
    figsize=(14, 10),
    legend=False,
)