In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from scipy.stats import zscore
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from datetime import date
import yfinance as yf

warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf
import plotly.express as px
import pandas_datareader.data as web  
from pandas_datareader.famafrench import get_available_datasets



In [2]:
today = date.today()
past = np.busday_offset(today, -200, roll='backward')

# Create a dataframe for your tickers
df_tickers = pd.DataFrame({
    'Ticker': ['BABA', 'PDD', 'BEKE', 'BIDU', 'NTES', 'TCOM', 'VIPS', 'JD',
               'TME', 'BILI', 'TAL', 'IQ', 'QFIN', 'ATHM', 'ATHM', 'CD', 'YY', 'WB', 'LU']})

# Define a function to get the 'Open' prices
def get_open_price(ticker, start_date= str(past), end_date= str(today)):
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    return stock_data['Open']

# Create a dataframe to hold 'Open' prices
market = pd.DataFrame()

for ticker in df_tickers['Ticker']:
    market[ticker] = get_open_price(ticker)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [4]:
top40=['BABA', 'PDD', 'BEKE', 'BIDU', 'NTES', 'TCOM', 'VIPS', 'JD', 'TME','BILI', 'TAL', 'IQ', 'QFIN', 'ATHM', 'CD', 'YY', 'WB', 'LU']

In [5]:
class Factor(object):
    def __init__(self,market,idx):
        self.M = 50 #rolling windoes
        self.market = market

        self.hourly_return = None
        self.eigenvalues = None
        self.eigenvectors = None
        self.Q = None
        self.F = self.eigenportfolio(idx)
        
    def eigenportfolio(self,idx):

        data_slice = self.market[idx-self.M:idx]
        data_slice = data_slice.pct_change()
        
        tempdata = data_slice.iloc[1:]
        self.hourly_return=tempdata
        data_slice = self.hourly_return.apply(zscore,nan_policy = "omit")
        cor = data_slice.corr()
        cor.replace(np.nan, 0, inplace=True)
        pca = PCA(n_components=2).fit(cor)
        self.eigenvalues = pca.explained_variance_
        self.eigenvectors = pca.components_

        price_std = np.sqrt(np.array(data_slice.var()))
        self.Q = self.eigenvectors/price_std
        return self.Q@self.hourly_return.T

In [6]:
class Residuals(object):
    def __init__(self,market,idx):
        factor = Factor(market,idx)
        self.top40 = top40
        self.hourly_return = factor.hourly_return
        self.F = factor.F
        
        self.residual = self.return_residual()
        self.params = self.get_params()
        self.s_score = self.get_s_score()
        self.signal = self.trading_signal()
    
    def return_residual(self):
        x = self.F.T
        x.replace([np.nan,np.inf], 0, inplace=True)
        residual = {}
        for col in self.hourly_return.columns:
            y = self.hourly_return.loc[:,col]
            y.replace([np.nan,np.inf], 0, inplace=True)
            lr1 = LinearRegression().fit(x,y)
            pred_y = lr1.predict(x)
            residual[col] = y - pred_y
        return pd.DataFrame(residual)
    
    def get_params(self):
        current = pd.DataFrame(np.cumsum(self.residual.values,axis=0))
        previous = current[:-1]
        current = current[1:]
        
        params = pd.DataFrame(index = ["K","m","sigma","sigma_eq"])
        for col in current.columns:
            lr2 = LinearRegression().fit(previous[[col]],current[[col]])
            pred_y2 = lr2.predict(previous[[col]])
            A = float(lr2.intercept_)
            B = float(lr2.coef_)
            residual = current[[col]] - pred_y2

            K = -np.log(B)*8760
            m = A/(1-B)
            sigma = np.sqrt((float(np.var(residual)) * 2*K)/(1 - B*B))
            sigma_eq = sigma / np.sqrt(2*K)
            params[col] = [K,m,sigma,sigma_eq]
        return params
    
    def get_s_score(self):
        return (self.residual.iloc[-1].values-self.params.loc['m'].values)/self.params.loc['sigma_eq'].values
    
    def trading_signal(self):
        sig = {}
        
        s_bo = 1.25
        s_so = 1.25
        s_bc = 0.75
        s_sc = 0.5
        
        for i in range(18):
            score = self.s_score[i]
            conditions = [(score<-s_bo),((-s_bo<= score)&(score<= -s_sc)), ((-s_sc<score)&(score<s_bc)),  ((s_bc<= score)&(score <= s_so)),(score > s_so)]
            signal_values = [["Open_Long","Close_Short"],["Close_Short"],["Close_Long","Close_Short"],["Close_Long"],["Open_Short","Close_Long"]]
            sig[self.top40[i]] = set(np.select(conditions,signal_values))
        
        return sig
          

In [7]:
Residuals(market,len(market['BABA'])).signal

{'BABA': {'Close_Long', 'Close_Short'},
 'PDD': {'Close_Long', 'Open_Short'},
 'BEKE': {'Close_Long', 'Open_Short'},
 'BIDU': {'Close_Long'},
 'NTES': {'Close_Short', 'Open_Long'},
 'TCOM': {'Close_Long', 'Close_Short'},
 'VIPS': {'Close_Long', 'Close_Short'},
 'JD': {'Close_Short'},
 'TME': {'Close_Long'},
 'BILI': {'Close_Short'},
 'TAL': {'Close_Long', 'Close_Short'},
 'IQ': {'Close_Long', 'Close_Short'},
 'QFIN': {'Close_Short'},
 'ATHM': {'Close_Short', 'Open_Long'},
 'CD': {'Close_Short'},
 'YY': {'Close_Long', 'Close_Short'},
 'WB': {'Close_Short'},
 'LU': {'Close_Short', 'Open_Long'}}