In [3]:
import numpy as np
import pandas as pd
import yfinance as yf
#!pip install finta
from finta import TA
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Ticker universe for this experiment, and its price history between the two dates.
# group_by="ticker" puts the ticker symbol on the outer column level, which the
# following cells rely on (`test[ticker]`, `get_level_values(0)`).
stocks = ["IBM", "AAPL", "A", "AAL", "AAP"]
test = yf.download(
    tickers=stocks,
    start="2021-01-01",
    end="2022-01-01",
    group_by="ticker",
)

[*********************100%***********************]  5 of 5 completed


In [4]:
# Indicator names to compute. Each entry must be the name of a finta `TA` method
# that can be called with the per-ticker price frame alone and whose result fits
# in a single column.
features = ['SMA', 'EMA', 'VAMA', 'HMA', 'MOM', 'RSI', 'ROC', 'SAR']
# Level 0 of the downloaded columns holds the ticker symbols.
tickers = test.columns.get_level_values(0).unique()
cols = pd.MultiIndex.from_tuples([(t, f) for t in tickers for f in features])
test_df = pd.DataFrame(columns=cols)
# Drive the computation from `features` so the declared list and the computed
# columns cannot drift apart (previously every indicator was its own hard-coded
# line, and a name added to the list alone became a silent all-NaN column).
for ticker, feature in cols:
    test_df[(ticker, feature)] = getattr(TA, feature)(test[ticker])
test_df.index = pd.to_datetime(test_df.index)

In [7]:
# Run PCA on a single day's cross-section; positional row 50 is an arbitrary pick.
test0 = test_df.iloc[[50]]
# Flatten that one row into long form: one (ticker, indicator, value) record per
# original column. Taking the row as a Series avoids the earlier
# reset_index/drop('Date')/transpose detour, which only worked when the index
# happened to be named 'Date'.
long_df = test0.iloc[0].reset_index()
long_df.columns = ['ticker', 'type', 'value']
# Reshape to one row per ticker and one column per indicator.
df = long_df.pivot_table(index='ticker', columns='type', values='value')
df

type,EMA,HMA,MOM,ROC,RSI,SAR,SMA,VAMA
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,121.697409,122.151699,3.219994,-1.574555,50.926488,112.983601,123.264146,120.521828
AAL,23.322153,24.117167,2.99,18.79131,73.979061,20.362103,19.210244,23.44689
AAP,176.712375,181.936017,16.130005,11.865242,66.23219,176.804797,163.84756,177.656267
AAPL,123.162537,122.46713,2.700005,-2.371077,48.563188,117.762951,129.837805,121.82212
IBM,121.164464,123.67264,6.376678,6.866,63.702921,118.113791,117.367672,121.498052


In [10]:
# PCA estimator keeping three components; it is only constructed here and is
# fitted in a later cell.
pca = PCA(n_components=3)
# Min-max scale each indicator column across the tickers before PCA.
minmaxscaler = MinMaxScaler()
df_scale = pd.DataFrame(
    minmaxscaler.fit_transform(df),
    index=df.index,  # keep the ticker labels; they were previously replaced by 0..n-1
    columns=df.columns,
)
df_scale

type,EMA,HMA,MOM,ROC,RSI,SAR,SMA,VAMA
0,0.64134,0.621184,0.038718,0.037639,0.092985,0.592047,0.719413,0.629501
1,0.0,0.0,0.021593,1.0,1.0,0.0,0.0,0.0
2,1.0,1.0,1.0,0.672718,0.695196,1.0,1.0,1.0
3,0.650891,0.623183,0.0,0.0,0.0,0.622598,0.764862,0.637933
4,0.637865,0.630821,0.273766,0.436486,0.59568,0.62484,0.678645,0.635831


In [11]:
# Fit the PCA on the scaled cross-section. Only the fitted component weights are
# used afterwards, so the projected scores are not computed or kept.
pca.fit(df_scale)
# First row of `components_`, labelled with the indicator names for display.
df_pca = pd.DataFrame(
    pca.components_[:1, :],
    index=['PC1'],
    columns=df.columns.values,
)
df_pca

Unnamed: 0,EMA,HMA,MOM,ROC,RSI,SAR,SMA,VAMA
PC1,-0.396492,-0.392237,-0.272322,0.267595,0.25367,-0.389871,-0.413904,-0.394351


In [12]:
# First row of the fitted components, kept two-dimensional (a 1 x n_features
# slice) so that its transpose can be used as a column vector in a matrix product.
pc_weight = pca.components_[0:1]
pc_weight

array([[-0.39649233, -0.39223738, -0.27232163,  0.2675948 ,  0.25367007,
        -0.38987127, -0.41390404, -0.39435132]])

(8, 1)

In [21]:
# For the sampled date, compute PC1 weights x factor values for every ticker.
# A single matrix product replaces the row-by-row `apply` with a Python lambda;
# the result is still a one-column frame indexed by ticker.
# NOTE(review): `pc_weight` was fitted on the min-max-scaled frame (`df_scale`),
# while `df` holds the unscaled values — confirm that projecting the raw values
# is intended.
df.dot(pc_weight.T)

Unnamed: 0_level_0,0
ticker,Unnamed: 1_level_1
A,-227.140772
AAL,-20.862329
AAP,-332.650817
AAPL,-233.613414
IBM,-222.830534


In [20]:
# LSB's great code

In [22]:
# Feature generation: look up a ticker with yfinance, read its open/high/low/close/volume
# history, and use the finta package to derive technical-indicator features.
# finta: https://github.com/peerchemist/finta
# TODO: needs a period check — return null when valid data covers less than 80% of the whole period (not implemented below).
# Example arguments: ticker='AMZN', start='2019-01-01'
from finta import TA
def feature_generate(ticker, start, end):
    """Build a per-date feature table for a single ticker.

    Downloads the price history of ``ticker`` with yfinance, derives a few
    price/volume ratio features, and joins them with a set of finta indicators
    (each called with only the price frame, i.e. with finta's own defaults).

    Args:
        ticker: Symbol passed to ``yf.Ticker`` (e.g. ``'AAPL'``).
        start: Start date forwarded to ``Ticker.history``.
        end: End date forwarded to ``Ticker.history``.

    Returns:
        A DataFrame on the index of the downloaded history whose columns are
        ``return``, ``open``, ``high``, ``low``, ``volume`` followed by one
        column per indicator, in the order they are computed below.
    """
    # Fetch the history once; previously the same request was issued five times,
    # once per price/volume column.
    history = yf.Ticker(ticker).history(start=start, end=end)
    # finta is fed lower-case open/high/low/close/volume columns.
    ohlc = history[['Open', 'High', 'Low', 'Close', 'Volume']].rename(columns=str.lower)

    # Ratio features: day-over-day relative change of close and volume, and
    # open/high/low expressed relative to the same day's close.
    close = ohlc['close']
    prev_close = close.shift()
    prev_volume = ohlc['volume'].shift()
    sta = pd.DataFrame(
        {
            'return': (close - prev_close) / prev_close,
            'open': ohlc['open'] / close,
            'high': ohlc['high'] / close,
            'low': ohlc['low'] / close,
            'volume': (ohlc['volume'] - prev_volume) / prev_volume,
        },
        index=ohlc.index,
    )

    # Indicators are evaluated in the original order, because some finta
    # routines may modify the frame they are given. Indicators that return
    # several columns are computed once and then split, instead of being
    # recomputed for every column taken from them.
    ind = {}
    ind['SMA'] = TA.SMA(ohlc)
    ind['SMM'] = TA.SMM(ohlc)
    ind['ER'] = TA.ER(ohlc)
    vw_macd = TA.VW_MACD(ohlc)
    ind['MACD'] = vw_macd.iloc[:, 0]
    ind['signal'] = vw_macd.iloc[:, 1]
    ind['MOM'] = TA.MOM(ohlc)
    ind['ROC'] = TA.ROC(ohlc)
    ind['RSI'] = TA.RSI(ohlc)
    ind['TR'] = TA.TR(ohlc)
    ind['SAR'] = TA.SAR(ohlc)
    ind['BBANDS'] = TA.BBANDS(ohlc).iloc[:, 1]  # second column of the bands frame
    ind['BBWIDTH'] = TA.BBWIDTH(ohlc)
    keltner = TA.KC(ohlc)
    ind['KC_UPPER'] = keltner.iloc[:, 0]
    ind['KC_LOWER'] = keltner.iloc[:, 1]
    ind['DO'] = TA.DO(ohlc).iloc[:, 1]  # second column only
    ind['DMI'] = TA.DMI(ohlc).iloc[:, 1]  # second column only
    ind['PIVOT'] = TA.PIVOT(ohlc).iloc[:, 0]  # first column only
    ind['WILLIAMS'] = TA.WILLIAMS(ohlc)
    ind['UO'] = TA.UO(ohlc)
    ind['AO'] = TA.AO(ohlc)
    ind['MI'] = TA.MI(ohlc)
    vortex = TA.VORTEX(ohlc)
    ind['Vlm'] = vortex.iloc[:, 0]
    ind['Vlp'] = vortex.iloc[:, 1]
    ind['KST'] = TA.KST(ohlc).iloc[:, 0]  # first column only
    ind['TP'] = TA.TP(ohlc)
    ind['ADL'] = TA.ADL(ohlc)
    ind['CHAIKIN'] = TA.CHAIKIN(ohlc)
    ind['MFI'] = TA.MFI(ohlc)
    ind['OBV'] = TA.OBV(ohlc)
    ind['EFI'] = TA.EFI(ohlc)
    ebbp = TA.EBBP(ohlc)
    ind['Bull'] = ebbp.iloc[:, 0]
    ind['Bear'] = ebbp.iloc[:, 1]
    ind['EMV'] = TA.EMV(ohlc)
    ind['CCI'] = TA.CCI(ohlc)
    ind['COPP'] = TA.COPP(ohlc)
    basp = TA.BASP(ohlc)
    ind['Buy'] = basp.iloc[:, 0]
    ind['Sell'] = basp.iloc[:, 1]
    chandelier = TA.CHANDELIER(ohlc)
    ind['Short'] = chandelier.iloc[:, 0]
    ind['Long'] = chandelier.iloc[:, 1]
    ind['FISH'] = TA.FISH(ohlc)
    ind['VPT'] = TA.VPT(ohlc)
    ind['MSD'] = TA.MSD(ohlc)
    ind['STC'] = TA.STC(ohlc)

    # Build the indicator frame in one step, aligned on the price index.
    df = pd.DataFrame(ind, index=ohlc.index)
    return sta.join(df)

In [26]:
# Build the AAPL feature table for the 2018-01-01 to 2019-01-01 window.
start, end = '2018-01-01', '2019-01-01'
aapl = feature_generate('AAPL', start, end)

In [28]:
# Display the feature table from positional row 60 onward, all columns.
aapl.iloc[60:, :]

Unnamed: 0_level_0,return,open,high,low,volume,SMA,SMM,ER,MACD,signal,...,CCI,COPP,Buy,Sell,Short,Long,FISH,VPT,MSD,STC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-03-29,0.007809,1.000179,1.023662,0.994755,-0.078477,40.892586,40.370365,0.373153,-0.245688,0.102715,...,-106.714901,-7.359557,0.606943,1.745424,42.214298,41.094266,-1.772572,-3.283545e+08,1.149213,4.736952e-15
2018-04-02,-0.006556,0.999760,1.013559,0.986741,-0.021139,40.892177,40.248428,0.383106,-0.299088,0.022354,...,-121.779584,-8.744584,1.455898,0.973962,42.163868,41.032318,-2.006785,-3.256621e+08,1.218671,4.736952e-15
2018-04-03,0.010259,0.995546,1.002138,0.979156,-0.194451,40.899707,40.248428,0.241691,-0.304319,-0.042980,...,-97.475143,-9.308130,1.764799,0.131864,42.057909,41.138276,-2.311616,-2.787196e+08,1.247918,4.736952e-15
2018-04-04,0.019122,0.960783,1.002331,0.960142,0.142926,40.968295,40.248428,0.114331,-0.247109,-0.083806,...,-59.300560,-8.694362,3.393943,0.175266,42.168433,41.027753,-2.096624,-2.137679e+07,1.240601,1.048640e-01
2018-04-05,0.006934,0.998727,1.008275,0.995833,-0.221708,41.067112,40.248428,0.052813,-0.190131,-0.105071,...,-5.285981,-7.962081,0.291027,0.502909,42.148868,41.047317,-1.206688,6.729624e+05,1.228527,7.854402e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-24,-0.025874,1.008990,1.032146,0.998366,-0.611788,44.700634,39.623348,0.718977,-2.378530,-2.152756,...,-205.867579,-21.604115,0.097049,1.187449,39.446910,40.681975,-3.784813,-1.261045e+09,2.322518,9.473903e-15
2018-12-26,0.070422,0.943564,1.000382,0.933512,0.576103,44.335936,38.886189,0.279239,-2.227117,-2.167628,...,-130.897303,-21.017292,5.467576,0.024622,39.670698,40.458187,-3.324586,-8.655153e+08,2.400089,9.473903e-15
2018-12-27,-0.006490,0.998015,1.003970,0.961063,-0.093294,43.985787,37.987083,0.311372,-2.112261,-2.156555,...,-109.821580,-20.474010,2.669138,0.238161,39.750459,40.378426,-2.750431,-8.458551e+08,2.459872,9.473903e-15
2018-12-28,0.000512,1.008129,1.014658,0.989247,-0.203808,43.659959,37.904900,0.369664,-2.019850,-2.129214,...,-82.724600,-19.458244,0.600098,0.711453,39.726729,40.402155,-2.051854,-9.540869e+08,2.501736,8.924085e-02
