## TA-Lib으로 필요한 기술지표 변환
- https://github.com/TA-Lib/ta-lib-python

### 1. Size
- **Market Capitalization**
- Price-to-Earnings (P/E) ratio
- Price-to-Book (P/B) ratio

### 2. Value
- **Book-to-market ratio (= 1/PBR)**
- Assets(Book Value) 
- Price-to-Earnings (P/E) ratio
- (Dividend, Earning yield, ROE ratio)

### 3. Volatility
- Bollinger Bands(20)
- Average True Range (ATR; 14)
- Chaikin Volatility (10)
- Relative Volatility Index (RVI)
- **Standard deviation**
- 급변 지표 : fK, fD, SD, CCI, TR


### 4. Momentum
- Momentum (MOM)
- Relative Strength Index (RSI; 10,14)
- Moving Average Convergence Divergence (MACD; 6,12)
- Stochastic Oscillator
- Average Directional Index (ADX; 7,14)
- Rate of Change (ROC; 12)
- WillR


In [3]:
import os
import talib
import numpy as np
import pandas as pd

In [4]:
# Example: run TA-Lib's simple moving average over 100 random "close" values.
# SMA is called without an explicit window, so the library default applies;
# the printed array starts with NaN for the warm-up period.
close = np.random.random(100)
output = talib.SMA(close)
print(output)

[       nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan 0.53681108
 0.53355042 0.51974081 0.50714978 0.50964206 0.50506538 0.49861631
 0.47942958 0.47776191 0.50023692 0.49466157 0.49314826 0.50307613
 0.5108071  0.51355969 0.51680727 0.54425213 0.52435563 0.52623561
 0.51899169 0.51548026 0.51849307 0.50875643 0.49473639 0.49200705
 0.47537701 0.44766309 0.41992383 0.4259192  0.44044519 0.45012103
 0.46158539 0.48112234 0.46925826 0.46145412 0.44974252 0.43926981
 0.43675832 0.44926717 0.43137354 0.42394955 0.42020507 0.39870557
 0.3954462  0.38491133 0.38015936 0.36639588 0.37915068 0.3863637
 0.41030229 0.39569025 0.39555294 0.38806855 0.39756101 0.39121541
 0.41048207 0.4168978  0.41835369 0.42502366 0.42955595 0.41658

In [15]:
# Flat list of every function name TA-Lib exposes.
function_names = talib.get_functions()
print(function_names)

# The same functions organised as a dict keyed by group, followed by just
# the group names.
function_groups = talib.get_function_groups()
print(function_groups)
print(list(function_groups))

['HT_DCPERIOD', 'HT_DCPHASE', 'HT_PHASOR', 'HT_SINE', 'HT_TRENDMODE', 'ADD', 'DIV', 'MAX', 'MAXINDEX', 'MIN', 'MININDEX', 'MINMAX', 'MINMAXINDEX', 'MULT', 'SUB', 'SUM', 'ACOS', 'ASIN', 'ATAN', 'CEIL', 'COS', 'COSH', 'EXP', 'FLOOR', 'LN', 'LOG10', 'SIN', 'SINH', 'SQRT', 'TAN', 'TANH', 'ADX', 'ADXR', 'APO', 'AROON', 'AROONOSC', 'BOP', 'CCI', 'CMO', 'DX', 'MACD', 'MACDEXT', 'MACDFIX', 'MFI', 'MINUS_DI', 'MINUS_DM', 'MOM', 'PLUS_DI', 'PLUS_DM', 'PPO', 'ROC', 'ROCP', 'ROCR', 'ROCR100', 'RSI', 'STOCH', 'STOCHF', 'STOCHRSI', 'TRIX', 'ULTOSC', 'WILLR', 'BBANDS', 'DEMA', 'EMA', 'HT_TRENDLINE', 'KAMA', 'MA', 'MAMA', 'MAVP', 'MIDPOINT', 'MIDPRICE', 'SAR', 'SAREXT', 'SMA', 'T3', 'TEMA', 'TRIMA', 'WMA', 'CDL2CROWS', 'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3STARSINSOUTH', 'CDL3WHITESOLDIERS', 'CDLABANDONEDBABY', 'CDLADVANCEBLOCK', 'CDLBELTHOLD', 'CDLBREAKAWAY', 'CDLCLOSINGMARUBOZU', 'CDLCONCEALBABYSWALL', 'CDLCOUNTERATTACK', 'CDLDARKCLOUDCOVER', 'CDLDOJI', 'CDLDOJISTAR',

In [16]:
# Show which functions TA-Lib files under its "Volatility Indicators" group.
talib.get_function_groups()["Volatility Indicators"]

['ATR', 'NATR', 'TRANGE']

## 팩터 투자를 위한 기술지표 만드는 클래스 함수

In [39]:
class FactorIndicators:
    """Build TA-Lib technical indicators used as factor-investing features.

    All methods are static: each takes a price DataFrame and returns a new
    DataFrame holding the original columns plus the computed indicator
    columns. The input frame is left untouched.
    """

    @staticmethod
    def get_volatility(stock_data: pd.DataFrame) -> pd.DataFrame:
        """Append volatility-oriented indicator columns.

        Args:
            stock_data: Frame with "high", "low", "close" and "volume"
                columns.

        Returns:
            A new frame with the original columns plus ``upper_bb``,
            ``middle_bb``, ``lower_bb``, ``std``, ``atr``, ``adosc`` and
            ``cci``.

        Raises:
            KeyError: If one of the required columns is missing.
        """
        high = stock_data["high"]
        low = stock_data["low"]
        close = stock_data["close"]
        volume = stock_data["volume"]

        # Bollinger Bands over a 20-period window.
        upper, middle, lower = talib.BBANDS(close, timeperiod=20)
        # Library signature for reference: STDDEV(close, timeperiod=5, nbdev=1).
        std = talib.STDDEV(close, timeperiod=14)
        atr = talib.ATR(high, low, close, timeperiod=14)
        # Chaikin A/D Oscillator.
        # NOTE(review): the planning notes list "Chaikin Volatility (10)";
        # ADOSC is a different indicator - confirm it is the intended proxy.
        adosc = talib.ADOSC(high, low, close, volume, fastperiod=3, slowperiod=10)
        # Library signature for reference: CCI(high, low, close, timeperiod=14).
        cci = talib.CCI(high, low, close, timeperiod=14)

        # TODO: decide how to handle the NaN values in the indicator columns
        # (left in place for now).
        # DataFrame.assign already returns a new frame, so no explicit copy
        # of the input is needed.
        return stock_data.assign(
            upper_bb=upper,
            middle_bb=middle,
            lower_bb=lower,
            std=std,
            atr=atr,
            adosc=adosc,
            cci=cci,
        )

    @staticmethod
    def get_momentum(stock_data: pd.DataFrame) -> pd.DataFrame:
        """Append momentum-oriented indicator columns.

        Args:
            stock_data: Frame with "high", "low" and "close" columns.

        Returns:
            A new frame with the original columns plus ``rsi``, ``macd``,
            ``adx``, ``roc``, ``willr``, ``slowk`` and ``slowd``.

        Raises:
            KeyError: If one of the required columns is missing.
        """
        high = stock_data["high"]
        low = stock_data["low"]
        close = stock_data["close"]

        # MOM(5), MA(30) and WMA(30) were tried earlier and are intentionally
        # left out (original note on MOM: "100days only").
        rsi = talib.RSI(close, timeperiod=14)  # candidate periods: 10, 14
        # Only the MACD line is kept; the signal line and histogram are
        # discarded. Candidate periods noted by the author: 6, 12.
        macd, _, _ = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
        adx = talib.ADX(high, low, close, timeperiod=14)  # candidate periods: 7, 14
        # Rate of change: ((price / prevPrice) - 1) * 100.
        roc = talib.ROC(close, timeperiod=12)
        willr = talib.WILLR(high, low, close, timeperiod=14)
        slowk, slowd = talib.STOCH(
            high,
            low,
            close,
            fastk_period=5,
            slowk_period=3,
            slowk_matype=0,
            slowd_period=3,
            slowd_matype=0,
        )

        return stock_data.assign(
            rsi=rsi,
            macd=macd,
            adx=adx,
            roc=roc,
            willr=willr,
            slowk=slowk,
            slowd=slowd,
        )

    @staticmethod
    def get_rep_factor(stock_data: pd.DataFrame) -> pd.DataFrame:
        """Append one representative indicator per factor family.

        Uses the 14-period standard deviation for volatility and the MACD
        line for momentum.

        Args:
            stock_data: Frame with a "close" column.

        Returns:
            A new frame with the original columns plus ``std`` and ``macd``.

        Raises:
            KeyError: If the "close" column is missing.
        """
        close = stock_data["close"]
        std = talib.STDDEV(close, timeperiod=14)
        # Signal line and histogram are not used here.
        macd, _, _ = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
        return stock_data.assign(std=std, macd=macd)

## All Data Load 

In [41]:
# Raw per-ticker CSVs are read from DATA_DIR/<country>; the indicator-augmented
# copies are written under SAVE_DIR/<country>.
BASE_DIR = "/home/ubuntu2010/바탕화면"
DATA_DIR = f"{BASE_DIR}/famafrench_data/stockdata/final_data"
SAVE_DIR = f"{BASE_DIR}/famafrench_data/stockdata/final_with_factor"
country = "USA"

# One output folder per kind of file written below; makedirs also creates
# the parent SAVE_DIR/<country> folder.
os.makedirs(f"{SAVE_DIR}/{country}/onlyfactor", exist_ok=True)
os.makedirs(f"{SAVE_DIR}/{country}/rep", exist_ok=True)
os.makedirs(f"{SAVE_DIR}/{country}/whole", exist_ok=True)

# Ticker = CSV file name without its extension. splitext strips only the
# trailing ".csv", unlike str.split(".csv"), which cuts at the first
# occurrence anywhere in the name.
ticker_list = [
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(f"{DATA_DIR}/{country}")
    if file_name.endswith(".csv")
]

# The indicator builder holds no state, so a single instance serves every ticker.
factor = FactorIndicators()

for tic in ticker_list:
    df = pd.read_csv(f"{DATA_DIR}/{country}/{tic}.csv", index_col="date", parse_dates=["date"])

    # Momentum columns are stacked on top of the volatility columns; the
    # representative-factor frame is built from the raw data.
    data1 = factor.get_volatility(df)
    data2 = factor.get_momentum(data1)
    rep_data = factor.get_rep_factor(df)

    # Files that keep the OHLCV columns.
    data2.to_csv(f"{SAVE_DIR}/{country}/whole/{tic}.csv")
    rep_data.to_csv(f"{SAVE_DIR}/{country}/rep/{tic}.csv")

    # File without the OHLCV columns.
    data3 = data2.drop(columns=["open", "high", "low", "close", "volume"])
    data3.to_csv(f"{SAVE_DIR}/{country}/onlyfactor/{tic}.csv")
    print(f"Ticker {tic} is done.")

print("All Finished.")


Ticker DIS is done.
Ticker PPG is done.
Ticker VFC is done.
Ticker JPM is done.
Ticker ALK is done.
Ticker TYL is done.
Ticker UHS is done.
Ticker SHW is done.
Ticker FMC is done.
Ticker MSI is done.
Ticker NKE is done.
Ticker CVS is done.
Ticker BK is done.
Ticker MMM is done.
Ticker NEE is done.
Ticker OXY is done.
Ticker OMC is done.
Ticker PHM is done.
Ticker BRO is done.
Ticker VLO is done.
Ticker LUMN is done.
Ticker K is done.
Ticker IBM is done.
Ticker BAX is done.
Ticker CMS is done.
Ticker CVX is done.
Ticker WMT is done.
Ticker EIX is done.
Ticker CMI is done.
Ticker BFb is done.
All Finished.


### 1. Size : 구할 수 있는 것이 없음. 데이터 추가 수집 필요
- **Market Capitalization**
- Price-to-Earnings (P/E) ratio
- Price-to-Book (P/B) ratio

### 3. Volatility
- Bollinger Bands(20)
- Average True Range (ATR; 14)
- Chaikin Volatility (10)
- Relative Volatility Index (RVI)
- **Standard deviation**
- 급변 지표 : fK, fD, SD, CCI, TR

Unnamed: 0_level_0,open,high,low,close,volume,BTM,marketCap_new,upper_bb,middle_bb,lower_bb,std,atr,adosc,cci
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-03,6.08000,6.20000,6.06667,6.14667,1075313,2.17,2.429287e+05,,,,,,,
2000-01-04,6.12000,6.20000,5.97333,6.03333,806250,2.13,2.384493e+05,,,,,,,
2000-01-05,6.00000,6.13333,6.00000,6.08000,744375,2.15,2.402938e+05,,,,,,,
2000-01-06,6.05333,6.14667,6.04000,6.12000,1078125,2.16,2.418746e+05,,,,,,,
2000-01-07,6.10667,6.19333,6.10667,6.18667,1163438,2.19,2.445096e+05,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-22,66.50000,66.52000,65.24000,65.90000,929900,11.36,2.041028e+07,68.359692,66.9870,65.614308,0.644471,1.261802,969872.082403,-109.512761
2022-02-23,66.34000,66.99000,65.11000,65.18000,1292800,11.23,2.018729e+07,68.493411,66.9210,65.348589,0.701495,1.305959,523874.363931,-109.091596
2022-02-24,64.43000,64.69000,63.05000,64.58000,1215800,11.13,2.000146e+07,68.691474,66.8455,64.999526,0.873721,1.364819,628747.361760,-220.171720
2022-02-25,65.12000,66.58000,64.94000,66.46000,1218700,11.45,2.058373e+07,68.649468,66.8060,64.962532,0.853861,1.410189,945512.719337,-41.148325


In [35]:
# Recompute the momentum indicators on top of `data1`. Relies on `factor` and
# `data1` already existing in the session (presumably from the batch-loop cell).
data2 = factor.get_momentum(data1)

mom5  100
rsi  5781


In [20]:
# Scratch check: 14-period standard deviation of the close price, then
# display the input frame `df` as the cell result.
std = talib.STDDEV(df["close"], timeperiod=14) # result type: pandas.core.series.Series
df

Unnamed: 0_level_0,open,high,low,close,volume,BTM,marketCap_new
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-03,6.08000,6.20000,6.06667,6.14667,1075313,2.17,2.429287e+05
2000-01-04,6.12000,6.20000,5.97333,6.03333,806250,2.13,2.384493e+05
2000-01-05,6.00000,6.13333,6.00000,6.08000,744375,2.15,2.402938e+05
2000-01-06,6.05333,6.14667,6.04000,6.12000,1078125,2.16,2.418746e+05
2000-01-07,6.10667,6.19333,6.10667,6.18667,1163438,2.19,2.445096e+05
...,...,...,...,...,...,...,...
2022-02-22,66.50000,66.52000,65.24000,65.90000,929900,11.36,2.041028e+07
2022-02-23,66.34000,66.99000,65.11000,65.18000,1292800,11.23,2.018729e+07
2022-02-24,64.43000,64.69000,63.05000,64.58000,1215800,11.13,2.000146e+07
2022-02-25,65.12000,66.58000,64.94000,66.46000,1218700,11.45,2.058373e+07


### 4. Momentum
- Momentum (MOM)
- Relative Strength Index (RSI; 10,14)
- Moving Average Convergence Divergence (MACD; 6,12)
- Stochastic Oscillator
- Average Directional Index (ADX; 7,14)
- Rate of Change (ROC; 12)
- WillR