In [1]:
import pandas as pd
import numpy as np
import os

Functions of this notebook: 
* Read cleaned data ("ticker.csv" in directory ".../Data/cleaned", generated by 1_data_cleaning) 
* Calculate average price and its movement direction 
* Calculate all technical indicators with selected parameters 
* Output one .csv file for each stock (with name "ticker_indicator.csv" and stored in directory ".../Data/indicator")

### Directory Setting

In [2]:
# Machine-specific project location. The relative paths below are resolved
# from this directory when it exists.
dir_working = "/Users/user/Desktop/E4733 AT/Project/Coding Environment/Codes"
if os.path.isdir(dir_working):
    os.chdir(dir_working)
else:
    # On another machine the hard-coded path is absent; keep the current
    # directory instead of crashing so the relative paths below resolve
    # against wherever the notebook was started.
    print("Working directory not found, staying in " + os.getcwd())
dir_data = "../Data/cleaned"      # input: one cleaned "<ticker>.csv" per stock
dir_output = "../Data/indicator"  # output: one "<ticker>_indicator.csv" per stock
ticker = "aapl"
ticker_list = ['aapl', 'amzn', 'nvda', 'amd', 'msft', 'fb', 'nflx',
               'goog', 'intc', 'pypl']

### 1. Direction Calculation Function

In [3]:
def direction_cal(df): # direction is 1 or -1
    """Add the average price, its one-step return and the move direction.

    Columns written on ``df`` (which is modified in place and returned):

    * ``average_price`` - mean of open, close, high and low.
    * ``return``        - relative change of ``average_price`` versus the
      previous row.
    * ``direction``     - 1 where ``return`` is positive, -1 where it is zero
      or negative. Rows whose return is NaN (the first row, which has no
      predecessor) are left as NaN.
    """
    ohlc_total = df['open'] + df['close'] + df['high'] + df['low']
    df['average_price'] = ohlc_total / 4
    previous_average = df['average_price'].shift(1)
    df['return'] = df['average_price'] / previous_average - 1
    moved_up = df['return'] > 0
    flat_or_down = df['return'] <= 0
    df.loc[moved_up, 'direction'] = 1
    df.loc[flat_or_down, 'direction'] = -1
    return df

### 2. Technical Indicator Function

In [4]:
def MA(df, n=5):
    """Add the n-row simple moving average of close and its sign signal.

    Writes ``MA_<n>`` (rolling mean of ``close``) and ``MA_CAT_<n>``
    (sign of ``close - MA_<n>``: 1, -1, 0, or NaN while the window is not
    yet full). ``df`` is modified in place and returned.
    """
    suffix = str(n)
    ma_col = 'MA_' + suffix
    df[ma_col] = df['close'].rolling(n).mean()
    gap_to_average = df['close'] - df[ma_col]
    df['MA_CAT_' + suffix] = np.sign(gap_to_average)
    return df

In [5]:
def BIAS(df, n=5):
    """Add ``BIAS_<n>``: relative distance of close from its n-row average.

    BIAS = (close - MA_n) / MA_n.

    If the frame already carries an ``MA_<n>`` column (e.g. written by
    ``MA(df, n)``) that column is used; otherwise the rolling mean of
    ``close`` is computed locally, so the function no longer raises
    ``KeyError`` when called on its own. No ``MA_<n>`` column is added in
    the fallback case. ``df`` is modified in place and returned.
    """
    ma_col = 'MA_' + str(n)
    if ma_col in df.columns:
        moving_avg = df[ma_col]
    else:
        moving_avg = df['close'].rolling(n).mean()
    df['BIAS_' + str(n)] = (df['close'] - moving_avg) / moving_avg
    return df

In [6]:
def MACD(data, win1 = 12, win2 = 26):
    """Add a moving-average spread and its sign signal.

    ``MACD_<win1>_<win2>`` is the ``win1``-row simple rolling mean of close
    minus the ``win2``-row simple rolling mean (note: simple means, not
    exponential ones). ``MACD_CAT_<win1>_<win2>`` is 1 / -1 / 0 for a
    positive / negative / zero spread; rows with a NaN spread are not
    assigned. ``data`` is modified in place and returned.
    """
    tag = str(win1) + '_' + str(win2)
    spread_col = 'MACD_' + tag
    col_name = 'MACD_CAT_' + tag
    short_mean = data.close.rolling(win1).mean()
    long_mean = data.close.rolling(win2).mean()
    data[spread_col] = short_mean - long_mean
    spread = data[spread_col]
    # Same assignment order as before: positive, negative, then zero.
    for label, mask in ((1, spread > 0), (-1, spread < 0), (0, spread == 0)):
        data.loc[mask, col_name] = label
    return data

In [7]:
def WilliamR(df):
    """Add ``WilliamR``: where the close sits inside the bar's own range.

    Computed per row as (high - close) / (high - low), i.e. 0 when the bar
    closes at its high and 1 when it closes at its low. Only the current
    row's high/low are used (no look-back window). ``df`` is modified in
    place and returned.
    """
    bar_range = df['high'] - df['low']
    drop_from_high = df['high'] - df['close']
    df['WilliamR'] = drop_from_high / bar_range
    return df

In [8]:
def RSI(data, win = 10):
    """Add ``RSI_<win>``: relative strength index from simple rolling means.

    Steps:
      1. ``change`` = close minus previous close.
      2. ``gain`` = change where it is >= 0, else 0;
         ``loss`` = -change where it is < 0, else 0 (NaN where the change
         itself is NaN, e.g. the first row).
      3. RS = rolling mean of gain / rolling mean of loss over ``win`` rows.
      4. RSI = 100 - 100 / (1 + RS).

    A window with no losses gives RS = inf and RSI = 100; a window with
    neither gains nor losses gives NaN.

    All intermediates are kept in local Series, so the caller's frame gains
    only the ``RSI_<win>`` column (previously five scratch columns were left
    behind on it and any same-named columns were overwritten). ``data`` is
    modified in place and returned.
    """
    change = data.close.diff()
    is_gain = change >= 0                      # False for NaN changes
    gain = change.where(is_gain, 0.0)
    loss = (-change).where(~is_gain, 0.0)      # NaN change stays NaN here
    rs = gain.rolling(win).mean() / loss.rolling(win).mean()
    data['RSI_' + str(win)] = 100 - 100 / (1 + rs)
    return data

In [9]:
def PSY(df, period = 13):
    """Add ``PSY_<period>``: share of up-closes among the last ``period`` bars.

    A bar counts as "up" when its close is strictly above the previous
    close. The value at row i is (number of up bars in rows
    i-period+1 .. i, inclusive) / period, so it ranges from 0 to 1.

    The first ``period`` rows are NaN: row 0 has no previous close, so a
    full window of real comparisons is first available at row ``period``.

    Fix: the previous implementation summed the slice ``[i-period+1:i]``,
    which leaves out the current bar and covers only ``period - 1`` bars
    while still dividing by ``period``.

    ``df`` is modified in place and returned.
    """
    # NaN differences (first row, gaps) compare False and count as "not up".
    is_up = (df['close'].diff() > 0).astype(float)
    psy = is_up.rolling(period).sum() / period
    # Row period-1 would otherwise include the placeholder first row.
    psy.iloc[:period] = np.nan
    df['PSY_' + str(period)] = psy
    return df

In [10]:
def DIF(data, win1 = 12, win2 = 26):
    """Add ``DIF_<win1>_<win2>``: ``win2``-row mean of close minus ``win1``-row mean.

    Both averages are simple rolling means. Note the order of subtraction
    (``win2`` average minus ``win1`` average). ``data`` is modified in place
    and returned.
    """
    out_col = 'DIF_' + str(win1) + '_' + str(win2)
    mean_win1 = data.close.rolling(win1).mean()
    mean_win2 = data.close.rolling(win2).mean()
    data[out_col] = mean_win2 - mean_win1
    return data

In [11]:
def Disparity(data, win = 5):
    """Add ``Disparity_<win>``: close as a percentage of its ``win``-row mean.

    Value is (close / rolling mean of close) * 100; NaN until the window is
    full. ``data`` is modified in place and returned.
    """
    rolling_mean = data.close.rolling(win).mean()
    ratio = data.close / rolling_mean
    data['Disparity_' + str(win)] = ratio * 100
    return data

In [12]:
def AD_Oscillator(data):
    """Add ``ADO``: (high - previous close) / (high - low) for each row.

    The first row is NaN because it has no previous close. ``data`` is
    modified in place and returned.
    """
    previous_close = data.close.shift(1)
    bar_range = data.high - data.low
    data['ADO'] = (data.high - previous_close) / bar_range
    return data

In [13]:
def K_D(data, n = 14, m = 3):
    """Add stochastic %K, %D and a crossover signal.

    Columns written:

    * ``K_<n>``   - 100 * (close - lowest low of the last n rows)
                    / (highest high - lowest low of the last n rows).
    * ``D_<n>_<m>`` - m-row simple rolling mean of ``K_<n>``.
    * ``K_D_CAT_<n>_<m>`` - sign of (K - D): 1, -1, 0, or NaN while either
      value is not yet available.

    The K - D difference is held in a local Series, so no temporary ``K_D``
    column is left on (or removed from) the caller's frame. ``data`` is
    modified in place and returned.
    """
    k_col = 'K_' + str(n)
    d_col = 'D_' + str(n) + '_' + str(m)
    col_name = 'K_D_CAT_' + str(n) + '_' + str(m)
    lowest_low = data.low.rolling(n).min()
    highest_high = data.high.rolling(n).max()
    data[k_col] = 100 * (data.close - lowest_low) / (highest_high - lowest_low)
    data[d_col] = data[k_col].rolling(m).mean()
    data[col_name] = np.sign(data[k_col] - data[d_col])
    return data

In [14]:
def K_D_Bound(data, n = 14, u_b = 80, l_b = 20):
    """Add an overbought/oversold category based on stochastic %K.

    %K = 100 * (close - lowest low of the last n rows)
         / (highest high - lowest low of the last n rows).

    ``K_D_CAT_B_<n>_<u_b>_<l_b>`` is 1 where %K > ``u_b``, -1 where
    %K < ``l_b``, 0 where ``l_b`` <= %K <= ``u_b`` and NaN where %K is NaN.

    %K is held in a local Series, so no temporary ``K`` column is written
    to (or dropped from) the caller's frame. ``data`` is modified in place
    and returned.
    """
    col_name = 'K_D_CAT_B_' + str(n) + '_' + str(u_b) + '_' + str(l_b)
    lowest_low = data.low.rolling(n).min()
    highest_high = data.high.rolling(n).max()
    k = 100 * (data.close - lowest_low) / (highest_high - lowest_low)
    # np.select takes the first matching condition; the order mirrors the
    # precedence of the original sequential assignments (later ones won).
    data[col_name] = np.select(
        [(k >= l_b) & (k <= u_b), k < l_b, k > u_b],
        [0.0, -1.0, 1.0],
        default=np.nan,
    )
    return data

In [15]:
def CCI(data, n = 5):
    """Add ``CCI_<n>``: commodity channel index over an n-row window.

    With M = (high + low + close) / 3 (the typical price):

        SM  = n-row simple mean of M
        D   = n-row mean absolute deviation of M around SM
        CCI = (M - SM) / (0.015 * D)

    Rows before the window is full are NaN, as are rows where the window is
    completely flat (D == 0).

    Fixes relative to the previous implementation, which stopped after a
    partial computation of D:
      * the deviation sum skipped the current bar (``range(i-n+1, i)``),
      * the sum was never averaged nor used to scale (M - SM), so the
        column held an un-normalised deviation sum rather than the CCI.

    Intermediates are local, so no ``m`` / ``sm`` scratch columns are left
    on the caller's frame. ``data`` is modified in place and returned.
    """
    col_name = 'CCI_' + str(n)
    scaling = 0.015  # conventional CCI scaling constant
    typical = (data.high + data.low + data.close) / 3
    window = typical.rolling(n)
    sm = window.mean()
    mean_dev = window.apply(lambda w: np.abs(w - w.mean()).mean(), raw=True)
    data[col_name] = (typical - sm) / (scaling * mean_dev)
    return data

In [16]:
def Breakout(data, n = 30, s = 0.0025, l = 5):
    """Add ``Breakout_<n>_<s>_<l>``: position (1, -1, 0) of a breakout rule.

    Rule, evaluated row by row on close:

    * Rows before the first full n-row window are NaN.
    * When flat (first full-window row, or previous position 0): go long (1)
      if close equals the n-row rolling maximum, else go short (-1) if it
      equals the n-row rolling minimum, else stay flat (0).
    * When in a position with direction ``d`` entered at ``open_px``:
        - if the hold counter is already greater than ``l``, go flat;
        - otherwise go flat when close has moved by the fraction ``s`` in
          the position's favour (close >= open_px * (1 + s) for a long,
          close <= open_px * (1 - s) for a short); if not, keep the
          position and increment the hold counter;
        - independently of the above, if close equals the opposite rolling
          extreme (minimum for a long, maximum for a short) the position is
          reversed on this row.
    * Every entry or reversal records close as ``open_px`` and resets the
      hold counter.

    The rolling extremes are kept in local arrays, so no ``min_px`` /
    ``max_px`` scratch columns are left on the caller's frame (previously
    they were added to it and any same-named columns overwritten).
    ``data`` is modified in place and returned.
    """
    col_name = 'Breakout_' + str(n) + '_' + str(s) + '_' + str(l)
    close = data.close.values
    min_px = data.close.rolling(n).min().values
    max_px = data.close.rolling(n).max().values
    position = np.zeros(len(close))
    open_px = 0   # entry price of the current/last position
    d = 0         # direction of the current/last position
    hold = 0      # rows the current position has been carried
    for i in range(len(close)):
        if i < n-1:
            position[i] = np.nan
            continue
        entry = 0  # direction of a position opened on this row, 0 = none
        if i == n-1 or position[i-1] == 0:
            # Flat: look for a fresh breakout (maximum checked first).
            if close[i] == max_px[i]:
                entry = 1
            elif close[i] == min_px[i]:
                entry = -1
        elif hold > l:
            position[i] = 0
        else:
            if d == 1:
                target_hit = close[i] >= open_px * (1 + s*d)
                reverse_to = -1 if close[i] == min_px[i] else 0
            else: # d == -1
                target_hit = close[i] <= open_px * (1 + s*d)
                reverse_to = 1 if close[i] == max_px[i] else 0
            if target_hit:
                position[i] = 0
            else:
                position[i] = d
                hold = hold + 1
            # A reversal overrides whatever was decided just above.
            entry = reverse_to
        if entry != 0:
            position[i] = entry
            open_px = close[i]
            d = entry
            hold = 0
    data[col_name] = position
    return data

In [17]:
def VMA(data, n = 13):
    """Add the n-row volume-weighted average close and its sign signal.

    * ``VMA_<n>``     - rolling sum of close * volume divided by the rolling
                        sum of volume over n rows.
    * ``VMA_Cat_<n>`` - sign of (close - VMA_<n>): 1, -1, 0, or NaN while
                        the window is not yet full. (The mixed-case ``Cat``
                        is kept because it is part of the output schema.)

    Intermediates are local, so no ``close_volume`` / ``close_vma`` scratch
    columns are left on the caller's frame. ``data`` is modified in place
    and returned.
    """
    vma_col = 'VMA_' + str(n)
    col_name = 'VMA_Cat_' + str(n)
    traded_value = data.close * data.volume
    data[vma_col] = traded_value.rolling(n).sum() / data.volume.rolling(n).sum()
    data[col_name] = np.sign(data.close - data[vma_col])
    return data

### 3. Function for Signal & Output

In [18]:
def signal_calculation(ticker = 'aapl'):
    """Build the indicator file for one ticker.

    Reads ``<dir_data>/<ticker>.csv``, indexes it by its ``time`` column,
    adds the direction label and every selected indicator, drops the raw
    continuous helper columns (moving averages, MA spreads, %K/%D) so only
    their categorical signals remain, and writes the result to
    ``<dir_output>/<ticker>_indicator.csv``. The output directory is
    created if it does not exist yet.

    BIAS, DIF, Disparity and Breakout are defined in this notebook but are
    not part of the selected indicator set computed here.
    """
    # Selected parameters; the drop list below is derived from these so the
    # two cannot drift apart.
    ma_windows = [3, 5, 10, 15, 20, 30]        # shared by MA and VMA
    macd_windows = [(3, 5), (5, 10), (5, 15), (5, 30),
                    (10, 15), (10, 20), (10, 30), (15, 30)]
    rsi_windows = [10, 20, 30]
    psy_periods = [10, 15, 30]
    kd_params = [(15, 3), (5, 3)]              # (n, m)
    kd_bound_windows = [5, 15]

    # read data
    data = pd.read_csv(os.path.join(dir_data, ticker + '.csv'))
    data.time = pd.to_datetime(data.time)
    data = data.set_index('time', drop = True)
    # direction
    data = direction_cal(data)
    # signals (call order fixes the column order of the output file)
    for n in ma_windows:
        data = MA(data, n=n)
    for n in ma_windows:
        data = VMA(data, n=n)
    for win1, win2 in macd_windows:
        data = MACD(data, win1, win2)
    data = WilliamR(data)
    for win in rsi_windows:
        data = RSI(data, win)
    for period in psy_periods:
        data = PSY(data, period)
    data = AD_Oscillator(data)
    for n, m in kd_params:
        data = K_D(data, n=n, m=m)
    for n in kd_bound_windows:
        data = K_D_Bound(data, n = n)
    data = CCI(data)
    # drop the continuous columns whose categorical counterpart is kept
    raw_columns = ['MA_' + str(n) for n in ma_windows]
    raw_columns += ['VMA_' + str(n) for n in ma_windows]
    raw_columns += ['MACD_' + str(win1) + '_' + str(win2)
                    for win1, win2 in macd_windows]
    for n, m in kd_params:
        raw_columns += ['K_' + str(n), 'D_' + str(n) + '_' + str(m)]
    data = data.drop(raw_columns, axis=1)
    # to_csv does not create missing directories
    os.makedirs(dir_output, exist_ok=True)
    data.to_csv(os.path.join(dir_output, ticker + '_indicator.csv'))

### 4. Execution

In [19]:
# Generate one "<ticker>_indicator.csv" per configured ticker.
for symbol in ticker_list:
    signal_calculation(ticker = symbol)