# MACD線交易策略驗證
Author：余慶龍  
Establish Date：2021.8.12  
Last Modified Date：2021.8.12

## Table of Contents
1. 讀取檔案模塊
2. 指標與交易策略
3. 回測模塊
4. Main：執行上面的模塊

## 1. 讀取檔案模塊

In [9]:
"""
把csv或excel檔案轉換成dataframe
以供後續的pandas操作
"""

import pandas as pd
import numpy as np
from datetime import datetime

def readStock_file(file, filetype='csv'):
    """Load a price file into a date-indexed DataFrame of floats.

    Args:
        file: Path (or buffer) of the file to load.
        filetype: ``'excel'`` reads the file with ``pd.read_excel`` (openpyxl
            engine, no header row); any other value reads it with
            ``pd.read_csv``.

    Returns:
        A DataFrame indexed by ``Date`` (converted to datetimes and sorted
        ascending) whose columns ``Open``, ``High``, ``Low``, ``Close`` and
        ``Volume`` hold floats. Cells that contained only ``-`` become NaN.
    """
    if filetype == 'excel':
        frame = pd.read_excel(file, engine='openpyxl', parse_dates=True, header=None)
    else:
        frame = pd.read_csv(file)

    # The file must supply exactly these six columns, in this order; whatever
    # names it carried are overwritten.
    frame.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']

    # Index by date, parse the date strings, then order chronologically.
    frame = frame.set_index('Date')
    frame.index = pd.to_datetime(frame.index)
    frame = frame.sort_index()

    # A lone "-" (optionally preceded by whitespace) marks a missing value.
    frame = frame.replace(r'^\s*-$', np.nan, regex=True)

    # 'Date' is the index by now, so every remaining column is price/volume
    # data and is converted cell by cell to float.
    for name in frame.columns:
        frame[name] = np.fromiter((float(cell) for cell in frame[name]), dtype=float)

    return frame

def random_sample(ticker_list_file,  column = '證券代碼', times = 10):
    """Draw random ticker names from one column of a CSV file.

    Args:
        ticker_list_file: CSV file (read as UTF-8) that lists the tickers.
        column: Name of the column holding the ticker names.
        times: How many names to draw. ``np.random.choice`` is called with
            its default settings, so the same name may be drawn repeatedly.

    Returns:
        A list of ``times`` names with every space character removed.
    """
    names = pd.read_csv(ticker_list_file, encoding='utf8', usecols=[column])[column].to_list()
    drawn = np.random.choice(names, times).tolist()

    cleaned = []
    for name in drawn:
        cleaned.append(name.replace(' ', ''))
    return cleaned

def file_list_with_directory(file_list, path):
    """Turn bare file names into CSV paths under ``path``.

    Args:
        file_list: Iterable of file names without extension.
        path: Prefix placed in front of each name as-is (no separator is
            inserted, so it should already end with one).

    Returns:
        A list with ``path + name + '.csv'`` for every name, in input order.
    """
    full_paths = []
    for name in file_list:
        full_paths.append(''.join((path, name, '.csv')))
    return full_paths

def get_dataframe(data_file, days=0):
    """Read a six-column price CSV into a chronologically sorted DataFrame.

    Args:
        data_file: CSV file whose columns are, in order, date, open, high,
            low, close and volume (the header names are overwritten).
        days: When truthy, only the last ``days`` rows (after sorting) are
            returned; the default ``0`` returns everything.

    Returns:
        A DataFrame indexed by ``Date`` (datetimes, ascending) with columns
        ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.
    """
    frame = pd.read_csv(data_file)

    # Impose the canonical column names, then index by the parsed dates.
    frame.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
    frame = frame.set_index('Date')
    frame.index = pd.to_datetime(frame.index)

    # Oldest row first.
    frame = frame.sort_index()

    return frame.tail(days) if days else frame

## 2. 指標與交易策略

In [57]:
# MACD parameters
MACD_N = 9             # span of the EMA applied to DIF (signal line)
MACD_EMA_SLOW = 26     # span of the slow EMA of the close price
MACD_EMA_QUICK = 12    # span of the fast EMA of the close price
middle_line = 0        # level of the zero line drawn on the MACD panel

def MACD_indicator(ticker):
    """Return a copy of ``ticker`` extended with the MACD columns.

    Args:
        ticker: DataFrame with a ``Close`` column. It is not modified.

    Returns:
        A deep copy of ``ticker`` with three extra columns:
        ``macd_DIF`` (fast EMA minus slow EMA of ``Close``),
        ``macd_DEM`` (EMA of ``macd_DIF`` over ``MACD_N``) and
        ``macd_BAR`` (``macd_DIF`` minus ``macd_DEM``).
        All EMAs use ``adjust=False``.
    """
    # Work on a deep copy so the caller's frame stays untouched.
    result = ticker.copy(deep=True)
    close = result['Close']

    fast_ema = close.ewm(span=MACD_EMA_QUICK, adjust=False).mean()
    slow_ema = close.ewm(span=MACD_EMA_SLOW, adjust=False).mean()

    dif = fast_ema - slow_ema
    result['macd_DIF'] = dif
    result['macd_DEM'] = result['macd_DIF'].ewm(span=MACD_N, adjust=False).mean()
    result['macd_BAR'] = result['macd_DIF'] - result['macd_DEM']

    return result

In [58]:
def plot_trading(data, bound='normal'):
    """Draw a candlestick chart with a MACD panel and buy/sell markers.

    Panel 0 shows the candles plus the trade markers, panel 1 the MACD bar,
    DIF, DEM and a zero line, panel 2 the volume.

    Args:
        data: DataFrame with the OHLCV columns plus ``macd_BAR``,
            ``macd_DIF``, ``macd_DEM``, ``buy`` and ``sell``. A truthy
            ``buy``/``sell`` cell marks a signal on that row.
        bound: Accepted so existing callers keep working; it is not used.
    """
    close = data['Close'].to_numpy(dtype=float)

    # Marker heights: NaN where there is no signal, otherwise the close
    # nudged down (buy) or up (sell) so the marker does not cover the candle.
    buy = np.where(data['buy'].to_numpy().astype(bool), close * 0.985, np.nan)
    sell = np.where(data['sell'].to_numpy().astype(bool), close * 1.01, np.nan)

    # Build the zero line locally. The module-level name ``middle_line`` is
    # rebound to the strategy function of the same name later in the file,
    # so reading it here would fill the array with function objects.
    zero_line = np.zeros(len(data))

    plots = [
        mpf.make_addplot(data['macd_BAR'], type = 'bar', panel = 1, color = 'grey', ylabel = 'MACD'),
        mpf.make_addplot(data['macd_DIF'], panel = 1, color = 'blue'),
        mpf.make_addplot(data['macd_DEM'], panel = 1, color = 'orange'),
        mpf.make_addplot(zero_line, panel = 1, color='#bf5ce0', alpha=0.5),
    ]

    # Only add a marker series when it has at least one real value.
    if not np.isnan(sell).all():
        plots.append(mpf.make_addplot(sell, type = 'scatter', color = 'red', marker = 'v', markersize = 100))
    if not np.isnan(buy).all():
        plots.append(mpf.make_addplot(buy, type = 'scatter', color = '#cfc01d', marker = '^', markersize = 200))

    mpf.plot(data,
             type='candle',   # candlestick chart of the stock price
             volume=True,     # plot trading volume as well
             volume_panel=2,
             figscale=1.5,
             addplot = plots,
             style='yahoo',
             panel_ratios = (3, 1, 0.8)
    )

In [77]:
import mplfinance as mpf

def bound_cross(data, plot=True):
    """Flag MACD crossovers that happen on the matching side of zero.

    A buy is flagged on a row where DIF and DEM are both positive, DIF was
    below DEM on the previous row and is above it now, and DIF rose. A sell
    is the mirror image with both lines negative. The first
    ``MACD_EMA_SLOW`` rows are skipped.

    Args:
        data: DataFrame with ``macd_DIF`` and ``macd_DEM`` columns. The
            ``buy`` / ``sell`` cells of flagged rows are set to 1 in place.
        plot: When truthy, ``plot_trading(data)`` is called afterwards.

    Returns:
        The same ``data`` object.
    """
    # Read through NumPy arrays: integer positions via ``Series[i]`` on a
    # non-integer index is deprecated in pandas.
    dif = data['macd_DIF'].to_numpy()
    dem = data['macd_DEM'].to_numpy()

    for i in range(MACD_EMA_SLOW, len(dif)):
        prev_dif, cur_dif = dif[i - 1], dif[i]
        prev_dem, cur_dem = dem[i - 1], dem[i]

        # DIF crosses above DEM while both lines are above zero.
        if (cur_dif > 0 and cur_dem > 0 and
                prev_dif < prev_dem and cur_dif > cur_dem and prev_dif < cur_dif):
            data.loc[data.index[i], 'buy'] = 1

        # DIF crosses below DEM while both lines are below zero.
        if (cur_dif < 0 and cur_dem < 0 and
                prev_dif > prev_dem and cur_dif < cur_dem and prev_dif > cur_dif):
            data.loc[data.index[i], 'sell'] = 1

    if plot:
        plot_trading(data)
    return data

def middle_line(data, plot=True):
    """Flag the rows where MACD DIF crosses the zero line.

    A buy is flagged when DIF goes from negative to positive between two
    consecutive rows, a sell when it goes from positive to negative. The
    first ``MACD_EMA_SLOW`` rows are skipped.

    NOTE: defining this function rebinds the module-level name
    ``middle_line``, which is assigned the constant 0 earlier in the file.

    Args:
        data: DataFrame with a ``macd_DIF`` column. The ``buy`` / ``sell``
            cells of flagged rows are set to 1 in place.
        plot: When truthy, ``plot_trading(data, bound='middle')`` is called
            afterwards.

    Returns:
        The same ``data`` object.
    """
    # Read through a NumPy array: integer positions via ``Series[i]`` on a
    # non-integer index is deprecated in pandas.
    dif = data['macd_DIF'].to_numpy()

    # Skip the first MACD_EMA_SLOW rows.
    for i in range(MACD_EMA_SLOW, len(dif)):
        prev_dif, cur_dif = dif[i - 1], dif[i]

        # Negative -> positive; this already implies that DIF rose.
        if prev_dif < 0 < cur_dif:
            data.loc[data.index[i], 'buy'] = 1

        # Positive -> negative; this already implies that DIF fell.
        if prev_dif > 0 > cur_dif:
            data.loc[data.index[i], 'sell'] = 1

    if plot:
        plot_trading(data, bound='middle')
    return data

## 3. 回測模塊

In [108]:
from termcolor import colored
def backtesting(data):
    """Replay the buy/sell signals and report return and win rate.

    Every buy signal buys one share at the close. A sell signal, or the
    final row, sells all held shares at the close; that group of buys plus
    one sell is counted as one trade. The per-trade growth factors are
    multiplied together to get the overall return.

    A buy signal on the final row is not executed, because there is no later
    row to sell on. The final row always liquidates whatever is still held,
    even when it carries a buy signal.

    Args:
        data: DataFrame with ``Close``, ``buy`` and ``sell`` columns, where
            a value of 1 in ``buy`` / ``sell`` marks a signal.

    Returns:
        ``(return_rate, accuracy)``, both in percent. ``accuracy`` is the
        share of trades that ended with a positive balance, or 0.0 when no
        trade was closed. ``(nan, nan)`` is returned when ``data`` contains
        no buy signal at all.
    """
    if np.count_nonzero(data['buy']) == 0:
        return np.nan, np.nan

    # Shares currently held and the cash balance of the open trade.
    have_shares, balance = 0, 0

    # Closed trades, and how many of them made money.
    make_money, trade_times = 0, 0

    # Product of the per-trade growth factors (1 + trade return).
    return_rate = 1

    # Find the last row by position, so repeated index labels cannot
    # trigger the end-of-data liquidation early.
    last_pos = len(data) - 1

    for pos, (index, row) in enumerate(data.iterrows()):
        is_last = pos == last_pos

        if row['buy'] == 1 and not is_last:
            balance -= row['Close']
            have_shares += 1
            print(colored('\tbuy ', 'green'),'|', colored(index, 'green'), '|', colored(row['Close'], 'green'))

        # Sell signal or end of data, and there is something to sell.
        elif (row['sell'] == 1 or is_last) and have_shares != 0:
            print(colored('\tsell', 'red'),'|', colored(index, 'red'), '|', colored(row['Close'],'red'), "*" , have_shares)

            # Money spent on the shares being sold now.
            principal = -balance
            balance += row['Close'] * have_shares

            trade_times += 1
            if balance > 0:
                make_money += 1

            # Compound this trade's growth factor into the running product.
            return_rate *= 1 + (balance / principal)

            # Start the next trade from a clean slate.
            balance = 0
            have_shares = 0

    return_rate = (return_rate - 1) * 100.0

    accuracy = 0.0
    if trade_times:
        accuracy = make_money / trade_times * 100

    return return_rate, accuracy

## 4. Main：執行上面的模塊

In [109]:
# Number of tickers to sample and where their data files live.
random_choice_num = 20
# Named ``show_plot`` rather than ``plot_trading`` so the flag does not
# rebind the plot_trading() function that the strategies call.
show_plot = False
data_directory = '../../parse_data/data/splited/'
ticker_name_file=  '../../parse_data/data/ticker_name.csv'


strategy_list = [bound_cross, middle_line]
# ticker name -> one formatted result per strategy, in strategy_list order
dictionary_rr = {}
dictionary_acc = {}

sampled_tickers = random_sample(ticker_list_file = ticker_name_file, times = random_choice_num)
for data_file in file_list_with_directory(sampled_tickers, data_directory):

    # File name without directory and extension, used as the row label.
    slim_file_name = (data_file.split('/')[-1]).split('.')[0]

    print(data_file)
    ticker = get_dataframe(data_file, days=200)
    ticker = MACD_indicator(ticker)

    strategy_rr= []
    strategy_acc = []
    for strategy in strategy_list:

        # Each strategy gets its own copy with empty signal columns.
        data = ticker.copy(deep=True)
        data['buy'] = np.zeros(ticker.shape[0])
        data['sell'] = np.zeros(ticker.shape[0])

        data = strategy(data, show_plot)
        return_rate, acc = backtesting(data)

        # NaN never compares equal to anything, so ``!= np.nan`` is always
        # True; test for NaN explicitly.
        if not np.isnan(return_rate):
            return_rate = (round(return_rate, 1))
            acc = (round(acc, 3))

        print('\treturn rate: {:>5.1f}% | acc: {:>5.1f}% | {}'.format(return_rate, acc, strategy.__name__))
        strategy_rr.append(str(return_rate)+'%')
        strategy_acc.append(str(acc)+'%')
        print()


    dictionary_rr[slim_file_name] = strategy_rr
    dictionary_acc[slim_file_name] = strategy_acc


strategy_names = [i.__name__ for i in strategy_list]
dataframe_return_rate = pd.DataFrame.from_dict(dictionary_rr, orient='index',columns = strategy_names)
dataframe_acc = pd.DataFrame.from_dict(dictionary_acc ,orient='index',columns = strategy_names)

print()
print('return rate:')
print(dataframe_return_rate)

print()
print('accuracy:')
print(dataframe_acc)


../../parse_data/data/splited/1225.csv
	return rate:   nan% | acc:   nan% | bound_cross

[32m	buy [0m | [32m2020-11-18 00:00:00[0m | [32m32.7[0m
[31m	sell[0m | [31m2020-12-07 00:00:00[0m | [31m31.65[0m * 1
[32m	buy [0m | [32m2020-12-18 00:00:00[0m | [32m32.45[0m
[31m	sell[0m | [31m2021-01-25 00:00:00[0m | [31m33.35[0m * 1
[32m	buy [0m | [32m2021-02-24 00:00:00[0m | [32m36.6[0m
[31m	sell[0m | [31m2021-03-26 00:00:00[0m | [31m46.0[0m * 1
	return rate:  25.0% | acc:  66.7% | middle_line

../../parse_data/data/splited/1707.csv
[32m	buy [0m | [32m2021-01-05 00:00:00[0m | [32m182.5[0m
[32m	buy [0m | [32m2021-01-08 00:00:00[0m | [32m181.0[0m
[32m	buy [0m | [32m2021-03-16 00:00:00[0m | [32m179.0[0m
[31m	sell[0m | [31m2021-03-26 00:00:00[0m | [31m180.5[0m * 3
	return rate:  -0.2% | acc:   0.0% | bound_cross

[32m	buy [0m | [32m2020-11-27 00:00:00[0m | [32m174.5[0m
[31m	sell[0m | [31m2021-01-21 00:00:00[0m | [31m174.0[0m * 1