
# Import Data

Importing AXP data.

In [19]:

import pandas as pd

amex_df = pd.read_csv('../../data/gen/usAXP_df.csv',
                           parse_dates=True,
                           index_col=0)
# Strip whitespace from column names
amex_df.columns = amex_df.columns.str.strip()
# Print
amex_df.head()


Unnamed: 0,close,high,low,p_change,open,pre_close,volume,date,date_week,atr21,atr14,key
,,,,,,,,,,,,
2017-06-12,80.17,80.73,79.95,-0.174,80.35,80.31,3352279.0,20170612.0,0.0,0.78,0.78,0.0
2017-06-13,80.59,80.74,80.07,0.524,80.2,80.17,3174361.0,20170613.0,1.0,0.722381,0.721071,1.0
2017-06-14,80.84,80.92,79.62,0.31,80.11,80.59,4013089.0,20170614.0,2.0,0.933535,0.942224,2.0
2017-06-15,80.7,81.24,80.23,-0.173,80.38,80.84,2773369.0,20170615.0,3.0,0.955464,0.962959,3.0
2017-06-16,81.45,81.48,80.77,0.929,80.86,80.7,5914676.0,20170616.0,4.0,0.896598,0.896962,4.0



# Even Faster Approach: Using Numba

Optimize Calculation using Numba

## Processing Data

Adding slope & velocity to each day.


In [20]:

import numpy as np

extra_df = amex_df.copy()
extra_df['slope'] = pd.Series(np.gradient(extra_df.close), extra_df.index, name='slope')
extra_df['velocity'] = pd.Series(np.gradient(extra_df.slope), extra_df.index, name='velocity')
# Print
extra_df.head()


Unnamed: 0,close,high,low,p_change,open,pre_close,volume,date,date_week,atr21,atr14,key,slope,velocity
,,,,,,,,,,,,,,
2017-06-12,80.17,80.73,79.95,-0.174,80.35,80.31,3352279.0,20170612.0,0.0,0.78,0.78,0.0,0.42,-0.085
2017-06-13,80.59,80.74,80.07,0.524,80.2,80.17,3174361.0,20170613.0,1.0,0.722381,0.721071,1.0,0.335,-0.1825
2017-06-14,80.84,80.92,79.62,0.31,80.11,80.59,4013089.0,20170614.0,2.0,0.933535,0.942224,2.0,0.055,-0.015
2017-06-15,80.7,81.24,80.23,-0.173,80.38,80.84,2773369.0,20170615.0,3.0,0.955464,0.962959,3.0,0.305,0.2675
2017-06-16,81.45,81.48,80.77,0.929,80.86,80.7,5914676.0,20170616.0,4.0,0.896598,0.896962,4.0,0.59,0.1125



## Performing Backtests

1. Prepare Parameters
1. Dfine a DataFrame to handle results
1. For Loops
1. Prepare Maps & Filters

## Preparing Parameters


In [21]:

import itertools

buy_slope_threshold_list = np.arange(-0.5, 0.5, 0.1)
buy_velocity_threshold_list = np.arange(-0.5, 0.5, 0.1)
close_slope_threshold_list = np.arange(-0.2, 0.2, 0.1)
close_velocity_threshold_list = np.arange(-0.2, 0.2, 0.1)
sell_slope_threshold_list = np.arange(-0.5, 0.5, 0.1)
sell_velocity_threshold_list = np.arange(-0.5, 0.5, 0.1)

task_list = list(itertools.product(
    buy_slope_threshold_list, 
    buy_velocity_threshold_list, 
    close_slope_threshold_list, 
    close_velocity_threshold_list, 
    sell_slope_threshold_list, 
    sell_velocity_threshold_list
))
print("Params Ready, {} tasks to run. ".format(len(task_list)))


Params Ready, 160000 tasks to run. 



## MultiProcess with Numpy


In [24]:

import concurrent.futures
from concurrent.futures.process import ProcessPoolExecutor
from datetime import datetime
from tqdm import tqdm
    
# Defining a DataFrame to handle results
final_result = pd.DataFrame(columns=['buy_slope_threshold', 'buy_velocity_threshold', 'close_slope_threshold', 'close_velocity_threshold', 'sell_slope_threshold', 'sell_velocity_threshold', 'profit_percentage'])

def single_test(data_df, 
                buy_slope_threshold, 
                buy_velocity_threshold, 
                close_slope_threshold, 
                close_velocity_threshold, 
                sell_slope_threshold, 
                sell_velocity_threshold):
        
        # Prepare Maps & Filters 
        
        long_mask = (data_df.slope > buy_slope_threshold) & (data_df.velocity > buy_velocity_threshold)
        long_close = (data_df.slope < -close_slope_threshold) & (data_df.velocity < -close_velocity_threshold)
        long_series = data_df.p_change[long_mask]
        long_array = long_series.values
        long_profit = np.product(long_array/100 + 1) - 1
        
        short_mask = (data_df.slope < sell_slope_threshold) & (data_df.velocity < sell_velocity_threshold)
        short_close = (data_df.slope > close_slope_threshold) & (data_df.velocity > close_velocity_threshold)
        short_series = data_df.p_change[short_mask]
        short_array = short_series.values
        short_profit = np.abs(np.product(short_array/100 - 1)) - 1
    
        trade_profit = long_profit + short_profit
    
        test_result = [buy_slope_threshold, 
                       buy_velocity_threshold, 
                       close_slope_threshold, 
                       close_velocity_threshold, 
                       sell_slope_threshold, 
                       sell_velocity_threshold, 
                       trade_profit * 100]
        return test_result

start_time = datetime.now()

with ProcessPoolExecutor() as executor:
    """
    By default, ProcessPool uses maximum available number of cores to process.
    """
    
    """
    Equivalent to executor.map(fn, *iterables),
    but displays a tqdm-based progress bar.
    
    Does not support timeout or chunksize as executor.submit is used internally
    
    **kwargs is passed to tqdm.
    """
    futures_list = []
    kwargs = {
        'total': len(futures_list),
        'mininterval': 30.0,
        'unit': 'tests',
        'unit_scale': True,
        'leave': True
    }
        
    futures_list = [executor.submit(single_test, 
                                    extra_df,
                                    buy_slope_threshold, 
                                    buy_velocity_threshold, 
                                    close_slope_threshold, 
                                    close_velocity_threshold, 
                                    sell_slope_threshold, 
                                    sell_velocity_threshold) for buy_slope_threshold, \
                                                                 buy_velocity_threshold, \
                                                                 close_slope_threshold, \
                                                                 close_velocity_threshold, \
                                                                 sell_slope_threshold, \
                                                                 sell_velocity_threshold in task_list]
    print("Running {} tasks".format(len(futures_list)))
    
    for f in tqdm(concurrent.futures.as_completed(futures_list), **kwargs):
        # done_callback runs on the main process
        result_row = f.result()
        
        final_result = final_result.append(pd.Series(result_row, index=final_result.columns), ignore_index=True)

    print("Finished {} tasks in {}".format(len(task_list), datetime.now() - start_time))


0.00tests [00:00, ?tests/s]13.7ktests [00:30, 457tests/s]13.7ktests [00:40, 457tests/s]22.1ktests [01:00, 384tests/s]22.1ktests [01:10, 384tests/s]30.2ktests [01:30, 341tests/s]30.2ktests [01:40, 341tests/s]39.7ktests [02:00, 333tests/s]39.7ktests [02:10, 333tests/s]48.6ktests [02:30, 322tests/s]48.6ktests [02:40, 322tests/s]57.2ktests [03:00, 309tests/s]57.2ktests [03:10, 309tests/s]65.0ktests [03:30, 293tests/s]65.0ktests [03:40, 293tests/s]72.3ktests [04:00, 277tests/s]72.3ktests [04:10, 277tests/s]81.4ktests [04:30, 284tests/s]81.4ktests [04:40, 284tests/s]99.0ktests [05:00, 336tests/s]99.0ktests [05:10, 336tests/s]115ktests [05:30, 376tests/s] 115ktests [05:40, 376tests/s]129ktests [06:00, 400tests/s]129ktests [06:10, 400tests/s]142ktests [06:30, 414tests/s]142ktests [06:40, 414tests/s]155ktests [07:00, 413tests/s]155ktests [07:10, 413tests/s]160ktests [07:14, 368tests/s]


Running 160000 tasks
Finished 160000 tasks in 0:07:22.714005



# Analysing Results

In [None]:
        
final_result = final_result.sort_values(by=['profit_percentage'],
                                        ascending=False)
final_result.head()



## MultiProcess with Numpy + Namba @JIT


In [26]:

import concurrent.futures
from numba import jit, prange
from concurrent.futures.process import ProcessPoolExecutor
from datetime import datetime
from tqdm import tqdm

# Defining a DataFrame to handle results
final_result_2 = pd.DataFrame(columns=['buy_slope_threshold', 'buy_velocity_threshold', 'close_slope_threshold', 'close_velocity_threshold', 'sell_slope_threshold', 'sell_velocity_threshold', 'profit_percentage'])

@jit(nopython=True, parallel=True)
def calc_long_profit(array):
    array = array / 100 + 1
    return array.prod() - 1

@jit(nopython=True, parallel=True)
def calc_short_profit(array):
    array = array / 100 - 1
    return array.prod() - 1

@jit
def single_test(data_df, 
                buy_slope_threshold, 
                buy_velocity_threshold, 
                close_slope_threshold, 
                close_velocity_threshold, 
                sell_slope_threshold, 
                sell_velocity_threshold):
        
        # Prepare Maps & Filters 
        
        long_mask = (data_df.slope > buy_slope_threshold) & (data_df.velocity > buy_velocity_threshold)
        long_close = (data_df.slope < -close_slope_threshold) & (data_df.velocity < -close_velocity_threshold)
        long_series = data_df.p_change[long_mask]
        long_array = long_series.values
        long_profit = calc_long_profit(long_array)
        
        short_mask = (data_df.slope < sell_slope_threshold) & (data_df.velocity < sell_velocity_threshold)
        short_close = (data_df.slope > close_slope_threshold) & (data_df.velocity > close_velocity_threshold)
        short_series = data_df.p_change[short_mask]
        short_array = short_series.values
        short_profit = calc_short_profit(short_array)
    
        trade_profit = long_profit + short_profit
    
        test_result = [buy_slope_threshold, 
                       buy_velocity_threshold, 
                       close_slope_threshold, 
                       close_velocity_threshold, 
                       sell_slope_threshold, 
                       sell_velocity_threshold, 
                       trade_profit * 100]
        return test_result

start_time = datetime.now()

with ProcessPoolExecutor() as executor:
    """
    By default, ProcessPool uses maximum available number of cores to process.
    """
    
    """
    Equivalent to executor.map(fn, *iterables),
    but displays a tqdm-based progress bar.
    
    Does not support timeout or chunksize as executor.submit is used internally
    
    **kwargs is passed to tqdm.
    """
    futures_list = []
    kwargs = {
        'total': len(futures_list),
        'mininterval': 30.0,
        'unit': 'tests',
        'unit_scale': True,
        'leave': True
    }
    
    futures_list = []
    
    for i in prange(len(task_list)):
        
        buy_slope_threshold, \
        buy_velocity_threshold, \
        close_slope_threshold, \
        close_velocity_threshold, \
        sell_slope_threshold, \
        sell_velocity_threshold = task_list[i]
        
        futures_list.append(executor.submit(single_test, 
                                            extra_df,
                                            buy_slope_threshold, 
                                            buy_velocity_threshold, 
                                            close_slope_threshold, 
                                            close_velocity_threshold, 
                                            sell_slope_threshold, 
                                            sell_velocity_threshold))
        
    print("Running {} tasks".format(len(futures_list)))
    
    for f in tqdm(concurrent.futures.as_completed(futures_list), **kwargs):
        # done_callback runs on the main process
        result_row = f.result()
        
        final_result_2 = final_result_2.append(pd.Series(result_row, index=final_result_2.columns), ignore_index=True)

    print("Finished {} tasks in {}".format(len(task_list), datetime.now() - start_time))


Compilation is falling back to object mode WITH looplifting enabled because Function "single_test" failed type inference due to: [1m[1mnon-precise type pyobject[0m
[0m[1m[1] During: typing of argument at <ipython-input-26-7d085320e61c> (32)[0m
[1m
File "<ipython-input-26-7d085320e61c>", line 32:[0m
[1mdef single_test(data_df, 
    <source elided>
        
[1m        long_mask = (data_df.slope > buy_slope_threshold) & (data_df.velocity > buy_velocity_threshold)
[0m        [1m^[0m[0m
[0m
  @jit
Compilation is falling back to object mode WITH looplifting enabled because Function "single_test" failed type inference due to: [1m[1mnon-precise type pyobject[0m
[0m[1m[1] During: typing of argument at <ipython-input-26-7d085320e61c> (32)[0m
[1m
File "<ipython-input-26-7d085320e61c>", line 32:[0m
[1mdef single_test(data_df, 
    <source elided>
        
[1m        long_mask = (data_df.slope > buy_slope_threshold) & (data_df.velocity > buy_velocity_threshold)
[0m        

Running 160000 tasks
Finished 160000 tasks in 0:08:42.339109



# Analysing Results


In [27]:

final_result_2 = final_result.sort_values(by=['profit_percentage'],
                                        ascending=False)
final_result_2.head()


Unnamed: 0,buy_slope_threshold,buy_velocity_threshold,close_slope_threshold,close_velocity_threshold,sell_slope_threshold,sell_velocity_threshold,profit_percentage
97358,0.1,-0.5,0.1,-0.1,-1.110223e-16,0.4,177.025468
97259,0.1,-0.5,0.1,-0.2,-1.110223e-16,0.4,177.025468
96759,0.1,-0.5,-0.1,0.1,-1.110223e-16,0.4,177.025468
96959,0.1,-0.5,0.0,-0.1,-1.110223e-16,0.4,177.025468
96259,0.1,-0.5,-0.2,0.0,-1.110223e-16,0.4,177.025468
