# Download the required data file

In [1]:
import requests as rq

In [2]:
# Symbol to analyse, the local name of its compressed price file, and where to fetch it
TICKER = 'SPY'
FILENAME = TICKER.lower() + '.csv.gz'
URL = 'https://github.com/crapher/medium/raw/main/25.BBGASeries/data/' + FILENAME

In [3]:
# Fetch the compressed CSV; the timeout keeps the cell from hanging indefinitely
response = rq.get(URL, timeout=60)

# Fail loudly on an HTTP error instead of saving an error page as the data file
response.raise_for_status()

with open(FILENAME, "wb") as f:
    f.write(response.content)

# Install dependencies


In [4]:
!pip install pandas_ta pygad

Collecting pandas_ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━[0m [32m81.9/115.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pygad
  Downloading pygad-3.2.0-py3-none-any.whl (80 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.8/80.8 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pandas_ta
  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas_ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218907 sha256=4405dd0cec09f39f1c63e932f6d28b72c5f09bda328c4d0322d698892e173021
  Stored in directory: /root/.cache

# Import required packages

In [5]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import pygad

from tqdm import tqdm


# User Configuration

In [6]:
# Values a reader may tune. The numeric settings are normalised (int / rounded)
# by the "Data format & preparation" cell before they are used.
CASH = 10_000                       # Cash available for operations (position size is CASH // entry price)

BB_SMA = 20                         # Bollinger bands SMA length
BB_STD = 2.0                        # Bollinger bands standard deviation
BB_MAX_BANDWIDTH = 5                # Scales the bandwidth column into the clipped [0, 1] 'volatility' value

DAYS_FOR_TESTING = 365 * 1.5        # Days at the end of the data held out for testing
WINDOW_REWARD = '3M'                # Resample window used to average the reward of a solution
WINDOW_MIN_OPERATIONS = 21 * 3      # Mean wins + losses per reward window must exceed this, else the reward is a penalty

GENERATIONS = 50                    # Generations run by the genetic algorithm
SOLUTIONS = 20                      # Solutions per population evaluated by the genetic algorithm

# Set constants, prepare configuration, and set output format

In [7]:
### Constants ###
# Bar sizes to evaluate, written as pandas resample rules: 5 minutes, 15 minutes and 1 hour
TIMEFRAMES = ['5T','15T','1H']

In [8]:
### Data format & preparation ###
# Normalise the Bollinger settings so the derived column names are stable
BB_SMA = int(BB_SMA)
BB_STD = round(BB_STD, 2)

# Names of the upper band, lower band and bandwidth columns read back in get_data
BB_UPPER, BB_LOWER, BB_VOLATILITY = (
    f'{prefix}_{BB_SMA}_{BB_STD}' for prefix in ('BBU', 'BBL', 'BBB')
)

# The day count and the operations threshold are used as whole numbers
DAYS_FOR_TESTING, WINDOW_MIN_OPERATIONS = int(DAYS_FOR_TESTING), int(WINDOW_MIN_OPERATIONS)

In [9]:
### Output preparation ###
# Silence pandas chained-assignment warnings and print numpy floats without scientific notation
pd.set_option('mode.chained_assignment', None)
np.set_printoptions(suppress=True)

# Data Functions

In [10]:
def get_data(ticker, timeframe):
    """Load a ticker's prices, derive the strategy features and split train/test.

    Parameters
    ----------
    ticker : str
        Symbol whose data file ``<ticker in lowercase>.csv.gz`` is read from
        the working directory. The file must provide 'date' and 'close' columns.
    timeframe : str
        pandas resample rule used to build the bars (one of TIMEFRAMES).

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(train, test)``. Both keep 'date' and 'close' and add 'high_limit',
        'low_limit', 'close_percentage' and 'volatility'; every column whose
        name starts with 'BB' is removed. ``test`` holds the bars after the
        last train bar, i.e. the final DAYS_FOR_TESTING days of the data.
    """

    # Read the file that belongs to the requested ticker (same naming scheme as
    # FILENAME) so the data always matches the ticker the caller asked for
    df = pd.read_csv(f'{ticker.lower()}.csv.gz')
    df['date'] = pd.to_datetime(df['date'])

    # Build bars of the requested size, keeping the last close of each bar
    # and dropping bars without any price
    df = df.set_index('date').resample(timeframe).agg({'close':'last'}).dropna().reset_index()

    # Calculate bollinger bands based on configuration; the rows dropped
    # afterwards are the ones where the appended columns are still missing
    df.ta.bbands(close=df['close'], length=BB_SMA, std=BB_STD, append=True)
    df = df.dropna()

    # Extend the channel by half a band width on each side, so the lower band
    # sits at 25% and the upper band at 75% of the [low_limit, high_limit] range
    band_width = df[BB_UPPER] - df[BB_LOWER]
    df['high_limit'] = df[BB_UPPER] + band_width / 2
    df['low_limit'] = df[BB_LOWER] - band_width / 2

    # Position of the close inside the extended channel, clipped to [0, 1]
    df['close_percentage'] = np.clip((df['close'] - df['low_limit']) / (df['high_limit'] - df['low_limit']), 0, 1)

    # Bandwidth scaled by the configured maximum, clipped to [0, 1]
    df['volatility'] = np.clip(df[BB_VOLATILITY] / (100 / BB_MAX_BANDWIDTH), 0, 1)

    # Remove all the bollinger bands fields that won't be needed from now on
    df = df.loc[:,~df.columns.str.startswith('BB')]

    # Split the data: everything up to DAYS_FOR_TESTING days before the last
    # date is train, every later bar is test
    last_train_day = df['date'].dt.date.max() - pd.Timedelta(DAYS_FOR_TESTING, 'D')
    train = df[df['date'].dt.date <= last_train_day]
    test = df[df['date'] > train['date'].max()]

    return train, test

In [11]:
def get_result(df, min_volatility, max_buy_perc, min_sell_perc, cash=None):
    """Simulate the buy/sell strategy and return one row per closed operation.

    Parameters
    ----------
    df : DataFrame
        Bars with at least 'close', 'close_percentage' and 'volatility'.
        The frame is copied, never modified.
    min_volatility : float
        A bar is a buy candidate only when its 'volatility' is above this.
    max_buy_perc : float
        A bar is a buy candidate only when 'close_percentage' is below this.
    min_sell_perc : float
        A bar is a sell signal when 'close_percentage' is above this. A sell
        signal takes precedence over a buy signal on the same bar.
    cash : number, optional
        Cash available per operation. Defaults to the module-level CASH.

    Returns
    -------
    DataFrame
        The sell bars (one per buy -> sell pair) with the input columns plus
        'pnl', 'wins' and 'losses'. The helper 'signal' column is removed.
    """

    if cash is None:
        cash = CASH

    # Generate a copy to avoid changing the original data
    df = df.copy().reset_index(drop=True)

    # Buy = 1, sell = -1 (sell overrides buy), anything else = 0
    is_buy = (df['volatility'] > min_volatility) & (df['close_percentage'] < max_buy_perc)
    is_sell = df['close_percentage'] > min_sell_perc
    df['signal'] = np.where(is_sell, -1, np.where(is_buy, 1, 0))

    # Remove all rows without operations and rows repeating the previous operation
    result = df[df['signal'] != 0]
    result = result[result['signal'] != result['signal'].shift()]

    # Drop a leading sell and a trailing buy so the rows alternate buy, sell, ...
    # The signal is read by column name: its position is not guaranteed to be
    # last when the input frame already carries a 'signal' column
    if (len(result) > 0) and (result['signal'].iat[0] == -1): result = result.iloc[1:]
    if (len(result) > 0) and (result['signal'].iat[-1] == 1): result = result.iloc[:-1]

    # Work on an owned copy so the column assignments below never write to a slice
    result = result.copy()

    # On each sell row the previous row is its buy: pnl is the price change
    # times the whole number of shares the cash could buy at the entry price
    entry_price = result['close'].shift()
    result['pnl'] = np.where(result['signal'] == -1, (result['close'] - entry_price) * (cash // entry_price), 0)
    result['wins'] = np.where(result['pnl'] > 0, 1, 0)
    result['losses'] = np.where(result['pnl'] < 0, 1, 0)

    # Keep only the sell rows, which now carry the result of each operation
    result = result[result['signal'] == -1]

    # Remove the signal column and return the dataset
    return result.drop('signal', axis=1)

In [12]:
def calculate_reward(df):
    """Score a set of operations by their average pnl per reward window.

    The operations are grouped into WINDOW_REWARD windows. When the mean
    number of wins plus losses per window is above WINDOW_MIN_OPERATIONS the
    reward is the mean pnl per window; otherwise it is the (non-positive)
    shortfall ``wins + losses - WINDOW_MIN_OPERATIONS``.

    `df` needs the columns 'date', 'close', 'wins', 'losses' and 'pnl'.
    """

    # One row per reward window with the totals of that window
    windows = (
        df.set_index('date')
          .resample(WINDOW_REWARD)
          .agg({'close':'last','wins':'sum','losses':'sum','pnl':'sum'})
          .reset_index()
    )

    # Average activity per window (zero when there is no window at all)
    if len(windows) > 0:
        wins = windows['wins'].mean()
        losses = windows['losses'].mean()
    else:
        wins = 0
        losses = 0

    operations = wins + losses

    # Enough activity: reward the average pnl per window
    if WINDOW_MIN_OPERATIONS < operations:
        return windows['pnl'].mean()

    # Too little activity: penalise by the missing operations
    return -WINDOW_MIN_OPERATIONS + operations

In [13]:
def show_result(df, name, show_monthly):
    """Print a summary of the operations in `df`, optionally with a monthly table.

    `df` is the output of get_result ('date', 'pnl', 'wins', 'losses', ...),
    `name` is the label shown in the summary header, and `show_monthly`
    enables the per-month breakdown. Nothing is returned.
    """

    # Headline numbers
    reward = calculate_reward(df)
    has_rows = len(df) > 0
    wins = df['wins'].sum() if has_rows else 0
    losses = df['losses'].sum() if has_rows else 0
    if wins + losses > 0:
        win_rate = 100 * (wins / (wins + losses))
    else:
        win_rate = 0

    pnl_by_operation = df['pnl']
    pnl = pnl_by_operation.sum()
    max_profit = pnl_by_operation.max()
    min_drawdown = pnl_by_operation.min()
    avg_pnl = pnl_by_operation.mean()

    # Summary block, one printed line per entry
    summary_lines = [
        f' SUMMARIZED RESULT - {name} '.center(60, '*'),
        f'* Reward              : {reward:.2f}',
        f'* Profit / Loss       : {pnl:.2f}',
        f'* Wins / Losses       : {wins:.0f} / {losses:.0f} ({win_rate:.2f}%)',
        f'* Max Profit          : {max_profit:.2f}',
        f'* Max Drawdown        : {min_drawdown:.2f}',
        f'* Profit / Loss (Avg) : {avg_pnl:.2f}',
    ]
    for line in summary_lines:
        print(line)

    if not show_monthly:
        return

    # Monthly block: totals per calendar month, indexed by 'YYYY-MM'
    print(' MONTHLY DETAIL RESULT '.center(60, '*'))
    monthly = (
        df.set_index('date')
          .resample('1M')
          .agg({'wins':'sum','losses':'sum','pnl':'sum'})
          .reset_index()
    )
    monthly = monthly[['date','pnl','wins','losses']]
    monthly['year_month'] = monthly['date'].dt.strftime('%Y-%m')
    monthly = monthly.drop('date', axis=1).groupby(['year_month']).sum()

    operations = monthly['wins'] + monthly['losses']
    monthly['win_rate'] = round(100 * monthly['wins'] / operations, 2)

    print(monthly)

# Genetic Algorithm functions

In [14]:
def fitness_func(self, solution, sol_idx):
    """Fitness callback handed to pygad.GA: reward of one candidate on the train data.

    `solution` holds, in order, the minimum volatility, the maximum
    close percentage to buy and the minimum close percentage to sell.
    `self` and `sol_idx` are accepted because the callback signature has
    three positional parameters; neither is used here.
    The data comes from the module-level `train`, which main() sets.
    """

    min_volatility, max_buy_perc, min_sell_perc = solution[0], solution[1], solution[2]

    # Simulate the candidate on the train data and score its operations
    operations = get_result(train, min_volatility, max_buy_perc, min_sell_perc)
    return calculate_reward(operations)

In [15]:
def get_best_solution():
    """Run the genetic algorithm and return the best parameters it found.

    The search uses GENERATIONS generations of SOLUTIONS candidates, each a
    vector of three genes in [0, 1] (step 0.0001) scored by fitness_func. A
    fixed random seed makes the run repeatable, and a tqdm bar advances once
    per generation.

    Returns
    -------
    The best solution's genes, in order: minimum volatility, maximum close
    percentage to buy, minimum close percentage to sell.
    """

    with tqdm(total=GENERATIONS) as pbar:

        # Create genetic algorithm
        ga_instance = pygad.GA(num_generations=GENERATIONS,
                               num_parents_mating=5,
                               fitness_func=fitness_func,
                               sol_per_pop=SOLUTIONS,
                               num_genes=3,
                               gene_space=[
                                {'low': 0, 'high': 1, 'step': 0.0001},
                                {'low': 0, 'high': 1, 'step': 0.0001},
                                {'low': 0, 'high': 1, 'step': 0.0001}],
                               parent_selection_type='sss',
                               crossover_type='single_point',
                               mutation_type='random',
                               mutation_num_genes=1,
                               keep_parents=-1,
                               random_seed=42,
                               on_generation=lambda _: pbar.update(1),
                               )

        # Run the genetic algorithm
        ga_instance.run()

    # Reuse the fitness values run() already computed for the final population;
    # without pop_fitness, best_solution() evaluates the population again
    best = ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)

    # best is (solution, fitness, index); only the solution is needed
    return best[0]

# Main function

In [16]:
def main(ticker):
    """Optimise and report the strategy for `ticker` on every timeframe.

    For each entry of TIMEFRAMES the data is loaded and split, the genetic
    algorithm searches the best parameters on the train split, and the
    results on the train and test splits are printed.

    The train split is published through the module-level `train` because
    fitness_func reads it from there.
    """

    global train

    separator = '*' * 60

    for timeframe in TIMEFRAMES:

        # Train goes to the global read by fitness_func, test stays local
        train, test = get_data(ticker, timeframe)

        # Header for this timeframe
        header = f' PROCESSING {ticker.upper()} - TIMEFRAME {timeframe} '.center(60, '*')
        for line in (separator, header, separator):
            print(line)

        # Search the parameters on the train data
        solution = get_best_solution()
        min_volatility, max_buy_perc, min_sell_perc = solution[0], solution[1], solution[2]

        print(' Best Solution Parameters '.center(60, '*'))
        print(f'Min Volatility   : {min_volatility:6.4f}')
        print(f'Max Perc to Buy  : {max_buy_perc:6.4f}')
        print(f'Min Perc to Sell : {min_sell_perc:6.4f}')

        # Train result (summary only)
        train_result = get_result(train, min_volatility, max_buy_perc, min_sell_perc)
        train_label = f'TRAIN ({train["date"].min().date()} - {train["date"].max().date()})'
        show_result(train_result, train_label, False)

        # Test result (summary plus monthly detail)
        test_result = get_result(test, min_volatility, max_buy_perc, min_sell_perc)
        test_label = f'TEST ({test["date"].min().date()} - {test["date"].max().date()})'
        show_result(test_result, test_label, True)

        print('')

In [17]:
# Optimise and report every timeframe for the configured ticker
main(TICKER)

************************************************************
************** PROCESSING SPY - TIMEFRAME 5T ***************
************************************************************


100%|██████████| 50/50 [00:44<00:00,  1.13it/s]


***************** Best Solution Parameters *****************
Min Volatility   : 0.0064
Max Perc to Buy  : 0.7473
Min Perc to Sell : 0.7543
*** SUMMARIZED RESULT - TRAIN (2008-05-05 - 2022-06-01) ****
* Reward              : 366.97
* Profit / Loss       : 21284.23
* Wins / Losses       : 5603 / 1881 (74.87%)
* Max Profit          : 591.80
* Max Drawdown        : -1201.86
* Profit / Loss (Avg) : 2.84
**** SUMMARIZED RESULT - TEST (2022-06-02 - 2023-11-30) ****
* Reward              : 215.86
* Profit / Loss       : 1510.99
* Wins / Losses       : 613 / 223 (73.33%)
* Max Profit          : 246.00
* Max Drawdown        : -450.84
* Profit / Loss (Avg) : 1.81
****************** MONTHLY DETAIL RESULT *******************
                 pnl  wins  losses  win_rate
year_month                                  
2022-06    -552.3182    30      15     66.67
2022-07     761.6984    38      13     74.51
2022-08    -399.8973    29      16     64.44
2022-09    -770.0998    30      14     68.18
2022-10 

100%|██████████| 50/50 [00:19<00:00,  2.59it/s]


***************** Best Solution Parameters *****************
Min Volatility   : 0.0009
Max Perc to Buy  : 0.6783
Min Perc to Sell : 0.6881
*** SUMMARIZED RESULT - TRAIN (2008-05-05 - 2022-06-01) ****
* Reward              : 185.64
* Profit / Loss       : 10766.98
* Wins / Losses       : 3358 / 907 (78.73%)
* Max Profit          : 558.88
* Max Drawdown        : -1230.18
* Profit / Loss (Avg) : 2.52
**** SUMMARIZED RESULT - TEST (2022-06-02 - 2023-11-30) ****
* Reward              : -0.29
* Profit / Loss       : 478.32
* Wins / Losses       : 340 / 99 (77.45%)
* Max Profit          : 233.82
* Max Drawdown        : -615.36
* Profit / Loss (Avg) : 1.09
****************** MONTHLY DETAIL RESULT *******************
                 pnl  wins  losses  win_rate
year_month                                  
2022-06    -227.3081    16       3     84.21
2022-07     420.1112    20       3     86.96
2022-08    -369.0414    17       5     77.27
2022-09    -742.2330    16       7     69.57
2022-10     

100%|██████████| 50/50 [00:14<00:00,  3.38it/s]


***************** Best Solution Parameters *****************
Min Volatility   : 0.0098
Max Perc to Buy  : 0.9235
Min Perc to Sell : 0.6183
*** SUMMARIZED RESULT - TRAIN (2008-05-07 - 2022-06-01) ****
* Reward              : -36.26
* Profit / Loss       : 8247.12
* Wins / Losses       : 1278 / 273 (82.40%)
* Max Profit          : 358.40
* Max Drawdown        : -2047.04
* Profit / Loss (Avg) : 5.31
**** SUMMARIZED RESULT - TEST (2022-06-02 - 2023-11-30) ****
* Reward              : -39.14
* Profit / Loss       : 1359.36
* Wins / Losses       : 131 / 36 (78.44%)
* Max Profit          : 287.69
* Max Drawdown        : -807.36
* Profit / Loss (Avg) : 8.14
****************** MONTHLY DETAIL RESULT *******************
                 pnl  wins  losses  win_rate
year_month                                  
2022-06       6.9106    11       1     91.67
2022-07     131.8199     6       3     66.67
2022-08     240.9280     8       1     88.89
2022-09    -828.0285     2       3     40.00
2022-10    