In [1]:
!pip install pandas_ta tqdm pygad

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import pygad

from tqdm import tqdm


In [3]:
# The f-strings below already require Python 3.6+, so the Python 2 import
# fallback could never be reached; import from urllib.request directly.
from urllib.request import urlretrieve

# Download data
print('Downloading OIH_adjusted.txt...')
urlretrieve('http://api.kibot.com/?action=history&symbol=OIH&interval=1&unadjusted=0&bp=1&user=guest', 'OIH_adjusted.txt')

# Read data and assign names to the columns.
# header=None keeps the first line as data: without it pandas consumes the
# first bar as a header row, which the column rename then silently discards.
# NOTE(review): assumes the Kibot file has no header line - confirm against a
# downloaded sample.
df = pd.read_csv(
    'OIH_adjusted.txt',
    header=None,
    names=['date', 'time', 'open', 'high', 'low', 'close', 'volume'],
)

# Combine date and time in the date column
df['date'] = pd.to_datetime(df['date'] + ' ' + df['time'], format='%m/%d/%Y %H:%M')
df = df[['date', 'open', 'high', 'low', 'close', 'volume']]

# Sort by date and assign the date as index
df = df.sort_values('date').set_index('date')

# Convert the data to different timeframes & save them for future uses
AGGREGATION = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
TIMEFRAMES = ['5T', '15T', '1H', '1D']

for timeframe in TIMEFRAMES:
    print(f'Converting & Saving {timeframe} Data...')
    # Always resample from the 1-minute frame and keep `df` untouched, so every
    # timeframe is derived from the source data (not from the previous, coarser
    # timeframe) and the loop can be re-run without re-reading the file.
    resampled = df.resample(timeframe).agg(AGGREGATION).dropna()
    resampled.to_csv(f'OIH_{timeframe}.csv.gz', compression='gzip')


Downloading OIH_adjusted.txt...
Converting & Saving 5T Data...
Converting & Saving 15T Data...
Converting & Saving 1H Data...
Converting & Saving 1D Data...


In [4]:
# Constants
SOLUTIONS = 20                           # passed to pygad.GA as sol_per_pop
GENERATIONS = 50                         # passed to pygad.GA as num_generations (and the progress-bar total)
TIMEFRAMES = ['5T', '15T', '1H', '1D']   # suffixes of the OIH_<timeframe>.csv.gz files to process

In [5]:
# Configuration
# Turn off pandas' chained-assignment check for the whole notebook.
pd.set_option('mode.chained_assignment', None)
# Print numpy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [6]:
# Loading data, and split in train and test datasets
def get_data(timeframe, split_date='2022-01-01', bb_length=20):
    """Load one resampled OIH file, add the strategy features and split it by date.

    Parameters
    ----------
    timeframe : str
        Suffix of the file to read, i.e. ``OIH_{timeframe}.csv.gz``.
    split_date : str or datetime-like, default '2022-01-01'
        Rows dated strictly before this go to the train set, the rest to the
        test set.
    bb_length : int, default 20
        Window length passed to the pandas_ta Bollinger Bands indicator.

    Returns
    -------
    (train, test) : tuple of DataFrame
        Both frames carry the extra columns ``high_limit``, ``low_limit``,
        ``close_percentage`` (clipped to [0, 1]) and ``volatility``.
    """
    # Parse the dates up front so the split below is a real datetime
    # comparison instead of a lexicographic comparison of strings.
    df = pd.read_csv(f'OIH_{timeframe}.csv.gz', compression='gzip', parse_dates=['date'])
    df.ta.bbands(close=df['close'], length=bb_length, append=True)

    # Drop the rows with missing values (e.g. the indicator warm-up rows);
    # the explicit copy makes the column assignments below write to a frame
    # we own.
    df = df.dropna().copy()

    # Band columns appended by bbands; the '2.0' suffix is the indicator's
    # standard-deviation multiplier as it appears in the column names.
    upper = df[f'BBU_{bb_length}_2.0']
    lower = df[f'BBL_{bb_length}_2.0']
    half_width = (upper - lower) / 2

    # Widen the band by half of its width on each side, then express the close
    # as a 0..1 position inside that widened band.
    df['high_limit'] = upper + half_width
    df['low_limit'] = lower - half_width
    df['close_percentage'] = np.clip((df['close'] - df['low_limit']) / (df['high_limit'] - df['low_limit']), 0, 1)
    df['volatility'] = upper / lower - 1

    split = pd.Timestamp(split_date)
    train = df[df['date'] < split]
    test = df[df['date'] >= split]

    return train, test

In [7]:
# Fitness callback handed to the PyGAD instance
def fitness_func(self, solution, sol_idx):
    """Score a candidate solution by the total reward it earns on the train data.

    The first three entries of `solution` are forwarded to `get_result` as
    min_volatility, max_buy_perc and min_sell_perc. `self` and `sol_idx` are
    accepted but not used. Reads the module-level `train` frame.
    """
    min_volatility, max_buy_perc, min_sell_perc = solution[0], solution[1], solution[2]

    # Only the reward matters for the fitness; win/loss counts are discarded
    reward, _wins, _losses = get_result(train, min_volatility, max_buy_perc, min_sell_perc)
    return reward

In [8]:
# Define a reward function
def get_result(df, min_volatility, max_buy_perc, min_sell_perc):
    """Simulate the long-only strategy on `df` and summarise its trades.

    A bar is a buy when ``volatility > min_volatility`` and
    ``close_percentage < max_buy_perc``; it is a sell when
    ``close_percentage > min_sell_perc``. A bar that satisfies both is treated
    as a sell. Each trade buys at the close of a buy bar and sells at the
    close of the next sell bar.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns ``close``, ``volatility`` and
        ``close_percentage``. It is not modified.
    min_volatility, max_buy_perc, min_sell_perc : float
        Strategy thresholds described above.

    Returns
    -------
    (total_reward, wins, losses)
        Sum of (sell close - buy close) over all trades, and the number of
        trades with a positive / negative reward.
    """
    # Sell takes precedence over buy; everything else is "no operation" (0)
    buy = (df['volatility'] > min_volatility) & (df['close_percentage'] < max_buy_perc)
    sell = df['close_percentage'] > min_sell_perc
    signal = np.where(sell, -1, np.where(buy, 1, 0))

    # Work on a private two-column frame: the caller's data is left untouched
    # and 'signal' is always addressed by label, never by column position
    # (the input may already contain a 'signal' column anywhere).
    trades = df[['close']].reset_index(drop=True)
    trades['signal'] = signal

    # Remove all rows without operations, rows with the same consecutive
    # operation, first row selling, and last row buying
    trades = trades[trades['signal'] != 0]
    trades = trades[trades['signal'] != trades['signal'].shift()]
    if len(trades) > 0 and trades['signal'].iloc[0] == -1:
        trades = trades.iloc[1:]
    if len(trades) > 0 and trades['signal'].iloc[-1] == 1:
        trades = trades.iloc[:-1]

    # Signals now alternate buy, sell, buy, sell ...: the reward of a sell row
    # is its close minus the close of the row before it (its buy); buy rows
    # earn nothing.
    rewards = trades['close'].diff().where(trades['signal'] == -1, 0)

    # Generate the result
    total_reward = rewards.sum()
    wins = int((rewards > 0).sum())
    losses = int((rewards < 0).sum())

    return total_reward, wins, losses

In [9]:
# Fixed seed for the genetic algorithm so that a re-run of this cell repeats
# the same search and reports the same parameters.
RANDOM_SEED = 42


def _print_result(title, data, params):
    """Evaluate `params` on `data` with get_result and print the summary block."""
    profit, wins, losses = get_result(data, params[0], params[1], params[2])
    trades = wins + losses

    print(title.center(60, '*'))
    print(f"* Profit / Loss  : {profit:.2f}")
    print(f"* Wins / Losses  : {wins} / {losses}")
    print(f"* Win Rate       : {(100 * (wins / trades) if trades > 0 else 0):.2f}%")


for timeframe in TIMEFRAMES:

    # Get Train and Test data for timeframe.
    # `train` must stay a module-level name: fitness_func reads it as a global.
    train, test = get_data(timeframe)

    # Process timeframe
    print("".center(60, "*"))
    print(f' PROCESSING TIMEFRAME {timeframe} '.center(60, '*'))
    print("".center(60, "*"))

    with tqdm(total=GENERATIONS) as pbar:

        # Create Genetic Algorithm
        ga_instance = pygad.GA(num_generations=GENERATIONS,
                               num_parents_mating=5,
                               fitness_func=fitness_func,
                               sol_per_pop=SOLUTIONS,
                               num_genes=3,
                               gene_space=[{'low': 0, 'high':1}, {'low': 0, 'high':1}, {'low': 0, 'high':1}],
                               parent_selection_type="sss",
                               crossover_type="single_point",
                               mutation_type="random",
                               mutation_num_genes=1,
                               keep_parents=-1,
                               on_generation=lambda _: pbar.update(1),
                               random_seed=RANDOM_SEED,
                               )

        # Run the Genetic Algorithm
        ga_instance.run()

    # Best solution of the final population; passing the fitness values that
    # run() already computed avoids evaluating the whole population again.
    solution, solution_fitness, _ = ga_instance.best_solution(
        pop_fitness=ga_instance.last_generation_fitness)

    # Labels follow get_result's parameters: buy while close_percentage is
    # BELOW solution[1] (a maximum), sell once it is ABOVE solution[2] (a minimum).
    print(' Best Solution Parameters '.center(60, '*'))
    print(f"Min Volatility   : {solution[0]:6.4f}")
    print(f"Max Perc to Buy  : {solution[1]:6.4f}")
    print(f"Min Perc to Sell : {solution[2]:6.4f}")

    # Report the best solution on the train data, then on the unseen test data
    _print_result(f' Result for timeframe {timeframe} (TRAIN) ', train, solution)
    _print_result(f' Result for timeframe {timeframe} (TEST) ', test, solution)

    print("")

************************************************************
***************** PROCESSING TIMEFRAME 5T ******************
************************************************************


100%|██████████| 50/50 [01:43<00:00,  2.07s/it]


***************** Best Solution Parameters *****************
Min Volatility   : 0.0026
Min Perc to Buy  : 0.8119
Max Perc to Sell : 0.6306
************* Result for timeframe 5T (TRAIN) **************
* Profit / Loss  : 5205.69
* Wins / Losses  : 21253 / 5394
* Win Rate       : 79.76%
************** Result for timeframe 5T (TEST) **************
* Profit / Loss  : 198.23
* Wins / Losses  : 893 / 231
* Win Rate       : 79.45%

************************************************************
***************** PROCESSING TIMEFRAME 15T *****************
************************************************************


100%|██████████| 50/50 [00:31<00:00,  1.57it/s]


***************** Best Solution Parameters *****************
Min Volatility   : 0.0051
Min Perc to Buy  : 0.7016
Max Perc to Sell : 0.5965
************* Result for timeframe 15T (TRAIN) *************
* Profit / Loss  : 3132.70
* Wins / Losses  : 7763 / 2070
* Win Rate       : 78.95%
************* Result for timeframe 15T (TEST) **************
* Profit / Loss  : 46.90
* Wins / Losses  : 276 / 85
* Win Rate       : 76.45%

************************************************************
***************** PROCESSING TIMEFRAME 1H ******************
************************************************************


100%|██████████| 50/50 [00:12<00:00,  3.91it/s]


***************** Best Solution Parameters *****************
Min Volatility   : 0.0096
Min Perc to Buy  : 0.6363
Max Perc to Sell : 0.5166
************* Result for timeframe 1H (TRAIN) **************
* Profit / Loss  : 671.25
* Wins / Losses  : 2143 / 676
* Win Rate       : 76.02%
************** Result for timeframe 1H (TEST) **************
* Profit / Loss  : 2.23
* Wins / Losses  : 76 / 23
* Win Rate       : 76.77%

************************************************************
***************** PROCESSING TIMEFRAME 1D ******************
************************************************************


100%|██████████| 50/50 [00:06<00:00,  8.17it/s]


***************** Best Solution Parameters *****************
Min Volatility   : 0.2139
Min Perc to Buy  : 0.5089
Max Perc to Sell : 0.8034
************* Result for timeframe 1D (TRAIN) **************
* Profit / Loss  : 618.20
* Wins / Losses  : 17 / 10
* Win Rate       : 62.96%
************** Result for timeframe 1D (TEST) **************
* Profit / Loss  : 52.03
* Wins / Losses  : 1 / 0
* Win Rate       : 100.00%

