In [1]:
!pip install pandas_ta tqdm pygad

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandas_ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pygad
  Downloading pygad-3.0.1-py3-none-any.whl (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.0/68.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pandas_ta
  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas_ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218908 sha256=721e6bb528c629116781be20a17a9dcc83b16b0ccafa265cebd468a9ce7f5424
  Stored in directory: /root/.cache/pip/wheels/69/00/ac/f7fa862c34b0e2ef320175100c233377b4c558944f12474cf0
Successfully built pandas_ta
Installing collected packages: pygad, pandas_ta
Success

In [2]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import pygad

from tqdm import tqdm


In [3]:
# The rest of this notebook relies on f-strings, so Python 3 is already
# required and the old Python 2 `urllib.urlretrieve` fallback was dead code.
from urllib.request import urlretrieve

# Local file holding the raw 1-minute download, and the names of its columns
RAW_FILE = 'OIH_adjusted.txt'
RAW_COLUMNS = ['date', 'time', 'open', 'high', 'low', 'close', 'volume']

# Download data
print(f'Downloading {RAW_FILE}...')
urlretrieve('http://api.kibot.com/?action=history&symbol=OIH&interval=1&unadjusted=0&bp=1&user=guest', RAW_FILE)

# Read data and assign names to the columns.
# pandas' default (header=0) consumes the first line of the file as a header;
# renaming the columns afterwards then silently loses that line if it was data.
# NOTE(review): the Kibot feed looks headerless (first line is a bar) - confirm.
# Sniff the first character so both layouts are read without dropping a row:
# a data line starts with the digit of an MM/DD/YYYY date, a header does not.
with open(RAW_FILE) as raw_file:
    first_line = raw_file.readline()
has_header = not first_line[:1].isdigit()
df = pd.read_csv(RAW_FILE, header=0 if has_header else None, names=RAW_COLUMNS)

# Combine date and time into a single timestamp stored in the date column
df['date'] = pd.to_datetime(df['date'] + ' ' + df['time'], format='%m/%d/%Y %H:%M')
df = df[['date','open','high','low','close','volume']]

# Sort by date and assign the date as index
df = df.sort_values('date').reset_index(drop=True).set_index('date')

# Convert the data to different timeframes & save them for future uses
AGGREGATION = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
TIMEFRAMES = ['5T', '15T', '1H', '1D']

for timeframe in TIMEFRAMES:
    print(f'Converting & Saving {timeframe} Data...')
    # `df` is deliberately re-assigned: each pass aggregates the bars produced
    # by the previous (finer) pass. This relies on TIMEFRAMES being ordered from
    # finest to coarsest with each one an exact multiple of the one before, so
    # first/max/min/last/sum compose to the same OHLCV bars.
    # dropna() removes the empty bins (no trades -> NaN prices).
    df = df.resample(timeframe).agg(AGGREGATION).dropna()
    df.to_csv(f'OIH_{timeframe}.csv.gz', compression='gzip')


Downloading OIH_adjusted.txt...
Converting & Saving 5T Data...
Converting & Saving 15T Data...
Converting & Saving 1H Data...
Converting & Saving 1D Data...


In [4]:
# Constants
# Genetic-algorithm budget: population size and number of generations
SOLUTIONS = 20
GENERATIONS = 50

# Look-ahead horizon, expressed in days
DAYS = 7

# Bar sizes to evaluate (these strings are also part of the cached file names)
TIMEFRAMES = ['5T', '15T', '1H', '1D']

# Horizon converted to a number of bars per timeframe:
# 6.5 * DAYS * bars-per-hour, truncated to an integer.
# NOTE(review): 6.5 is presumably the regular trading session length in hours - confirm.
LEN = {
    **{
        name: int(6.5 * DAYS * bars_per_hour)
        for name, bars_per_hour in (('5T', 12), ('15T', 4), ('1H', 1))
    },
    '1D': DAYS,
}

In [5]:
# Configuration
# Print NumPy floats in plain fixed-point form rather than scientific notation
np.set_printoptions(suppress=True)
# Turn off pandas' chained-assignment warning for the whole session
pd.set_option('mode.chained_assignment', None)

In [6]:
def get_data(timeframe, length, split_date='2022-01-01'):
    """Load the cached bars for one timeframe and build the labelled dataset.

    Reads ``OIH_{timeframe}.csv.gz`` and adds, for every bar:

    * ``close_future`` - the close ``length`` bars ahead.
    * ``high_future`` / ``low_future`` - the highest high / lowest low over the
      next ``length`` bars (rows i+1 .. i+length).
    * ``change`` / ``volatility`` - log return, and its rolling standard
      deviation over ``length`` bars scaled by ``sqrt(length)``.
    * ``upper_limit`` / ``lower_limit`` - close * (1 +/- volatility).
    * ``ema200`` / ``ema50`` / ``trend_up`` - trend is "up" when EMA50 > EMA200.
    * ``close_perc`` - position of ``close_future`` between the limits,
      clipped to [0, 1].
    * ``out_of_bounds`` - True when the future high breaks the upper limit in
      an up-trend, or the future low breaks the lower limit otherwise.

    Rows with any NaN (indicator warm-up and the last ``length`` bars, which
    have no future data) are dropped.

    Args:
        timeframe: Bar size label used in the cached file name (e.g. '5T').
        length: Number of bars for both the look-ahead and the volatility window.
        split_date: First date of the test set; anything ``pd.Timestamp``
            accepts. Defaults to '2022-01-01'.

    Returns:
        (train, test): rows dated before ``split_date`` and rows dated on or
        after it.
    """

    # Read the data
    df = pd.read_csv(f'OIH_{timeframe}.csv.gz', compression='gzip')

    # Get close in LEN bars in the future
    df['close_future'] = df['close'].shift(-length)

    # Get High/Low in LEN bars in the future:
    # shifting back by `length` and then taking a trailing window of `length`
    # yields the extreme over rows i+1 .. i+length
    df['high_future'] = df['high'].shift(-length).rolling(length).max()
    df['low_future'] = df['low'].shift(-length).rolling(length).min()

    # Calculate Moving Volatility & Upper/Lower limits.
    # The built-in rolling std replaces a Python lambda that was called once
    # per window, which is very slow on the intraday frames.
    df['change'] = np.log(df['close'] / df['close'].shift())
    df['volatility'] = df['change'].rolling(length).std() * length ** .5

    df['upper_limit'] = df['close'] * (1 + df['volatility'])
    df['lower_limit'] = df['close'] * (1 - df['volatility'])

    # Calculate Trend
    df['ema200'] = ta.ema(df['close'], length=200)
    df['ema50'] = ta.ema(df['close'], length=50)

    df['trend_up'] = df['ema200'] < df['ema50']

    # Clean all NaN values. The explicit copy makes the column assignments
    # below safe without relying on the chained-assignment warning being
    # disabled globally.
    df = df.dropna().copy()

    # Calculate the close percentage relative to limits
    band_width = df['upper_limit'] - df['lower_limit']
    df['close_perc'] = np.clip((df['close_future'] - df['lower_limit']) / band_width, 0, 1)

    # Check values out of bounds
    trend_up = df['trend_up']
    broke_upper = (df['high_future'] > df['upper_limit']) & trend_up
    broke_lower = (df['low_future'] < df['lower_limit']) & ~trend_up
    df['out_of_bounds'] = broke_upper | broke_lower

    # Split Train and Test datasets.
    # The 'date' column comes back from the CSV as text; compare parsed
    # timestamps so the split does not depend on string ordering.
    dates = pd.to_datetime(df['date'])
    split = pd.Timestamp(split_date)
    train = df[dates < split]
    test = df[dates >= split]

    return train, test

In [7]:
# Define fitness function to be used by the PyGAD instance
def fitness_func(self, solution, sol_idx):
    """Fitness callback for PyGAD: score one candidate on the training data.

    Args:
        self: The GA instance supplied by PyGAD (unused).
        solution: Gene vector ordered as (min_volatility, bottom_perc, top_perc).
        sol_idx: Index of the solution in the population (unused).

    Returns:
        The reward computed by ``get_result`` on the module-level ``train`` frame.
    """
    min_volatility, bottom_perc, top_perc = solution[0], solution[1], solution[2]

    # Only the reward matters here; the three counters are discarded
    reward, _total, _kept, _breaches = get_result(train, min_volatility, bottom_perc, top_perc)

    return reward

In [8]:
# Define a reward function
def get_result(df, min_volatility, bottom_perc, top_perc):
    """Score a parameter set against a prepared dataset.

    Keeps the rows whose ``close_perc`` lies strictly between ``bottom_perc``
    and ``top_perc`` and whose ``volatility`` is strictly above
    ``min_volatility``, then rewards rows that stayed in bounds and penalises
    those flagged ``out_of_bounds``.

    Args:
        df: Frame with 'close_perc', 'volatility' and 'out_of_bounds' columns.
        min_volatility: Exclusive lower bound on 'volatility'.
        bottom_perc: Exclusive lower bound on 'close_perc'.
        top_perc: Exclusive upper bound on 'close_perc'.

    Returns:
        (total_reward, total_len, after_filter_len, out_of_bounds), where the
        reward is -1 when no row survives the filter.
    """
    total_len = len(df)

    # Build the row selection from its two independent conditions
    within_band = (df['close_perc'] > bottom_perc) & (df['close_perc'] < top_perc)
    volatile_enough = df['volatility'] > min_volatility
    selected = df[within_band & volatile_enough]
    after_filter_len = len(selected)

    # Number of selected rows that broke their limit
    out_of_bounds = selected['out_of_bounds'].sum()

    # Nothing selected: fixed penalty
    if not after_filter_len > 0:
        return -1, total_len, after_filter_len, out_of_bounds

    # Reward = hit-rate-weighted hits minus miss-rate-weighted misses
    in_bounds = after_filter_len - out_of_bounds
    percentage = in_bounds / after_filter_len
    total_reward = percentage * in_bounds - ((1 - percentage) * out_of_bounds)

    return total_reward, total_len, after_filter_len, out_of_bounds

In [9]:
# Seed handed to PyGAD so the stochastic search gives the same answer on re-run
RANDOM_SEED = 42


def _percent(part, whole):
    """Return `part` as a percentage of `whole`, or NaN when `whole` is zero."""
    return 100 * (part / whole) if whole else float('nan')


def _print_result(timeframe, label, result):
    """Print the record counts and in/out-of-bounds split for one dataset.

    `result` is the 4-tuple returned by get_result; `label` is the dataset
    name shown in the banner (e.g. 'TRAIN').
    """
    _, total_len, after_filter_len, out_of_bounds = result
    in_bounds = after_filter_len - out_of_bounds

    print(f' Result for timeframe {timeframe} ({label}) '.center(60, '*'))
    print(f"* Total Records        : {total_len}")
    print(f"* Records after filter : {after_filter_len}")
    print(f"* Out Of Bounds        : {out_of_bounds} ({_percent(out_of_bounds, after_filter_len):.1f}%)")
    print(f"* Inside Bounds        : {in_bounds} ({_percent(in_bounds, after_filter_len):.1f}%)")


for timeframe in TIMEFRAMES:

    # Get Train and Test data for timeframe.
    # `train` must stay a module-level name: fitness_func reads it as a global.
    train, test = get_data(timeframe, LEN[timeframe])

    # Process timeframe
    print("".center(60, "*"))
    print(f' PROCESSING TIMEFRAME {timeframe} '.center(60, '*'))
    print("".center(60, "*"))

    with tqdm(total=GENERATIONS) as pbar:

        # Create Genetic Algorithm.
        # Genes are (min_volatility, bottom_perc, top_perc); gene_space bounds
        # each of them in that order.
        ga_instance = pygad.GA(num_generations=GENERATIONS,
                               num_parents_mating=5,
                               fitness_func=fitness_func,
                               sol_per_pop=SOLUTIONS,
                               num_genes=3,
                               gene_space=[{'low': 0, 'high':0.05}, {'low': 0, 'high':1}, {'low': 0, 'high':1}],
                               parent_selection_type="sss",
                               crossover_type="single_point",
                               mutation_type="random",
                               mutation_num_genes=1,
                               keep_parents=-1,
                               # advance the progress bar once per generation
                               on_generation=lambda _: pbar.update(1),
                               random_seed=RANDOM_SEED,
                               )

        # Run the Genetic Algorithm
        ga_instance.run()

    # Show details of the best solution.
    solution, solution_fitness, _ = ga_instance.best_solution()

    print(' Best Solution Parameters '.center(60, '*'))
    print(f"* Min Volatility       : {solution[0]:.4f}")
    print(f"* Bottom Perc.         : {solution[1]:.4f}")
    print(f"* Top Perc.            : {solution[2]:.4f}")

    # Evaluate the best parameters on the data they were fitted on, then on
    # the held-out data
    _print_result(timeframe, 'TRAIN', get_result(train, solution[0], solution[1], solution[2]))
    _print_result(timeframe, 'TEST', get_result(test, solution[0], solution[1], solution[2]))

    print("")

************************************************************
***************** PROCESSING TIMEFRAME 5T ******************
************************************************************


100%|██████████| 50/50 [00:18<00:00,  2.70it/s]


***************** Best Solution Parameters *****************
* Min Volatility       : 0.0204
* Bottom Perc.         : 0.0212
* Top Perc.            : 0.9979
************* Result for timeframe 5T (TRAIN) **************
* Total Records        : 444894
* Records after filter : 311419
* Out Of Bounds        : 60828 (19.5%)
* Inside Bounds        : 250591 (80.5%)
************** Result for timeframe 5T (TEST) **************
* Total Records        : 16729
* Records after filter : 11675
* Out Of Bounds        : 1922 (16.5%)
* Inside Bounds        : 9753 (83.5%)

************************************************************
***************** PROCESSING TIMEFRAME 15T *****************
************************************************************


100%|██████████| 50/50 [00:07<00:00,  6.34it/s]


***************** Best Solution Parameters *****************
* Min Volatility       : 0.0176
* Bottom Perc.         : 0.0036
* Top Perc.            : 0.9896
************* Result for timeframe 15T (TRAIN) *************
* Total Records        : 166316
* Records after filter : 115174
* Out Of Bounds        : 24037 (20.9%)
* Inside Bounds        : 91137 (79.1%)
************* Result for timeframe 15T (TEST) **************
* Total Records        : 6312
* Records after filter : 4333
* Out Of Bounds        : 974 (22.5%)
* Inside Bounds        : 3359 (77.5%)

************************************************************
***************** PROCESSING TIMEFRAME 1H ******************
************************************************************


100%|██████████| 50/50 [00:02<00:00, 18.10it/s]


***************** Best Solution Parameters *****************
* Min Volatility       : 0.0145
* Bottom Perc.         : 0.0008
* Top Perc.            : 0.9940
************* Result for timeframe 1H (TRAIN) **************
* Total Records        : 52039
* Records after filter : 35221
* Out Of Bounds        : 7858 (22.3%)
* Inside Bounds        : 27363 (77.7%)
************** Result for timeframe 1H (TEST) **************
* Total Records        : 2016
* Records after filter : 1382
* Out Of Bounds        : 371 (26.8%)
* Inside Bounds        : 1011 (73.2%)

************************************************************
***************** PROCESSING TIMEFRAME 1D ******************
************************************************************


100%|██████████| 50/50 [00:01<00:00, 46.56it/s]

***************** Best Solution Parameters *****************
* Min Volatility       : 0.0189
* Bottom Perc.         : 0.0000
* Top Perc.            : 0.9735
************* Result for timeframe 1D (TRAIN) **************
* Total Records        : 5048
* Records after filter : 3155
* Out Of Bounds        : 703 (22.3%)
* Inside Bounds        : 2452 (77.7%)
************** Result for timeframe 1D (TEST) **************
* Total Records        : 191
* Records after filter : 131
* Out Of Bounds        : 36 (27.5%)
* Inside Bounds        : 95 (72.5%)




