In [61]:
# import important library
import pandas as pd
import numpy as np
import bot

In [2]:
import importlib
importlib.reload(bot)

<module 'bot' from '/Users/henrytran/Documents/GitHub/aias_trading25/bot.py'>

# 1. Data Processing

In [3]:
# read the csv file
daily_df=pd.read_csv('BTC-Daily.csv')
daily_df.head()

Unnamed: 0,unix,date,symbol,open,high,low,close,Volume BTC,Volume USD
0,1646092800,2022-03-01 00:00:00,BTC/USD,43221.71,43626.49,43185.48,43185.48,49.006289,2116360.0
1,1646006400,2022-02-28 00:00:00,BTC/USD,37717.1,44256.08,37468.99,43178.98,3160.61807,136472300.0
2,1645920000,2022-02-27 00:00:00,BTC/USD,39146.66,39886.92,37015.74,37712.68,1701.817043,64180080.0
3,1645833600,2022-02-26 00:00:00,BTC/USD,39242.64,40330.99,38600.0,39146.66,912.724087,35730100.0
4,1645747200,2022-02-25 00:00:00,BTC/USD,38360.93,39727.97,38027.61,39231.64,2202.851827,86421490.0


In [4]:
# convert time strings into timestamps, and take only year, month and date values
daily_df["date"]=pd.to_datetime(daily_df["date"]).dt.strftime("%Y-%m-%d")

In [5]:
daily_df.shape

(2651, 9)

The dataset has 2651 records from 2014 to 2022.

As the data is needed for training and testing, every records before 2020 will be used for training and every records after 2020 will be used for testing.

In [6]:
# splitting the data
training = daily_df[daily_df["date"]<"2020-01-01"].iloc[::-1]
testing = daily_df[daily_df["date"]>="2020-01-01"].iloc[::-1]

In [7]:
# convert the data into csv files
training.to_csv('training.csv')
testing.to_csv('testing.csv')

After splitting, the training dataset has 1860 records and the testing data has 791 records.

In [8]:
print(training.shape)
print(testing.shape)

(1860, 9)
(791, 9)


# 2. Hill-climbing algorithm

## a. Hill-climbing algorithm 

In [103]:
# The bounds will be 
# sma: [(5,50),(51,100)]
# smaema: [(5,50),(51,100),(0,1)]
# weights: [(0,10),(0,10),(0,10),(5,50),(5,50),(5,50),(0,1), (0,10),(0,10),(0,10),(51,100),(51,100),(51,100),(0,1)]

def hill_climbing(bot_type, bounds, max_iter=1000):
    rng = np.random.default_rng()
    high_window = None
    low_window = None
    alpha=0
    new_high_frequency_window = high_window 
    new_low_frequency_window = low_window
    new_alpha=0
    # processing the bounds inputs
    if bot_type.lower() == 'sma':
        high_window = int(rng.integers(bounds[0][0],bounds[0][1]))
        low_window = int(rng.integers(bounds[1][0],bounds[1][1]))
    elif bot_type.lower() == 'smaema':
        high_window = int(rng.integers(bounds[0][0],bounds[0][1]))
        low_window = int(rng.integers(bounds[1][0],bounds[1][1]))
        alpha = rng.uniform(bounds[2][0], bounds[-1][1])
    elif bot_type.lower() == 'complex':
        # generate the values for high
        weight_sma_high= int(rng.integers(bounds[0][0],bounds[0][1]))
        weight_lma_high= int(rng.integers(bounds[1][0],bounds[0][1]))
        weight_ema_high= int(rng.integers(bounds[2][0],bounds[0][1]))
        window_sma_high= int(rng.integers(bounds[3][0],bounds[0][1]))
        window_lma_high= int(rng.integers(bounds[4][0],bounds[0][1]))
        window_ema_high= int(rng.integers(bounds[5][0],bounds[0][1]))
        alpha_high = rng.uniform(bounds[6][0], bounds[6][1])
        high_window = [weight_sma_high,weight_lma_high, weight_ema_high, window_sma_high, window_lma_high, window_ema_high, alpha_high]
        # generate the values for low
        weight_sma_low= int(rng.integers(bounds[7][0],bounds[7][1]))
        weight_lma_low= int(rng.integers(bounds[8][0],bounds[8][1]))
        weight_ema_low= int(rng.integers(bounds[9][0],bounds[9][1]))
        window_sma_low= int(rng.integers(bounds[10][0],bounds[10][1]))
        window_lma_low= int(rng.integers(bounds[11][0],bounds[11][1]))
        window_ema_low= int(rng.integers(bounds[12][0],bounds[12][1]))
        alpha_low = rng.uniform(bounds[13][0], bounds[13][1])
        low_window = [weight_sma_low, weight_lma_low, weight_ema_low, window_sma_low, window_lma_low, window_ema_low, alpha_low]

    # the best cash
    cash1=0
    for i in range(max_iter):
        # find the correct parameter tweak:
        if bot_type.lower() == 'sma':
            new_high_frequency_window, new_low_frequency_window = parameter_tweak('sma', high_window, low_window)
        elif bot_type.lower() == 'smaema':
            new_high_frequency_window, new_low_frequency_window, new_alpha = parameter_tweak('smaema', high_window, low_window, alpha)
        elif bot_type.lower() == 'complex':
            # generate the values 
            new_high_frequency_window, new_low_frequency_window = parameter_tweak('complex',high_window, low_window)
            
        # run the total cash return after trading
        cash1=bot_fitness_func(bot_type, high_window, low_window, alpha)
        cash2=bot_fitness_func(bot_type, new_high_frequency_window, new_low_frequency_window, new_alpha)
        # compare the cash earned after tweaking the parameters
        if cash2 > cash1:
            high_window = new_high_frequency_window
            low_window = new_low_frequency_window
            alpha=new_alpha
            cash1 = cash2
    if bot_type.lower() == 'sma' or bot_type.lower() == 'complex':
        return [high_window, low_window], float(cash1)
    elif bot_type.lower() == 'smaema':
        return [high_window, low_window, alpha], float(cash1)

def parameter_tweak(bot_type, hfw, lfw, alpha =0):
    if bot_type.lower() == 'sma':
        new_hfw, new_lfw = window_tweak(hfw, lfw)
        return new_hfw, new_lfw
    elif bot_type.lower() == 'smaema' and alpha !=0:
        new_hfw, new_lfw = window_tweak(hfw, lfw)
        new_alpha = alpha_tweak(alpha)
        return new_hfw, new_lfw, new_alpha
    elif bot_type.lower() == 'complex':
        new_hfw, new_lfw = complex_tweak(hfw, lfw)
        return new_hfw, new_lfw

def window_tweak(hfw, lfw):
    rng = np.random.default_rng()
    for _ in range(100):
        # new high frequency window
        a=int(rng.integers(-5,6))
        new_hfw=hfw+a
        # new low frequency window
        b=int(rng.integers(-5,6))
        new_lfw=lfw+b
        # we check to make sure that new_hfw in range(11,40) and new_lfw(2,10)
        new_hfw = max(2, min(50, new_hfw)) # can change the bounds to 5-50 
        new_lfw = max(51, min(100, new_lfw)) # can change the bounds to 51-100
        if new_hfw < new_lfw:
            return new_hfw, new_lfw
    return hfw, lfw

def alpha_tweak(alpha):
    rng = np.random.default_rng()
    for _ in range(100):
        diff = rng.uniform(-0.15, 0.15)
        new_alpha = alpha + diff
        if 0<= new_alpha <= 1:
            return new_alpha
    return alpha # if after 100 loops and cannot find the optimal value, then we return the original alpha

def weights_tweak(weight_lst):
    rng = np.random.default_rng()
    # random index number of a weight in a list
    position=int(rng.integers(0,3))
    # normalised weights
    normalised_weights = [float(weight)/sum(weight_lst) for weight in weight_lst]

    # tweak the randomly selected weight with proportion different
    selected_weight_rate = normalised_weights[position]
    diff = np.random.uniform(-0.15, 0.15)
    selected_weight_rate += diff

    # the rate of other weights
    other_weight_idx= [idx for idx in range(3) if idx!=position]
    remainder_rate = 1- selected_weight_rate 
    # the remaining 2 weights, we generate randomly the proportion and multiply it with the current rates 
    one_weight_rate = rng.uniform(0.1, 0.9)
    another_weight_rate = 1- one_weight_rate
    other_weight_rates=[one_weight_rate, another_weight_rate]
    new_other_weight_rates = [remainder_rate * s for s in other_weight_rates]

    new_weights = [0,0,0]
    new_weights[position] = selected_weight_rate * sum(weight_lst)
    for i in range(2):
        idx = other_weight_idx[i]
        new_weights[idx] = new_other_weight_rates[i] * sum(weight_lst)

    return new_weights[0], new_weights[1], new_weights[2]


def complex_tweak(hfw, lfw):
    # in this code, hfw and lfw input are array-type in format of [w1, w2, w3, d1, d2, d3, sf]
    # weight tweak
    new_hfw_w1, new_hfw_w2,new_hfw_w3 = weights_tweak(hfw[0:3])
    new_lfw_w1, new_lfw_w2,new_lfw_w3 = weights_tweak(lfw[0:3])

    # window tweak
    new_hfw_d1, new_lfw_d1 = window_tweak(hfw[3], lfw[3])
    new_hfw_d2, new_lfw_d2 = window_tweak(hfw[4], lfw[4])
    new_hfw_d3, new_lfw_d3 = window_tweak(hfw[5], lfw[5])

    # alpha tweak
    new_hfw_alpha = alpha_tweak(hfw[-1])
    new_lfw_alpha = alpha_tweak(lfw[-1])

    return [new_hfw_w1, new_hfw_w2, new_hfw_w3, new_hfw_d1, new_hfw_d2, new_hfw_d3,new_hfw_alpha], [new_lfw_w1, new_lfw_w2, new_lfw_w3, new_lfw_d1, new_lfw_d2, new_lfw_d3, new_lfw_alpha]


## b. Fitness function on training and testing data

In [67]:
def bot_fitness_func(bot_type, high_window, low_window, alpha=0): # change bot_signals to high_frequency_window_size, low_frequency_window_size
    # intialise bot, use training dataset for optimisation algorithms fitness functions
    bot_signals=[]
    close_price = pd.read_csv('training.csv')['close']
    
    if bot_type.lower() == 'sma' and alpha ==0:
        bot_signals = bot.get_signals_sma2(close_price, high_window, low_window) #need to think about how to call 2 other bot algorithms
    elif bot_type.lower() == 'smaema' and alpha!=0:
        bot_signals = bot.get_signals_smaema(close_price, high_window, low_window, alpha)
    elif bot_type.lower() == 'complex' and alpha==0: #as alpha values were stored in the list of high_window and low_window
        bot_signals = bot.get_signals_complex(close_price, high_window, low_window)

    # initial values
    cash = 1000
    fee=0.03
    bitcoin = 0.0

    #loop through the time length
    for i in range(min(len(bot_signals),len(close_price)-1)):
        close=close_price.iloc[i]
        # buy, ensure we have cash to buy
        if bot_signals[i] == "buy" and cash>0:
            bitcoin =  (cash*(1-fee))/close
            cash = 0
        # sell, ensure we have bitcoin to sell
        elif bot_signals[i] == "sell" and bitcoin>0:
            cash = bitcoin * close * (1-fee)
            bitcoin =0
    
    # final evaluation to change back to cash
    last_close=close_price.iloc[-1]
    if bitcoin>0:
        cash = bitcoin * last_close * (1-fee)
        bitcoin =0
        return cash
    elif cash >0:
        return cash

In [99]:
def bot_testing(bot_type, optimal_values, alpha=0):
    # intialise bot, use training dataset for optimisation algorithms fitness functions
    bot_signals=[]
    close_price = pd.read_csv('testing.csv')['close']
    time = pd.read_csv('testing.csv')['date']
    result=[]

    # intialise bot, use training dataset for optimisation algorithms fitness functions
    if bot_type.lower() == 'sma' and alpha ==0:
        bot_signals = bot.get_signals_sma2(close_price, optimal_values[0], optimal_values[1])
    elif bot_type.lower() == 'smaema' and alpha!=0:
        bot_signals = bot.get_signals_smaema(close_price, optimal_values[0], optimal_values[1], optimal_values[2])
    elif bot_type.lower() == 'complex' and alpha==0: #as alpha values were stored in the list of high_window and low_window
        bot_signals = bot.get_signals_complex(close_price, optimal_values[0], optimal_values[1])
    
    # initial values
    cash = 1000
    fee=0.03
    bitcoin = 0.0
    
    #loop through the time length
    for i in range(min(len(bot_signals),len(close_price)-1)):
        close=close_price[i]
        # buy
        if bot_signals[i] == "buy" and cash>0:
            bitcoin =  (cash*(1-fee))/close
            cash = 0
            result.append([time[i],cash, bitcoin])
        # sell
        elif bot_signals[i] == "sell" and bitcoin>0:
            cash = bitcoin * close * (1-fee)
            bitcoin =0
            result.append([time[i],cash, bitcoin])
    
    # final evaluation to change back to cash
    last_close=close_price.iloc[-1]
    if bitcoin>0:
        cash = bitcoin * last_close * (1-fee)
        bitcoin =0
        result.append([time.iloc[-1],cash, bitcoin])
        return result
    elif cash>0:
        result.append([time.iloc[-1],cash, bitcoin])
        return result

# 3. Running the whole bot
## PART A: Training the bot to get the optimal result

In [100]:
#sma
sma_optimals, cash_result = hill_climbing('sma',[(5,50),(51,100)])
sma_optimals, cash_result

([50, 87], 22693.582411359766)

In [105]:
#smaema
samema_optimals, cash_result = hill_climbing('smaema', [(5,50),(51,100),(0,1)])
samema_optimals, cash_result

([42, 93, 0.6706570006612922], 9001.17284020968)

In [108]:
#complex
complex_optimals, cash_result = hill_climbing('complex',[(0,10),(0,10),(0,10),(5,50),(5,50),(5,50),(0,1), (0,10),(0,10),(0,10),(51,100),(51,100),(51,100),(0,1)])
complex_optimals, cash_result

([[2.1504862911438005,
   7.081781609383729,
   10.767732099472454,
   5,
   12,
   12,
   0.7985980675032072],
  [5.569622314857168,
   1.7910499968519256,
   3.639327688290903,
   99,
   63,
   95,
   0.7290797568119006]],
 21162.20872879772)

## PART B: Testing the bot with optimal results


In [72]:
def bot_evaluation(bot_type, optimal_values):
    # This function will returns the result nicely
    result_lst=bot_testing(bot_type, optimal_values)
    result_df=pd.DataFrame(result_lst, columns=["Time", "Cash", "Bitcoin"])
    print(result_df.to_string(index=False, justify="center", float_format='{:,.2f}'.format))
    #return result_df

In [104]:
#sma - nice printout
bot_evaluation('sma', sma_optimals)

   Time      Cash    Bitcoin
2020-01-04     0.00   0.13  
2020-03-27   816.83   0.00  
2020-05-15     0.00   0.09  
2020-10-09   912.73   0.00  
2020-10-31     0.00   0.06  
2021-05-22 2,330.99   0.00  
2021-08-14     0.00   0.05  
2021-12-22 2,263.01   0.00  
2022-03-01 2,263.01   0.00  


In [107]:
#smaema
bot_evaluation('smaema', samema_optimals)

   Time     Cash  Bitcoin
2022-03-01  1000   0.00  


In [109]:
#complex
bot_evaluation('complex',complex_optimals)

   Time      Cash    Bitcoin
2020-03-24     0.00   0.14  
2020-03-27   886.32   0.00  
2020-04-23     0.00   0.11  
2021-05-17 4,857.23   0.00  
2021-07-27     0.00   0.12  
2021-12-09 5,509.98   0.00  
2022-02-14     0.00   0.13  
2022-02-20 4,676.59   0.00  
2022-02-28     0.00   0.11  
2022-03-01 4,400.86   0.00  
