In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import random
from deap import base
from deap import creator
from deap import tools
from deap import algorithms
import itertools

# FIXED PARAMETERS
POPULATION_SIZE = 64  # number of individuals in population
GENERATIONS = 10000  # number of generations (evaluation rounds)
MAX_RUNS = 30  # number of runs with different random seeds

# HYPERPARAMETERS
CROSSOVER_PROBABILITY = 0.8  # probability of crossover operation
MUTATION_PROBABILITY = 0.2  # probability of mutation operation
TOURNAMENT_SIZE = 3  # number of individuals participating in tournament selection


# Create the fitness and individual classes.
# DEAP compares fitnesses on their weighted values, so a positive weight means
# "maximise" and a negative weight means "minimise". The objective computed by
# evaluate_individual is an average ROI and the class is named FitnessMax, so
# the weight must be positive (it used to be -1.0, which minimised the ROI).
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Candidate values for each gene of an individual.
rsi_periods = [7, 14, 21]
lower_range = list(range(0, 101, 5))  # 0, 5, ..., 100
upper_range = list(range(0, 101, 5))  # 0, 5, ..., 100

def process_data(data, period=7):
    """Restrict price data to 2020-2022 and derive a rolling-mean RSI.

    Args:
        data: DataFrame with a 'Date' column of ``dd/mm/YYYY`` strings and a
            numeric 'Close' column. The caller's frame is left unmodified.
        period: Number of rows in the rolling window used for the average
            gain and average loss. Defaults to 7, the window that was
            previously hard-coded.

    Returns:
        A tuple ``(data, RSI, price, date)``: ``data`` is the filtered frame
        with a fresh 0-based index and the added columns 'Value_Diff', 'Gain',
        'Loss', 'Average Gain', 'Average Loss', 'RS' and 'RSI'; the other
        three items are the ``.values`` of the 'RSI', 'Close' and 'Date'
        columns. The first ``period - 1`` RSI entries are NaN.
    """
    # Unambiguous ISO bounds: 01/01/2020 to 31/12/2022 inclusive (3 years).
    window_start = pd.Timestamp('2020-01-01')
    window_end = pd.Timestamp('2022-12-31')

    # Parse into a separate Series so the caller's 'Date' column is not
    # overwritten, then work on an explicit copy of the selected rows.
    dates = pd.to_datetime(data['Date'], format='%d/%m/%Y')
    in_window = dates.between(window_start, window_end)
    data = data.loc[in_window].copy()
    data['Date'] = dates.loc[in_window]

    # Difference between consecutive closing prices (NaN for the first row).
    delta = data['Close'].diff()
    data['Value_Diff'] = delta

    # Split the change into non-negative gain and loss magnitudes. Rows where
    # the comparison is False (including the NaN first row) become 0.0, which
    # matches max(0, x) / max(0, -x) applied element-wise.
    data['Gain'] = delta.where(delta > 0, 0.0)
    data['Loss'] = (-delta).where(delta < 0, 0.0)

    # Rolling mean of gains and losses over `period` rows.
    data['Average Gain'] = data['Gain'].rolling(window=period).mean()
    data['Average Loss'] = data['Loss'].rolling(window=period).mean()

    # Relative Strength (RS); a zero average loss gives inf (RSI 100), and
    # zero gain together with zero loss gives NaN.
    data['RS'] = data['Average Gain'] / data['Average Loss']

    # Relative Strength Index (RSI).
    data['RSI'] = 100 - (100 / (1 + data['RS']))

    RSI = data['RSI'].values
    price = data['Close'].values
    date = data['Date'].values

    # Filtering left gaps in the index; renumber from 0.
    data.reset_index(drop=True, inplace=True)
    return data, RSI, price, date

def evaluate_individual(individual):
    """Return the average of the long and short ROI as a DEAP fitness tuple.

    Args:
        individual: Either a 6-gene sequence laid out as
            ``(rsi_period_short, rsi_period_long, lower_short, upper_short,
            lower_long, upper_long)`` (the layout the toolbox generates), or
            a 4-gene sequence ``(lower_long, upper_long, lower_short,
            upper_short)`` (the layout this function originally unpacked).

    Returns:
        A 1-tuple ``(average_roi,)``. DEAP assigns the result to
        ``individual.fitness.values``, which must be a sequence with one
        entry per objective, so a bare float is not usable.

    Raises:
        ValueError: If the individual has neither 4 nor 6 genes.
        ZeroDivisionError: If ``lower_long`` or ``upper_short`` is 0.
            NOTE(review): the gene ranges include 0, so this can occur for
            generated individuals - decide how such individuals should score.
    """
    if len(individual) == 6:
        # The two RSI-period genes do not enter the ROI formula.
        _, _, lower_short, upper_short, lower_long, upper_long = individual
    else:
        # Any length other than 4 fails here with ValueError, as before.
        lower_long, upper_long, lower_short, upper_short = individual

    roi_long = (upper_long - lower_long) / lower_long * 100
    roi_short = (upper_short - lower_short) / upper_short * 100
    average_roi = (roi_long + roi_short) / 2
    return (average_roi,)



# create the toolbox
toolbox = base.Toolbox()

# One random generator per gene; each draws uniformly from its candidate list.
toolbox.register("rsi_period_short", random.choice, rsi_periods)
toolbox.register("rsi_period_long", random.choice, rsi_periods)
toolbox.register("lower_short", random.choice, lower_range)
toolbox.register("upper_short", random.choice, upper_range)
toolbox.register("lower_long", random.choice, lower_range)
toolbox.register("upper_long", random.choice, upper_range)


def _random_genes():
    """Draw one value per gene, in the order the genes appear in an individual."""
    # The draw order is fixed so that a given random seed keeps producing the
    # same individuals.
    return (
        toolbox.rsi_period_short(),
        toolbox.rsi_period_long(),
        toolbox.lower_short(),
        toolbox.upper_short(),
        toolbox.lower_long(),
        toolbox.upper_long(),
    )


toolbox.register("individual", tools.initIterate, creator.Individual, _random_genes)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Use the TOURNAMENT_SIZE hyperparameter instead of a hard-coded 3, so that
# changing the constant actually changes the selection pressure.
toolbox.register("select", tools.selTournament, tournsize=TOURNAMENT_SIZE)
toolbox.register("evaluate", evaluate_individual)





def algorithm(data, RSI, price, date):
    """Build and print a seeded initial population for each of MAX_RUNS runs.

    Work in progress: only the initial population is created and printed; no
    evaluation, selection or variation is performed yet.

    Args:
        data: Accepted but not used yet.
        RSI: Accepted but not used yet.
        price: Accepted but not used yet.
        date: Accepted but not used yet.

    Returns:
        None.
    """
    for run in range(MAX_RUNS):
        # Seed with the run index so every run is reproducible but distinct.
        random.seed(run)
        print('--Run: {0}--\n'.format(run))

        # create the initial population
        pop = toolbox.population(n=POPULATION_SIZE)
        print(pop)
        print('Starting evolution...')

        # TODO: evaluate every individual in `pop`, store the result in
        # `ind.fitness.values`, then run the evolutionary loop.


folder_path = 'data'

# os.listdir returns entries in arbitrary order; sort them so the datasets are
# always loaded and processed in the same order.
file_list = sorted(os.listdir(folder_path))

# Per-dataset results, keyed by the CSV file's base name.
data_dictionary = {}
RSI_dictionary = {}
price_dictionary = {}
date_dictionary = {}

for file_name in file_list:
    if not file_name.endswith('.csv'):
        continue
    # extract the base name of the file (without the extension)
    df_name = os.path.splitext(file_name)[0]
    # construct the full file path
    file_path = os.path.join(folder_path, file_name)
    # read only the two columns the RSI computation needs
    raw_data = pd.read_csv(file_path, sep=';', usecols=['Date', 'Close'])
    (
        data_dictionary[df_name],
        RSI_dictionary[df_name],
        price_dictionary[df_name],
        date_dictionary[df_name],
    ) = process_data(raw_data)

for df_name in data_dictionary:
    algorithm(
        data_dictionary[df_name],
        RSI_dictionary[df_name],
        price_dictionary[df_name],
        date_dictionary[df_name],
    )



def main():
    """Placeholder entry point; it currently does nothing and returns None."""
    return None


if __name__ == "__main__":
    main()


--Run: 0--

[[14, 14, 5, 40, 80, 75], [14, 14, 75, 55, 90, 30], [21, 7, 45, 20, 15, 95], [14, 21, 95, 20, 45, 15], [21, 7, 50, 75, 85, 15], [14, 14, 50, 95, 100, 30], [21, 14, 70, 80, 40, 5], [21, 7, 10, 60, 100, 0], [21, 14, 50, 35, 50, 10], [7, 21, 35, 35, 20, 85], [14, 7, 10, 50, 80, 75], [7, 14, 85, 45, 15, 85], [14, 21, 30, 95, 85, 90], [14, 14, 10, 95, 60, 50], [21, 7, 45, 25, 30, 25], [7, 21, 40, 75, 10, 10], [21, 7, 20, 5, 10, 85], [21, 14, 80, 40, 80, 35], [7, 21, 90, 65, 90, 40], [14, 14, 100, 55, 10, 50], [21, 7, 75, 90, 100, 50], [7, 7, 0, 40, 15, 35], [14, 7, 50, 65, 5, 15], [7, 21, 35, 5, 90, 100], [21, 21, 10, 0, 15, 100], [7, 21, 90, 15, 60, 10], [14, 7, 5, 95, 0, 30], [7, 21, 15, 75, 30, 5], [21, 7, 85, 65, 95, 15], [14, 7, 35, 10, 100, 45], [14, 14, 25, 5, 80, 70], [7, 21, 15, 60, 30, 40], [14, 21, 75, 90, 25, 30], [7, 21, 25, 25, 50, 80], [14, 7, 95, 70, 25, 0], [14, 21, 65, 90, 80, 45], [21, 14, 60, 40, 20, 85], [21, 7, 70, 10, 50, 5], [21, 14, 20, 35, 75, 55], [21,