In [1]:
import pandas as pd
import numpy as np
import os
import neat
import pickle
import ta
from collections import deque
import random
import time
from statistics import mean
from sklearn import preprocessing

#hide warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Number of most-recent rows kept in the rolling feature window
# (deque_sequence uses it as the deque's maxlen and initial row count).
sequence_length = 30

In [3]:
#reading all csv from data folder and combine them all
def load_data(data_dir='data', tradable_path='tradable.csv'):
    """Load every price file in ``data_dir`` and group the tradable stocks.

    Each file is read as a header-less CSV with the columns
    ``stock code, Date, Open, High, Low, Close, Volume, Netforeign``.  Rows
    with missing values are dropped and each frame is indexed and sorted by
    ``Date`` before all frames are concatenated.  The result is filtered to
    the stock codes found in the ``stock`` column of ``tradable_path``.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding the price files (default ``'data'``).
    tradable_path : str, optional
        CSV file with a ``stock`` column listing the tradable stock codes
        (default ``'tradable.csv'``).

    Returns
    -------
    pandas GroupBy object
        The tradable rows grouped by ``'stock code'``.

    Raises
    ------
    ValueError
        If ``data_dir`` contains no files to read.
    """
    dfs = []

    # sorted() makes the read order independent of the filesystem.
    for item in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, item)

        # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints), which
        # pd.read_csv cannot open.
        if not os.path.isfile(path):
            continue

        df = pd.read_csv(path,
                         header=None,
                         names=['stock code','Date','Open','High','Low','Close','Volume','Netforeign'])
        df['Date'] = pd.to_datetime(df['Date'])

        # drop incomplete rows, index by date and sort values by date
        df = df.dropna().set_index('Date').sort_values('Date')
        dfs.append(df)

    if not dfs:
        raise ValueError(f"no data files found in {data_dir!r}")

    main_df = pd.concat(dfs)

    # read tradable stocks and keep only their rows
    tradable = pd.read_csv(tradable_path)
    tradable_stock_df = main_df[main_df['stock code'].isin(tradable['stock'])]

    # Convert to a plain object array first so sorting and printing behave
    # the same whichever array type .unique() returns.
    tradable_stock_list = np.sort(
        np.asarray(tradable_stock_df['stock code'].unique(), dtype=object))

    print(tradable_stock_list,len(tradable_stock_list))
    print("\n\n")

    # group by tradable stock
    return tradable_stock_df.groupby('stock code')

In [4]:
def process_data(data):
    """Turn one stock's raw OHLCV frame into scaled model features.

    The input frame is left untouched; all work happens on new frames, so
    the function can be called repeatedly on the same input.

    Steps:
      1. drop the ``Netforeign`` and ``stock code`` columns,
      2. replace zero ``Volume`` values with 1 and interpolate gaps,
      3. add the ``ta`` technical indicators and keep a fixed column subset,
      4. append ``Pure_Close`` (the unscaled close) as the last column,
      5. drop rows containing NaN and standardise every column except
         ``Pure_Close`` with ``preprocessing.scale``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least ``Open, High, Low, Close, Volume, Netforeign``
        and ``stock code`` columns.

    Returns
    -------
    pandas.DataFrame
        Scaled feature columns followed by the raw ``Pure_Close`` column.
        The frame may be empty when no row survives the NaN drop.

    Raises
    ------
    KeyError
        If ``Netforeign`` or ``stock code`` is missing from ``data``.
    """
    # Keyword form of drop(): the positional axis argument is rejected by
    # pandas 2.x.  drop() without inplace also returns a new frame, which
    # keeps the caller's data intact.
    data = data.drop(columns=["Netforeign", "stock code"])

    # replace 0 value volume with 1 (assigned back rather than modified
    # through a chained inplace call, which may act on a temporary copy)
    data["Volume"] = data["Volume"].replace(0, 1)
    data = data.interpolate()

    #adding technical indicators
    data = ta.add_all_ta_features(data,
                                  open="Open",
                                  high="High",
                                  low="Low",
                                  close="Close",
                                  volume="Volume",
                                  fillna=False)

    col_list = ["Close","Volume",
                "momentum_rsi","momentum_wr","momentum_ao","momentum_stoch","momentum_stoch_signal",
                "trend_trix","trend_vortex_ind_pos","trend_vortex_ind_neg",
                "trend_vortex_diff","trend_macd","trend_macd_signal","trend_macd_diff",
                "volatility_atr"]

    # Explicit copy so the column assignments below act on an independent
    # frame rather than on a slice of the indicator frame.
    data = data[col_list].copy()

    # Keep the unscaled close as the LAST column; callers read the price
    # from position -1 and the features from everything before it.
    data['Pure_Close'] = data['Close']

    data = data.dropna()

    # Nothing to scale when every row contained a NaN; return the empty
    # frame so the caller's length check can reject it.
    if data.empty:
        return data

    # NOTE(review): scaling uses statistics of the whole history passed in,
    # including rows the caller may later slice away - confirm this is intended.
    for col in data.columns[:-1]:
        data[col] = preprocessing.scale(data[col].values)

    return data

In [5]:
def save_winner(winner, path="winner_net.pickle"):
    """Pickle ``winner`` to ``path``.

    Parameters
    ----------
    winner : object
        Any picklable object (the winning genome in this notebook).
    path : str, optional
        Destination file, overwritten if it exists
        (default ``"winner_net.pickle"``).
    """
    # The context manager closes the file even if pickling fails.
    with open(path, "wb") as pickle_out:
        pickle.dump(winner, pickle_out)

In [6]:
def deque_sequence(data, length=None):
    """Build the zero-filled rolling window used as network input.

    The window is a ``deque`` with ``maxlen=length`` holding ``length``
    rows, each a list of zeros with one entry per column of ``data``
    except the last one.  Appending a new row therefore pushes the oldest
    row out.

    Example for ``length=3`` and a frame with 3 columns (2 kept)::

        [0, 0]
        [0, 0]
        [0, 0]

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns, minus the last one, define the row width.
    length : int, optional
        Number of rows in the window.  Defaults to the module-level
        ``sequence_length``.

    Returns
    -------
    collections.deque
        The zero-initialised window.
    """
    if length is None:
        length = sequence_length

    width = len(data.columns[:-1])

    # A fresh list per row so the rows never alias each other.
    return deque(([0] * width for _ in range(length)), maxlen=length)

In [7]:
def get_action(data,net):
    """Replay one stock's rows through ``net`` and total the rewards.

    For every row of ``data`` the feature values (all columns but the last)
    are pushed into the rolling window from ``deque_sequence``; the
    flattened window is fed to ``net.activate`` and the index of the
    largest output is passed to ``do_action`` as the action, together with
    the row's last column as the current price.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns followed by the price column in the last position.
    net : object
        Anything exposing ``activate(inputs)`` that returns one score per
        action.

    Returns
    -------
    The sum of the rewards returned by ``do_action`` over all rows.
    """
    # trading state for this single stock
    days_held = 0
    entry_price = 0
    total_reward = 0

    # zero-filled window, one row per time step
    window = deque_sequence(data)

    for row in data.values:
        features, price_now = row[:-1], row[-1]

        # newest features in, oldest row out
        window.append(features)

        # window -> 2-D array -> flat input vector for the network
        outputs = net.activate(np.array(window).flatten())

        # action recommended by the network = position of the largest output
        choice = np.argmax(outputs, axis=0)

        entry_price, days_held, step_reward = do_action(
            price_now, choice, days_held, entry_price)
        total_reward += step_reward

    return total_reward

In [8]:
def do_action(current_price,action,position_days,current_position,
              max_holding_days=45,stop_loss=-5/100,win_threshold=1.19/100):
    """Apply one trading action and score a trade when it is closed.

    Action codes: ``0`` = hold, ``1`` = buy, ``2`` = sell.

    * No open position (``position_days == 0``): a buy opens a position at
      ``current_price``; hold and sell do nothing.
    * Open position (``position_days > 0``): the holding counter is
      incremented.  A sell closes the trade immediately.  Hold and buy keep
      the trade open unless the counter reaches ``max_holding_days`` or the
      relative price change falls below ``stop_loss``, in which case the
      trade is closed.
    * A closed trade is rewarded ``+1`` when its relative change exceeds
      ``win_threshold`` and ``-1`` otherwise; ``position_days`` is reset to 0.

    Any other action value leaves the state unchanged.

    Parameters
    ----------
    current_price : number
        Price for the current step.
    action : int
        Action code as described above.
    position_days : int
        Steps the current position has been held; 0 means no position.
    current_position : number
        Entry price of the open position (ignored when none is open).
    max_holding_days : int, optional
        Holding-counter value at which a trade is force-closed (default 45).
    stop_loss : float, optional
        Relative change below which a trade is force-closed (default -0.05).
    win_threshold : float, optional
        Relative change a closed trade must exceed to count as a win
        (default 0.0119).

    Returns
    -------
    tuple
        ``(current_position, position_days, reward)`` with ``reward`` in
        ``{-1, 0, 1}``.
    """
    reward = 0

    # --- no open position: only BUY has an effect -------------------------
    if position_days == 0:
        if action == 1:
            position_days = 1
            current_position = current_price
        return current_position,position_days,reward

    # --- open position ----------------------------------------------------
    if position_days > 0 and action in (0, 1, 2):
        # The relative change is only meaningful while a position is held;
        # computing it here avoids dividing by the placeholder entry price
        # of 0 that is used when nothing is held.
        if current_position:
            position_change = (current_price-current_position)/current_position
        else:
            # Degenerate entry price of 0: no usable ratio, treat as flat.
            position_change = 0.0

        position_days += 1

        forced_exit = (position_days >= max_holding_days
                       or position_change < stop_loss)

        if action == 2 or forced_exit:
            # close the trade and check whether it is a win or a loss
            reward = 1 if position_change > win_threshold else -1

            #RESET
            position_days = 0

    return current_position,position_days,reward

In [9]:
def run(config_file, generations=100):
    """Evolve a NEAT population and save the best genome.

    Parameters
    ----------
    config_file : str
        Path to the NEAT configuration file.
    generations : int, optional
        Maximum number of generations passed to ``Population.run``
        (default 100).

    Returns
    -------
    The best genome returned by ``Population.run``.  It is also written to
    disk through ``save_winner`` and printed.
    """
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    # To continue from a save point, build it with
    # neat.Checkpointer.restore_checkpoint('neat-checkpoint-<n>') instead.
    p = neat.Population(config)

    # Add a stdout reporter to show progress in the terminal.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    # Write checkpoint files while the run progresses.
    p.add_reporter(neat.Checkpointer(5))

    # Run for up to `generations` generations.
    winner = p.run(eval_genomes, generations)
    save_winner(winner)

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))

    return winner

In [10]:
def eval_genomes(genomes, config):
    """Assign a fitness to every genome (NEAT fitness callback).

    Each genome is turned into a ``neat.nn.RecurrentNetwork`` and replayed
    over the processed training frame with ``get_action``; the summed
    reward becomes ``genome.fitness``.  When the training frame is too
    short to be used, every genome receives a fitness of 0.

    The training frame does not depend on the genome, so it is prepared
    once per call instead of once per genome.

    Parameters
    ----------
    genomes : iterable of (genome_id, genome)
        Population members supplied by NEAT.
    config : neat.Config
        Configuration used to build the networks.
    """
    data = _prepare_training_data()

    for genome_id, genome in genomes:
        total_reward = 0

        #initialize genome
        genome.fitness = 0.0
        net = neat.nn.RecurrentNetwork.create(genome, config)

        if data is not None:
            total_reward += get_action(data, net)

        genome.fitness = total_reward


def _prepare_training_data(stock_code='JFC', end_date='2018-11-05', min_rows=30):
    """Return the processed frame for one stock, or None if it is too short.

    Reads the module-level ``stock_data`` groups, sorts the selected stock
    by ``Date``, runs ``process_data`` and keeps the rows up to and
    including ``end_date``.  ``None`` is returned unless more than
    ``min_rows`` rows remain.

    Raises ``KeyError`` if ``stock_code`` is not one of the groups.
    """
    data = stock_data.get_group(stock_code).sort_values('Date')
    data = process_data(data)
    data = data.loc[:end_date]

    return data if len(data) > min_rows else None


In [11]:
# Load the grouped price data once at module level; eval_genomes reads it
# through the global name `stock_data`.
stock_data = load_data()

['ABA' 'ABG' 'AC' 'ACEPH' 'ACEX' 'AEV' 'AGI' 'ALCO' 'ALI' 'ALLHC' 'ANI'
 'AP' 'APC' 'APX' 'AXLM' 'BCOR' 'BDO' 'BLOOM' 'BPI' 'BRN' 'C' 'CEB' 'CHP'
 'CIC' 'CNPF' 'COSCO' 'CPG' 'CPM' 'DD' 'DMC' 'DNL' 'EAGLE' 'EEI' 'EMP'
 'EW' 'FB' 'FGEN' 'FLI' 'FNI' 'FOOD' 'FRUIT' 'GERI' 'GLO' 'GTCAP' 'HLCM'
 'HOME' 'HOUSE' 'ICT' 'IDC' 'IMI' 'ION' 'IRC' 'ISM' 'JFC' 'JGS' 'KPPI'
 'LPZ' 'LR' 'LTG' 'MAC' 'MAXS' 'MBT' 'MEG' 'MER' 'MHC' 'MPI' 'MRC' 'MRSGI'
 'MWC' 'MWIDE' 'NIKL' 'NOW' 'ORE' 'PCOR' 'PGOLD' 'PHA' 'PHES' 'PIP'
 'PIZZA' 'PLC' 'PNB' 'PNX' 'PRMX' 'PX' 'PXP' 'RCB' 'RLC' 'RRHI' 'RWM'
 'SCC' 'SECB' 'SHLPH' 'SLI' 'SM' 'SMC' 'SMPH' 'SSI' 'STI' 'STR' 'TBGI'
 'TECH' 'TEL' 'TUGS' 'UBP' 'URC' 'VITA' 'VLL' 'VUL' 'WIN' 'WLCON' 'WPI'
 'X'] 112





In [12]:
if __name__ == '__main__':
    # Locate the configuration file next to this script so the run does not
    # depend on the current working directory.  A notebook kernel does not
    # define __file__, so fall back to the working directory there.
    try:
        local_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        local_dir = os.getcwd()
    config_path = os.path.join(local_dir, 'config-feedforward.txt')
    run(config_path)


 ****** Running generation 0 ****** 

Population's average fitness: -45.40000 stdev: 19.07022
Best fitness: -5.00000 - size: (3, 675) - species 1 - id 29
Average adjusted fitness: 0.575
Mean genetic distance 1.727, standard deviation 0.327
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    0    60     -5.0    0.575     0
Total extinctions: 0
Generation time: 198.252 sec

 ****** Running generation 1 ****** 

Population's average fitness: -33.06667 stdev: 19.55664
Best fitness: -5.00000 - size: (3, 675) - species 1 - id 29
Average adjusted fitness: 0.616
Mean genetic distance 1.535, standard deviation 0.463
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    1    60     -5.0    0.616     1
Total extinctions: 0
Generation time: 197.848 sec (198.050 average)
Saving checkpoint to neat-checkpoint-1

 ****** Running generation 2 ****** 

Population's average fitness: -26.16667 stdev: 14.89202
Best fitness: -2.000

Population's average fitness: 4.83333 stdev: 11.27559
Best fitness: 36.00000 - size: (3, 637) - species 1 - id 936
Average adjusted fitness: 0.433
Mean genetic distance 1.304, standard deviation 0.277
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   17    60     36.0    0.433     1
Total extinctions: 0
Generation time: 194.973 sec (197.173 average)
Saving checkpoint to neat-checkpoint-17

 ****** Running generation 18 ****** 

Population's average fitness: 5.10000 stdev: 13.22334
Best fitness: 36.00000 - size: (3, 637) - species 1 - id 936
Average adjusted fitness: 0.448
Mean genetic distance 1.193, standard deviation 0.311
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   18    60     36.0    0.448     2
Total extinctions: 0
Generation time: 195.243 sec (197.003 average)

 ****** Running generation 19 ****** 

Population's average fitness: 10.31667 stdev: 11.84271
Best fitness: 36.00000 - size: (3, 637) -

Population's average fitness: 24.86667 stdev: 15.61993
Best fitness: 54.00000 - size: (7, 611) - species 1 - id 1810
Average adjusted fitness: 0.612
Mean genetic distance 1.530, standard deviation 0.443
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   34    60     54.0    0.612     3
Total extinctions: 0
Generation time: 191.025 sec (196.960 average)

 ****** Running generation 35 ****** 

Population's average fitness: 26.08333 stdev: 15.70275
Best fitness: 59.00000 - size: (8, 616) - species 1 - id 2007
Average adjusted fitness: 0.501
Mean genetic distance 1.412, standard deviation 0.456
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   35    60     59.0    0.501     0
Total extinctions: 0
Generation time: 195.328 sec (196.997 average)
Saving checkpoint to neat-checkpoint-35

 ****** Running generation 36 ****** 

Population's average fitness: 25.68333 stdev: 16.55445
Best fitness: 59.00000 - size: (8, 61

Population's average fitness: 39.95000 stdev: 15.70395
Best fitness: 80.00000 - size: (4, 567) - species 1 - id 2918
Average adjusted fitness: 0.444
Mean genetic distance 2.094, standard deviation 0.246
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   51    60     80.0    0.444     0
Total extinctions: 0
Generation time: 188.765 sec (192.879 average)
Saving checkpoint to neat-checkpoint-51

 ****** Running generation 52 ****** 

Population's average fitness: 40.91667 stdev: 14.49401
Best fitness: 80.00000 - size: (4, 567) - species 1 - id 2918
Average adjusted fitness: 0.450
Mean genetic distance 1.893, standard deviation 0.288
Population of 60 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   52    60     80.0    0.450     1
Total extinctions: 0
Generation time: 189.892 sec (191.597 average)

 ****** Running generation 53 ****** 

Population's average fitness: 38.30000 stdev: 15.84645
Best fitness: 80.00000 - size: (4, 56

Population's average fitness: 48.95000 stdev: 18.31250
Best fitness: 81.00000 - size: (5, 573) - species 1 - id 3199
Average adjusted fitness: 0.666
Mean genetic distance 1.666, standard deviation 0.479
Population of 60 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   67    30     81.0    0.660    11
     2    8    30     79.0    0.673     1
Total extinctions: 0
Generation time: 196.092 sec (192.093 average)
Saving checkpoint to neat-checkpoint-67

 ****** Running generation 68 ****** 

Population's average fitness: 50.18333 stdev: 18.00231
Best fitness: 81.00000 - size: (5, 573) - species 1 - id 3199
Average adjusted fitness: 0.637
Mean genetic distance 1.674, standard deviation 0.456
Population of 60 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   68    29     81.0    0.623    12
     2    9    31     79.0    0.652     2
Total extinctions: 0
Generation time: 204.694 sec (193.546 average)

 ****** Running generation 69 ****** 

Populati

Mean genetic distance 1.680, standard deviation 0.559
Population of 60 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   82    29     82.0    0.636     0
     2   23    31     79.0    0.616    16
Total extinctions: 0
Generation time: 193.050 sec (191.087 average)

 ****** Running generation 83 ****** 

Population's average fitness: 52.35000 stdev: 16.56686
Best fitness: 82.00000 - size: (4, 564) - species 1 - id 4651
Average adjusted fitness: 0.598
Mean genetic distance 1.673, standard deviation 0.514
Population of 60 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   83    27     82.0    0.568     1
     2   24    33     81.0    0.629     0
Total extinctions: 0
Generation time: 194.206 sec (191.252 average)
Saving checkpoint to neat-checkpoint-83

 ****** Running generation 84 ****** 

Population's average fitness: 50.90000 stdev: 15.87209
Best fitness: 82.00000 - size: (4, 564) - species 1 - id 4651
Average adjusted fitness: 0.584
Mean gen


 ****** Running generation 98 ****** 

Population's average fitness: 53.63333 stdev: 18.29660
Best fitness: 105.00000 - size: (6, 559) - species 3 - id 4821
Average adjusted fitness: 0.466
Mean genetic distance 1.897, standard deviation 0.742
Population of 60 members in 3 species:
   ID   age  size  fitness  adj fit  stag
     1   98    14     75.0    0.413    13
     2   39     6     74.0    0.473     4
     3    1    40    105.0    0.514     0
Total extinctions: 0
Generation time: 195.164 sec (192.722 average)

 ****** Running generation 99 ****** 

Population's average fitness: 52.90000 stdev: 17.80983
Best fitness: 105.00000 - size: (6, 559) - species 3 - id 4821
Average adjusted fitness: 0.452
Mean genetic distance 1.813, standard deviation 0.841
Population of 60 members in 3 species:
   ID   age  size  fitness  adj fit  stag
     1   99    17     69.0    0.442    14
     2   40    13     62.0    0.440     5
     3    2    30    105.0    0.474     1
Total extinctions: 0
Generatio