In [87]:
import pandas as pd
import numpy as np
#import pdb
import matplotlib.pyplot as plt
%matplotlib inline


# Observaciones


+ Los indicadores se utilizarán para generar la métrica de consenso y a partir de esta generar las señales.
+ ¿Cómo cruzar individuos? (Se facilita si cada individuo tiene el mismo número de indicadores)
+ ¿Mutar sólo parámetros del indicador o el indicador en su totalidad?
+ ¿Cómo calcular el fitness?

In [93]:

# Loads the raw price history from disk.
data = pd.read_csv('amxl.csv')

# Overwrites the header names: reading the CSV left hidden characters in the
# header, which made the Date column awkward to address.
data.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']

# Parses the date strings into datetimes.
data['Date'] = pd.to_datetime(data['Date'])

# Orders the rows from oldest to newest and keeps only the last 365*1 rows;
# using more history makes the algorithm slow.
data = data.sort_values(by='Date').iloc[-365 * 1:]

In [89]:
def selectNumericalReal(limits):
    '''
    Draws one real number uniformly at random between the two limits.

    limits: sequence whose first entry is the lower bound and whose second
            entry is the upper bound
    '''
    lowerBound = limits[0]
    upperBound = limits[1]
    sample = np.random.uniform(low=lowerBound, high=upperBound, size=1)
    return sample[0]

def selectNumericalInteger(limits):
    '''
    Draws one integer at random from limits[0] up to and including limits[1].
    '''
    lowest, highest = limits[0], limits[1]
    # +1 because range() excludes its stop value and the upper limit is valid
    candidates = range(lowest, highest + 1)
    picked = np.random.choice(candidates, size=1)
    return picked[0]

def selectCategorical(kinds):
    '''
    Picks one entry of kinds at random and returns it.
    '''
    picked = np.random.choice(kinds, size=1)
    return picked[0]

def getMaxTimeWindow(indicators):
    '''
    Returns the largest 'timeWindow' parameter found among the indicators
    of one individual.
    '''
    windows = [entry.params['timeWindow'] for entry in indicators]
    return np.max(windows)

def movingAverage(data,method='simple'):
    '''
    Returns the moving-average value of one window of prices.

    ======
    Params:
    data: the prices inside the window (anything np.nanmean accepts)
    method: averaging method; only 'simple' is implemented
    ======

    ======
    Output:
    The mean of the window, ignoring NaN entries
    ======

    Raises ValueError for any other method. Returning None silently, as
    before, only surfaced later as an obscure error in the band arithmetic.
    '''
    if method=='simple':
        return np.nanmean(data)
    raise ValueError('Unsupported moving average method: %r' % (method,))


def fitness(individual):
    '''
    Returns the fitness of an individual
    using the consensus trading signals

    Once the first trade has happened the strategy always holds a position:
    a buy after a sell closes the short and opens a long, a sell after a buy
    closes the long and opens a short. A signal that repeats the direction of
    the previous trade is ignored.

    Reads the module-level `data` (price history) and `transactionCost`
    (cost per trade as a fraction of the execution price).

    ======
    Params:
    individual: object whose `consensus` table holds the dates in column 0
                and the signals in column 1 (1 = buy, -1 = sell)
    ======

    ======
    Output:
    gain: realised profit of the closed positions net of transaction costs,
          replaced by 0.5 when it is not positive
    transactionsCount: number of executed trades
    ======
    '''
    gain=0
    lastSignal='hold' #hold,buy,sell
    lastTradingPrice=0
    transactionsCount=0

    #Keeps the rows dated on or after the first consensus date so that
    #positional row t of the prices matches row t of the consensus table
    filteredData=data[data.iloc[:,0]>=individual.consensus.iloc[0,0]]

    #Execution prices (mid point between high and low) and signals are read
    #once instead of being looked up row by row inside the loop
    midPrices=((filteredData['High'] + filteredData['Low'])/2.0).values
    signals=individual.consensus.iloc[:,1].values

    #Calculates the cumulative gain up to one day before the last one
    #since the execution price is calculated using t+1 data.
    #Starts in 1 since the first day has no trading signals
    for t in range(1,len(signals) - 1):
        if signals[t]==1 and lastSignal!='buy':
            newSignal='buy'
        elif signals[t]==-1 and lastSignal!='sell':
            newSignal='sell'
        else:
            continue

        executionPrice=midPrices[t+1]

        #Only a trade that closes a previous position realises a profit.
        #The first trade just opens a position; it used to book the whole
        #execution price as gain (+price for a buy, -price for a short),
        #which made the fitness depend mostly on which signal came first.
        if lastSignal=='sell':
            #Closes the short opened at lastTradingPrice
            gain=gain + lastTradingPrice - executionPrice
        elif lastSignal=='buy':
            #Closes the long opened at lastTradingPrice
            gain=gain + executionPrice - lastTradingPrice

        #Every executed trade pays the transaction cost
        gain=gain - transactionCost*executionPrice
        lastTradingPrice=executionPrice
        lastSignal=newSignal
        transactionsCount=transactionsCount+1

    #NOTE(review): the position still open after the last signal is not
    #valued at the final price; only closed positions count.

    #Non-positive gains are replaced by a small positive value; the fitness
    #is later normalised into selection probabilities
    if gain<=0:
        gain=0.5
    return gain,transactionsCount

def _makeChild(template, indicators):
    '''
    Builds a new individual that carries `indicators`, leaving `template`
    untouched. Everything derived from the indicators is rebuilt.
    '''
    import copy  #function-scope import: the notebook import cell does not load copy

    #Shallow copy: a distinct object, so assigning attributes below does not
    #alter the parent that is still part of the current population
    child=copy.copy(template)
    child.indicators=list(indicators)
    child.numberIndicators=len(child.indicators)

    #The largest window decides where the signals start, so it has to be
    #recomputed for the new mix of indicators, and the tables resized with it
    child.maxTimeWindow=getMaxTimeWindow(child.indicators)
    dates=data['Date'].iloc[child.maxTimeWindow:]
    child.signals=pd.DataFrame(columns=range(0,child.numberIndicators +1))
    child.signals[0]=dates
    child.consensus=pd.DataFrame(columns=range(0,2))
    child.consensus[0]=dates
    child.fitness=0
    child.transactionsNumber=0
    return child

def crossover(population,probabilities):
    '''
    Creates two children from two parents using single-point crossover.

    ======
    Params:
    population: list of individuals to choose the parents from
    probabilities: selection probability of each individual (same order)
    ======

    ======
    Output:
    [child1,child2]: child1 has the head of the first parent and the tail of
    the second one; child2 has the head of the second parent and the tail of
    the first one. The parents themselves are not modified.
    ======
    '''
    #Selects the parents according to their fitness
    parents=np.random.choice(population,size=2,replace=False,p=probabilities)

    #Snapshots both indicator lists before building anything. The children
    #used to be aliases of the parents, so assigning child1.indicators
    #changed parents[0] and child2 ended up being a plain copy of parents[1]
    firstGenes=list(parents[0].indicators)
    secondGenes=list(parents[1].indicators)

    #Interior cut points are 1..n-1 (n = length of the shorter parent), so
    #each parent contributes at least one indicator
    cutLimit=min(len(firstGenes),len(secondGenes))
    if cutLimit<2:
        #No interior cut point exists: the children are clones of the parents
        return [_makeChild(parents[0],firstGenes),_makeChild(parents[1],secondGenes)]

    #Selects crossover point (avoids ends)
    crossoverPoint=np.random.choice(range(1,cutLimit))

    #Creates the children
    child1=_makeChild(parents[0],firstGenes[0:crossoverPoint] + secondGenes[crossoverPoint:])
    child2=_makeChild(parents[1],secondGenes[0:crossoverPoint] + firstGenes[crossoverPoint:])

    return [child1,child2]
    
         
    
class indicator:
    '''
    Creates a randomly chosen technical indicator

    The kind of indicator and its parameters are drawn at random from the
    module-level domains (indicatorKinds, timeWindow, sigma, typeMA) and
    stored in the self.params dictionary.
    '''

    def __init__(self):
        self.params = {}
        self.params['indicator'] = selectCategorical(indicatorKinds)

        if self.params['indicator']=='BB':
            self.params['timeWindow'] = selectNumericalInteger(timeWindow)
            self.params['stdDev'] = selectNumericalReal(sigma)
            self.params['movingAverageMethod'] = selectCategorical(typeMA)
            #Pending ideas: a smoothing parameter for an exponential MA and
            #a 'priceType' parameter (close,low,high,open)

        elif self.params['indicator']=='MA':
            self.params['timeWindow'] = selectNumericalInteger(timeWindow)
            self.params['movingAverageMethod'] = selectCategorical(typeMA)

    def _closeWindows(self,data,start):
        '''
        Returns one window of closing prices per row from `start` to the end
        of the data. Entry k covers the rows start+k-timeWindow .. start+k
        (both included), i.e. timeWindow+1 prices ending at row start+k.
        '''
        closes=data['Close']
        window=self.params['timeWindow']
        #max(...,0) keeps the slice from wrapping around to the end of the
        #series when start is smaller than the window
        return [closes.iloc[max(end-window,0):end+1] for end in range(start,len(data))]

    def bollingerSignals(self,data,start):
        '''
        Returns signals generated by a Bollinger Band indicator

        ======
        Params:
        data: pandas data set with a 'Close' column
        start: positional row where the signals start
        ======

        ======
        Output:
        signal: list with one entry per row from start on; the first entry
        is always 0, the rest are 1 (buy), -1 (sell) or 0 (no signal)
        ======
        '''
        closes=data['Close']
        width=self.params['stdDev']
        method=self.params.get('movingAverageMethod','simple')

        #Bands: moving average -/+ stdDev standard deviations of the window.
        #Entry k of each list belongs to row start+k.
        lowerBound=[]
        upperBound=[]
        for windowCloses in self._closeWindows(data,start):
            center=movingAverage(windowCloses,method=method)
            spread=width*np.std(windowCloses)
            lowerBound.append(center - spread)
            upperBound.append(center + spread)

        signal=[0]
        for auxRow,t in enumerate(range(start + 1,len(data))):
            current=closes.iloc[t]
            previous=closes.iloc[t-1]
            if current > upperBound[auxRow+1] and previous > upperBound[auxRow]:
                #Stays above the upper band
                signal.append(1)
            elif current > lowerBound[auxRow+1] and previous < lowerBound[auxRow]:
                #Crosses the lower band upwards
                signal.append(1)
            elif current < lowerBound[auxRow+1] and previous < lowerBound[auxRow]:
                #Stays below the lower band
                signal.append(-1)
            elif current < upperBound[auxRow+1] and previous > upperBound[auxRow]:
                #Crosses the upper band downwards. This used to test
                #previous < upper, which is true on nearly every row and
                #turned almost everything into a sell signal.
                signal.append(-1)
            else:
                signal.append(0)
        return signal

    def movingAverageSignals(self,data,start):
        '''
        Returns signals generated by a Moving Average indicator

        ======
        Params:
        data: pandas data set with a 'Close' column
        start: positional row where the signals start
        ======

        ======
        Output:
        signal: list with one entry per row from start on; the first entry
        is always 0, the rest are 1 when the close crosses the moving
        average upwards, -1 when it crosses it downwards and 0 otherwise
        ======
        '''
        closes=data['Close']
        method=self.params.get('movingAverageMethod','simple')

        #Entry k of MA belongs to row start+k
        MA=[movingAverage(windowCloses,method=method)
            for windowCloses in self._closeWindows(data,start)]

        signal=[0]
        #enumerate advances auxRow together with t; it used to stay at 0, so
        #every row was compared against the first two averages only
        for auxRow,t in enumerate(range(start + 1,len(data))):
            current=closes.iloc[t]
            previous=closes.iloc[t-1]
            if current>MA[auxRow+1] and previous<MA[auxRow]:
                signal.append(1)
            elif current<MA[auxRow+1] and previous>MA[auxRow]:
                #Downward cross is a sell (it used to append 1 as well)
                signal.append(-1)
            else:
                signal.append(0)
        return signal

class individual:
    '''
    creates an individual from the population

    An individual is a list of randomly created indicators plus the tables
    derived from them:
    signals: first column holds the dates, then one column per indicator
    consensus: dates and the final (consensus) signal
    '''
    def __init__(self):
        self.numberIndicators = selectNumericalInteger(indicatorsNumber)
        self.indicators=[indicator() for _ in range(0,self.numberIndicators)]
        self.fitness=0
        self.transactionsNumber=0
        self._resetFrames(data['Date'])

    def _resetFrames(self,dates):
        '''
        Rebuilds everything that is derived from self.indicators: the number
        of indicators, the largest time window and the empty signals and
        consensus tables, which start maxTimeWindow rows into `dates`.
        '''
        self.numberIndicators=len(self.indicators)
        self.maxTimeWindow=getMaxTimeWindow(self.indicators)
        usableDates=dates.iloc[self.maxTimeWindow:]
        self.signals = pd.DataFrame(columns=range(0,self.numberIndicators +1))
        self.signals[0]=usableDates #First column contains the dates
        self.consensus = pd.DataFrame(columns=range(0,2)) #Final signals
        self.consensus[0] = usableDates

    def getFitness(self):
        '''
        Stores the fitness and the number of transactions of this individual
        '''
        self.fitness,self.transactionsNumber=fitness(self)

    def getSignals(self,data):
        '''
        Gets the trading signals of every indicator using the data in the
        dataset and combines them into the consensus signals
        ======
        Params:
        data: A pandas data set containing the data
        ======

        ======
        Output:
        None. self.signals holds the dates and one column of signals per
        indicator; self.consensus holds the columns 'Date' and 'Signal'
        ======
        '''
        #The tables are rebuilt on every call: the indicator list can be
        #replaced after construction (crossover does it), which changes the
        #largest window, and the rows have to come from the dataset that was
        #actually passed in
        self._resetFrames(data['Date'])

        for column,current in enumerate(self.indicators,1):
            kind=current.params['indicator']
            if kind=='BB':
                self.signals[column] = current.bollingerSignals(data=data,start=self.maxTimeWindow)
            elif kind=='MA':
                self.signals[column] = current.movingAverageSignals(data=data,start=self.maxTimeWindow)

        #Consensus signals: share of indicators voting buy / sell on each row
        votes=self.signals.iloc[:,1:]
        total=float(len(self.indicators))
        buyShare=(votes==1).sum(axis=1).values/total
        sellShare=(votes==-1).sum(axis=1).values/total

        #Buy is checked first, as in a plain if/elif chain
        decision=np.where(buyShare > pConsensus,1,
                          np.where(sellShare > pConsensus,-1,0))
        if len(decision)>0:
            decision[0]=0 #The first row never carries a signal

        self.consensus[1]=decision
        self.consensus.columns = ['Date','Signal']
            

In [None]:
#Defines the domain for the parameters
#The format is:
#[lowerBound,upperBound] For numerical parameters
#[category1,...,categoryN] For categorical parameters
sigma=[0.1,2.5]
timeWindow=[5,15]
#Only the simple moving average is implemented, hence both options are 'simple'
typeMA=['simple','simple']
indicatorKinds=['BB','MA']
#Same lower and upper bound: every individual gets exactly 4 indicators
indicatorsNumber=[4,4]

#Defines the proportion for minimum consensus
pConsensus=0.4

#Defines the per-transaction cost
transactionCost=0.25/100

#Defines the mutation probability
pMutation=0.01

populationSize=10
population=[]
newPopulation=[]
numberGenerations=50
countGeneration=1

#Creates initial population
for i in range(0,populationSize):
    population.append(individual())

while countGeneration<=numberGenerations:
    #Gets population signals
    for entity in population:
        entity.getSignals(data=data)

    #Gets individual fitness
    #and normalizes in order to get selection probabilities
    probabilities=[]
    for entity in population:
        entity.getFitness()
        probabilities.append(entity.fitness)
    #Single parenthesised argument: prints the same text on Python 2 and 3
    #(the bare print statement is a syntax error on Python 3 kernels)
    print('Max fitness so far: ' + str(np.max(probabilities)) + ' Generation: ' + str(countGeneration))
    probabilities=probabilities/np.sum(probabilities)

    #Creates the new population
    while len(newPopulation)<len(population):

        #Creates children
        children=crossover(population,probabilities)
        newPopulation.append(children[0])
        newPopulation.append(children[1])

    #Children come in pairs, so an odd population size would overshoot by one
    newPopulation=newPopulation[:len(population)]

    #Mutates population: a mutated individual is replaced by a brand new
    #random one
    for i in range(0,len(newPopulation)):
        u=np.random.uniform()
        if u<pMutation:
            newPopulation[i]=individual()

    #Replace the old population
    population=newPopulation
    newPopulation=[]
    countGeneration=countGeneration + 1
        
        

# To Do
* Implement more indicators
