In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from random import *
import time

## Uso do conjunto reduzido de dados (5130 pacientes)

In [17]:
dados = pd.read_csv('dadosPO201reduced.csv')

In [18]:
dados.drop(columns = 'Unnamed: 0', inplace = True)
dados.head()


Unnamed: 0,febre,tosse,dor.de.garganta,dispneia,desconforto.respiratorio,saturacao,diarreia,vomito,outros.sintomas,doenca.cardiovascular.cronica,imunodeficiencia.imunodepressao,diabetes.mellitus,obesidade,outros.riscos,doencas.respiratorias.cronicas,sexo,idade,gravidade2
0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,57,0
1,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,1,39,0
2,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,16,0
3,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,42,0
4,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,50,0


In [19]:
dados.shape

(5130, 18)

In [20]:
pd.value_counts(dados['gravidade2'].values)

0    3420
1    1710
dtype: int64

In [21]:
df_positive = dados[dados['gravidade2'] == 1]
df_negative = dados[dados['gravidade2'] == 0]

## Função de determinação de parcelas de separabilidade de atributos

In [22]:
attributes = list(dados.columns)
attributes.remove('gravidade2')


def processingSeparability(df_negative, df_positive,attributes):
    separability = []
    target_0 = df_negative['gravidade2']
    target_1 = df_positive['gravidade2']
    for attr in attributes:
        total = 0
        attr_values_neg = list(df_negative[attr])
        attr_values_pos = list(df_positive[attr])
        #print(attr)
        for item_1 in range(df_negative.shape[0]):
            for item_2 in range(df_positive.shape[0]):
                if attr_values_neg[item_1] != attr_values_pos[item_2]:
                    total += 1
        separability.append(total)
    return separability

## Definição de funções de aptidão, competição, crossover e mutação

In [23]:
def fitnessFunction(individual, separability, beta):
    if sum(individual) > beta:
        return 0
    else:
        return np.matmul(individual,separability)


In [24]:
def mutate(individual, probability):
    for item in range(len(individual)):
        if random() < probability:
            individual[item] = 1 - individual[item]
    return individual

In [25]:
def crossover(parent1, parent2, probability):
    child1, child2 = parent1.copy(), parent2.copy()
    if random() < probability:
        crossover_point = randint(1,len(parent1)-2)
        child1 = parent1[:crossover_point] + parent2[crossover_point:]
        child2 = parent2[:crossover_point] + parent1[crossover_point:]
    return [child1, child2]

In [26]:
def selection(population, scores, k = 3):
    random_individual = randint(0,len(population)-1)
    for opponent in sample(range(len(population)),k-1):
        #print(opponent, '', random_individual)
        if scores[opponent] > scores[random_individual]:
            random_individual = opponent
    return population[random_individual]

## Execução do algoritmo genético

In [27]:
def geneticAlgorithm(df_negative, 
                     df_positive, 
                     attributes, 
                     chromossome_size,
                     pop_size,
                     num_iter,
                     prob_crossover,
                     prob_mutation): 
        separability = processingSeparability(df_negative,df_positive,attributes)
        print('Separability: ',separability)
        population = [[randint(0, 1) for item in range(chromossome_size)] for item in range(pop_size)]
        best, best_eval = 0, fitnessFunction(population[0], separability, beta)

        for generation in range(num_iter):
            scores = [fitnessFunction(individual, separability, beta) for individual in population]
            
            sums = [sum(individual) for individual in population]
            for (score, sum_) in zip(scores, sums):
                if sum_ > beta and score > 0:
                    print('Warning! Fitness function needs maintenance!')


            for individual in range(pop_size):
                if scores[individual] > best_eval:
                    best = population[individual]
                    best_eval = scores[individual]
                    print("Generation %d, new best f(%s) = %.3f" % (generation,  
                    best, 
                    best_eval))
            selected = [selection(population,scores) for tournament in range(pop_size)]
            offspring = []
            for i in range(0, pop_size, 2):
                parent1, parent2 = selected[i], selected[i+1]
                for child in crossover(parent1, parent2, prob_crossover):
                    child = mutate(child, prob_mutation)
                    offspring.append(child)
            population = offspring

In [None]:
prob_crossover = 0.45
prob_mutation = 0.01

num_iter = 3000
pop_size = 20
chromossome_size = 17
beta_list = range(4,9)


for beta in beta_list:
    time1 = time.time()
    
    print("_________________________________________________")
    print("RUNNING FOR BETA = ", beta)
    print("_________________________________________________")
    geneticAlgorithm(df_negative, 
                     df_positive, 
                     attributes, 
                     chromossome_size,
                     pop_size,
                     num_iter,
                     prob_crossover,
                     prob_mutation)
    
    time2 = time.time()
    total_time = time2-time1
    print("time elapsed %.3f seconds" % total_time)


_________________________________________________
RUNNING FOR BETA =  4
_________________________________________________
Separability:  [2837274, 2080704, 3355020, 3617650, 3319892, 4047306, 530100, 324900, 2754050, 2792436, 227626, 2280904, 434340, 2561580, 308984, 2964966, 5790913]
Generation 0, new best f([1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]) = 14913045.000 with sum 4
Generation 44, new best f([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]) = 15640459.000 with sum 4
Generation 280, new best f([0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]) = 15728549.000 with sum 4
Generation 293, new best f([0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]) = 16420835.000 with sum 4
Generation 362, new best f([0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]) = 16810889.000 with sum 4
time elapsed 23.048 seconds
time elapsed 23.048 seconds
_________________________________________________
RUNNING FOR BETA =  5
_________________________________________________
Separabil