In [305]:
import pandas as pd
import random
from copy import deepcopy
import numpy as np


In [306]:
import import_ipynb 
from accuracy import calc_accuracy

In [307]:
# Feature tables for the train and test splits; paths are relative to the
# notebook's working directory.
X_train = pd.read_csv('data/X_train.csv')
X_test  = pd.read_csv('data/X_test.csv')

# Target tables for the same two splits.
y_train = pd.read_csv('data/y_train.csv')
y_test  = pd.read_csv('data/y_test.csv')

In [308]:
#from sklearn.svm import SVC
#from sklearn.metrics import accuracy_score

In [309]:
class Individual:
    """A single candidate solution handled by the genetic algorithm.

    Attributes:
        code: The individual's encoding, stored exactly as passed in.
        fitness: Score of the individual; negative infinity until a value
            is assigned.
    """

    def __init__(self, code, fitness=float("-inf")):
        self.code, self.fitness = code, fitness

In [310]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output units.
    """

    def __init__(self, input_dim, hidden_dim=32, output_dim=1):
        super().__init__()
        # Layers are registered in input -> output order.
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.layer3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the raw (un-activated) output of the last linear layer."""
        activations = x
        for hidden_layer in (self.layer1, self.layer2):
            activations = F.relu(hidden_layer(activations))
        return self.layer3(activations)

In [311]:
class GA:
    """Genetic algorithm for feature selection over the columns of ``X``.

    Every individual carries a boolean mask (``code``) with one entry per
    column of ``X``; a true entry marks a selected feature. Fitness is the
    accuracy reported by ``calc_accuracy`` plus a bonus for using fewer
    features.
    """

    def __init__(
        self,
        X: pd.DataFrame,
        y: pd.DataFrame,
        population_size: int,
        num_generations: int,
        tournament_size: int,
        mutation_probability: float,
        X_val=None,
        y_val=None,
    ):
        """Create the initial random population.

        Args:
            X: Feature table passed to ``calc_accuracy`` as the training
                features; its column count fixes the length of every code.
            y: Targets passed to ``calc_accuracy`` as the training targets.
            population_size: Number of individuals per generation.
            num_generations: Number of generations evaluated by ``run``.
            tournament_size: Number of contenders drawn per tournament.
            mutation_probability: Per-gene probability of flipping a bit.
            X_val: Optional evaluation features for ``calc_accuracy``. When
                omitted, the notebook-level ``X_test`` is used.
            y_val: Optional evaluation targets for ``calc_accuracy``. When
                omitted, the notebook-level ``y_test`` is used.
        """
        # TODO: add elitism
        self.X = X
        self.y = y
        self.X_val = X_val
        self.y_val = y_val
        self.all_column_names = X.columns
        self.population_size = population_size
        self.num_generations = num_generations
        self.tournament_size = tournament_size
        self.mutation_probability = mutation_probability

        # Each feature starts selected with probability 0.25.
        self.population = [
            Individual(code=(np.random.rand(X.shape[1]) < 0.25))
            for _ in range(self.population_size)
        ]
        self.best_fitness = -float('inf')
        self.best = None
        # One (generation, best_fitness, best_code) tuple per generation.
        self.history = []

    def calc_fitness(self, index):
        """Compute and store the fitness of ``self.population[index]``.

        An individual that selects no feature gets negative infinity.
        Otherwise fitness is ``accuracy + (1 - selected / total)``.
        """
        individual = self.population[index]
        if not np.any(individual.code):
            individual.fitness = -float('inf')
            return

        # Train on the data this instance was built with rather than on
        # notebook globals. The evaluation split falls back to the
        # notebook-level X_test / y_test when none was supplied.
        # NOTE(review): scoring candidates on the test split leaks test data
        # into feature selection -- consider passing a separate validation
        # split through X_val / y_val.
        X_val = self.X_val if self.X_val is not None else X_test
        y_val = self.y_val if self.y_val is not None else y_test
        # calc_accuracy comes from the `accuracy` notebook; presumably it
        # trains on the first pair and scores on the second -- confirm there.
        acc = calc_accuracy(individual.code, self.X, self.y, X_val, y_val)

        num_features = int(np.count_nonzero(individual.code))
        individual.fitness = acc + (1 - num_features / self.X.shape[1])
        # Ideas: a * accuracy + b * (1 - selected_attributes / total_attributes),
        # or a * accuracy + (1 - a) * (...); maybe subtract the second term instead.

    def crossover(self, parent1, parent2):
        """Single-point crossover; returns two new children.

        The parents are never modified. Codes shorter than two genes have no
        interior cut point, so the children are plain copies of the parents.
        """
        length = len(parent1.code)
        if length < 2:
            return Individual(parent1.code.copy()), Individual(parent2.code.copy())

        cut = random.randrange(1, length)
        # np.concatenate allocates new arrays, so children share no memory
        # with their parents.
        child1 = Individual(np.concatenate([parent1.code[:cut], parent2.code[cut:]]))
        child2 = Individual(np.concatenate([parent2.code[:cut], parent1.code[cut:]]))
        return child1, child2

    def mutation(self, child):
        """Flip each gene of ``child`` in place with ``mutation_probability``.

        Returns the same ``child`` object.
        """
        for i in range(len(child.code)):
            if random.random() < self.mutation_probability:
                child.code[i] = not child.code[i]
        return child

    def tournament_selection(self):
        """Return the population index of the fittest of a random sample.

        The sample size is capped at the population size, so an oversized
        ``tournament_size`` no longer makes ``random.sample`` raise.
        """
        sample_size = min(self.tournament_size, len(self.population))
        contenders = random.sample(range(len(self.population)), sample_size)
        return max(contenders, key=lambda i: self.population[i].fitness)

    def run(self):
        """Evolve the population for ``num_generations`` generations.

        Returns:
            ``(best_code, best_fitness)`` for the fittest individual seen in
            any generation; ``best_code`` is ``None`` if no individual ever
            scored above negative infinity.
        """
        for generation in range(self.num_generations):
            # Evaluate the current generation and track the best so far.
            for i in range(self.population_size):
                self.calc_fitness(i)
                candidate = self.population[i]
                if candidate.fitness > self.best_fitness:
                    self.best_fitness = candidate.fitness
                    self.best = candidate.code.copy()

            self.history.append((generation, self.best_fitness, self.best))

            # Breed the next generation; the length guard drops the surplus
            # second child when the population size is odd.
            new_population = []
            while len(new_population) < self.population_size:
                parent1 = self.population[self.tournament_selection()]
                parent2 = self.population[self.tournament_selection()]
                child1, child2 = self.crossover(parent1, parent2)
                new_population.append(self.mutation(child1))
                if len(new_population) < self.population_size:
                    new_population.append(self.mutation(child2))
            # Children are freshly built objects, so no deep copy is needed.
            self.population = new_population

        return self.best, self.best_fitness  # , history
    

In [312]:
# Seed every RNG in play so Restart & Run All reproduces the same result.
# GA draws from both `random` and `np.random`. torch is seeded too because
# calc_accuracy presumably trains a torch model (its printed output contains
# torch.Size values) -- confirm in the `accuracy` notebook.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

ga = GA(X_train, y_train, population_size=100, num_generations=10, tournament_size=4, mutation_probability=0.05)
selected_features, fitness = ga.run()

torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
0.8636363636363636
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.8636363636363636
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9090909090909091
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
1.0
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
0.8636363636363636
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
1.0
torch.Size([22, 1])
1.0
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
1.0
torch.Size([22, 1])
1.0
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.9545454545454546
torch.Size([22, 1])
1.0
torch.Size([22, 1])
1.0
torch.Size([22, 1])
0.954545454

In [313]:
# Best fitness returned by ga.run(): accuracy + (1 - selected_features / total_features).
fitness

1.757750035068032

In [272]:
#X_test[X_test.columns[selected_features]]

In [273]:
# Boolean mask of the selected columns returned by ga.run(). The earlier
# spelling `selected_featues` only resolved through stale kernel state.
print(selected_features)

[False False False ...  True False  True]
