In [15]:
from scipy.io import arff
import pandas as pd
import numpy as np
from fs_neat import FS_NEAT
from neat import set_seed
import torch
import torch.nn as nn

In [2]:
# Parse both ARFF splits up front. Only element 0 of each parse result (the
# records) is turned into a DataFrame.
train_arff = arff.loadarff('breastCancer-train.arff')
data = arff.loadarff('breastCancer-test.arff')  # `data` stays bound to the test-split parse result

df_train = pd.DataFrame(train_arff[0])
df_test = pd.DataFrame(data[0])

# Cell output: (rows, columns) of each split.
df_train.shape, df_test.shape

((78, 24482), (19, 24482))

In [3]:
# Preview the first five rows of the training frame (rendered as the cell output).
df_train.head(n=5)

Unnamed: 0,Contig45645_RC,Contig44916_RC,D25272,J00129,Contig29982_RC,Contig26811,D25274,Contig36292,Contig42854,Contig34839,...,NM_000898,NM_000899,Contig20164_RC,Contig8985_RC,Contig36062_RC,Contig35333_RC,Contig62037_RC,AF067420,Contig15167_RC,Class
0,-0.299,0.093,-0.215,-0.566,-0.596,-0.195,0.039,-0.409,-0.352,0.066,...,-0.96,-0.211,0.155,-0.095,-0.025,-0.037,0.215,0.307,0.321,b'relapse'
1,-0.081,0.009,-0.091,-0.518,-0.502,-0.149,0.098,-0.09,0.138,0.061,...,-0.531,-0.02,0.014,-0.123,0.148,0.024,-0.07,-0.209,0.105,b'relapse'
2,-0.125,0.07,-0.006,-0.575,-0.585,-0.183,0.102,0.023,-0.35,-0.005,...,-0.883,-0.159,0.022,0.006,-0.086,0.019,0.026,-0.822,0.199,b'relapse'
3,-0.27,0.123,0.056,-0.499,-0.402,-0.099,-0.145,-0.103,0.181,0.236,...,-0.044,-0.096,0.018,0.0,0.076,0.057,-0.016,-0.36,-0.038,b'relapse'
4,-0.141,0.025,-0.031,-0.465,-0.533,-0.065,0.101,-0.008,-0.019,0.026,...,0.28,-0.088,0.043,0.207,-0.124,-0.041,-0.077,-0.432,-0.015,b'relapse'


In [4]:
# Show the distinct raw values present in the 'Class' column.
df_train.loc[:, 'Class'].unique()

array([b'relapse', b'non-relapse'], dtype=object)

In [5]:
# Map the raw byte-string class names onto numeric targets, applying the same
# mapping to both splits so they stay consistent.
labels = {b'relapse' : 1, b'non-relapse' : 0}
for frame in (df_train, df_test):
    frame['Class'] = frame['Class'].replace(labels)

In [18]:
# Split each frame into features (every column but the last) and the target
# (the last column), both as float32. Targets get a trailing axis -> shape (n, 1).
x_train = df_train.iloc[:, :-1].to_numpy(dtype=np.float32)
y_train = df_train.iloc[:, -1].to_numpy(dtype=np.float32)[:, np.newaxis]

test_features = df_test.iloc[:, :-1].to_numpy(dtype=np.float32)
test_targets = df_test.iloc[:, -1].to_numpy(dtype=np.float32)[:, np.newaxis]

# Prepend a column of ones to the test features only.
# NOTE(review): presumably this mirrors a bias input that FS_NEAT adds to the
# training data itself when 'input_bias' is True — confirm against fs_neat.
bias = np.ones((len(test_features), 1))
x_test = torch.from_numpy(np.hstack((bias, test_features))).type(torch.float32)
y_test = torch.from_numpy(test_targets).type(torch.float32)

# Cell output: shapes of the four arrays/tensors.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((78, 24481), (78, 1), torch.Size([19, 24482]), torch.Size([19, 1]))

In [14]:
# Hyperparameter dictionary handed to FS_NEAT. The key strings are looked up by
# the library, so every spelling (including 'sustitution') is kept verbatim.
# NOTE(review): the meaning of each value is defined by fs_neat — confirm there before tuning.
params = dict(
    max_iterations=2000,
    n_population=200,
    initial_weight_limits=[-5, 5],
    input_bias=True,
    activation_function=nn.ReLU(),
    activation_coeff=1.0,
    crossover_prob=0.75,
    disable_node_prob=0.75,
    interspecies_mating_rate=0.001,
    weight_mutation_prob=0.05,
    pol_mutation_distr=5,
    weight_mutation_sustitution_prob=0.1,
    add_node_prob=0.15,
    add_connection_prob=0.8,
    compatibility_threshold=3,
    compatibility_distance_coeff=[1.0, 1.0, 0.4],
    stagnant_generations_threshold=15,
    champion_elitism_threshold=5,
)

# Training data handed to FS_NEAT: features under 'x', targets under 'y'.
problem = dict(x=x_train, y=y_train)

In [16]:
# Call neat.set_seed with its default argument before anything stochastic is built.
# NOTE(review): presumably this seeds the random generators; the seed value used
# is whatever set_seed() defaults to — confirm in the neat module.
set_seed()
# Build the FS_NEAT instance from the problem/params dicts and run it.
neat = FS_NEAT(problem, params)
neat.run()
# Last expression of the cell: describe `neat.best_solution`
# (presumably the fittest solution found by the run — confirm).
neat.best_solution.describe()

Iteration: 0, Best solution fitness : 99.11334991455078
Iteration: 1, Best solution fitness : 99.11334991455078
Iteration: 2, Best solution fitness : 99.11334991455078
Iteration: 3, Best solution fitness : 99.11902618408203
Iteration: 4, Best solution fitness : 99.11902618408203
Iteration: 5, Best solution fitness : 99.11902618408203
Iteration: 6, Best solution fitness : 99.11907958984375
Iteration: 7, Best solution fitness : 99.12486267089844
Iteration: 8, Best solution fitness : 99.12486267089844
Iteration: 9, Best solution fitness : 99.12486267089844
Iteration: 10, Best solution fitness : 99.12486267089844
Iteration: 11, Best solution fitness : 99.12486267089844
Iteration: 12, Best solution fitness : 99.12486267089844
Iteration: 13, Best solution fitness : 99.12486267089844
Iteration: 14, Best solution fitness : 99.12486267089844
Iteration: 15, Best solution fitness : 99.12486267089844
Iteration: 16, Best solution fitness : 99.12486267089844
Iteration: 17, Best solution fitness : 99

In [19]:
# Score the best solution on the training data held by the FS_NEAT instance
# (neat.x_train / neat.y_train, not the raw numpy arrays from the notebook).
# The result of evaluate() is unpacked as (accuracy, fitness).
acc, fitness = neat.evaluate(neat.best_solution, neat.x_train, neat.y_train)
print(f'Train dataset: fitness = {fitness}, accuracy = {acc} ')

# Same evaluation on the test tensors x_test / y_test; `acc` and `fitness` are overwritten.
acc, fitness = neat.evaluate(neat.best_solution, x_test, y_test)
print(f'Test dataset: fitness = {fitness}, accuracy = {acc} ')

Train dataset: fitness = 99.21478271484375, accuracy = 0.6666666865348816 
Test dataset: fitness = 96.86466979980469, accuracy = 0.6842105388641357 
