In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn import model_selection

from evopipe import steps, evopipe

from sklearn.model_selection import train_test_split
from sklearn import metrics

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
filename = 'datasets/magic.csv'
data = pd.read_csv(filename, sep=',')

# Every column except the last is a feature; the last column is the class label.
features = data.loc[:, data.columns[:-1]]
target = data.loc[:, data.columns[-1]]

# Encode the class labels as integers, reusing the original row index so the
# encoded target stays aligned with `features`.
le = preprocessing.LabelEncoder()
ix = target.index
target = pd.Series(le.fit_transform(target), index=ix)

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
train_X, test_X, train_Y, test_Y = train_test_split(
    features,
    target,
    test_size=0.25,
    random_state=0,
)

# steps.get_params is given the number of feature columns.
params = steps.get_params(train_X.shape[1])

In [None]:
# score is accuracy for magic
clf = evopipe.EvoPipeClassifier(
    steps.preproc, steps.clfs, params, ['featsel', 'scaling'],
    pop_size=300, ngen=30, mutpb=0.5, swap_mutpb=0.2,
    param_mutpb=0.85, ind_mutpb=0.8, len_mutpb=0.5,
)
# NOTE(review): the held-out split is passed to fit() too; presumably it only
# feeds the "train_test" logbook chapter -- confirm it does not influence
# which pipelines are selected, otherwise the test score below is optimistic.
clf.fit(train_X, train_Y, test_X, test_Y)

score = clf.score(test_X, test_Y)
print("\nBest pipeline test score: {}\n".format(score))

best_pipes = clf.best_pipelines()

# The loop uses its own name (`pipe_score`) so that the notebook-level `score`
# computed above is not overwritten by the last entry of `best_pipes`.
for pipe, pipe_score in best_pipes:
    # Refit each returned pipeline on the training split so it is usable afterwards.
    pipe.fit(train_X, train_Y)

    # Only the step names are reported, not the configured estimator objects.
    pipe_named_steps = [step_name for step_name, _ in pipe.steps]
    print("Score: {}, Pipe: {}".format(pipe_score, pipe_named_steps))

evaluating: [('FA', OrderedDict([('n_components', 2)]), 'featsel'), ('DT', OrderedDict([('criterion', 'entropy'), ('max_depth', 100), ('max_features', 0.5), ('min_samples_leaf', 10), ('min_samples_split', 10)]))]

In [None]:
print(clf.logbook)

# Per-generation statistics recorded during the evolutionary run.
gen = clf.logbook.select("gen")
# Named `variances` rather than `vars` so the builtin vars() is not shadowed
# for the rest of the kernel session.
avgs, mins, maxs, variances = clf.logbook.chapters["fitness"].select("avg", "min", "max", "var")
avgs_tt, mins_tt, maxs_tt, variances_tt = clf.logbook.chapters["train_test"].select("avg", "min", "max", "var")

sns.set()

# Explicit figure/axes interface: one axes holding all four curves.
fig, ax1 = plt.subplots()
ax1.set_xlabel("Generation")
ax1.set_ylabel("Fitness")

line1 = ax1.plot(gen, maxs, label='Maximum Fitness')
line2 = ax1.plot(gen, avgs, label='Average Fitness')
line3 = ax1.plot(gen, maxs_tt, label='Maximum Test score')
line4 = ax1.plot(gen, avgs_tt, label='Average Test score')

# ax.plot returns a list of Line2D objects; concatenate them to build one legend.
lines = line1 + line2 + line3 + line4
labels = [line.get_label() for line in lines]
legend = ax1.legend(lines, labels, loc='lower right', frameon=True)

plt.show()