# Part 3 NSGA II

> Code is inspired by:
> 
> https://medium.com/@rossleecooloh/optimization-algorithm-nsga-ii-and-python-package-deap-fca0be6b2ffc
>
> https://github.com/DEAP/deap/blob/master/examples/ga/nsga2.py
>
>  https://github.com/DEAP/deap/blob/master/deap/tools/emo.py

In [1]:
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
import re
import pandas as pd
import numpy as np
from copy import deepcopy
from distutils.command.build_scripts import first_line_re
from tkinter.tix import COLUMN
# Import deque for the stack structure, copy for deep copy nodes
from collections import deque
from sklearn.metrics import accuracy_score
import sklearn 
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import (DecisionTreeClassifier, DecisionTreeRegressor,
                          ExtraTreeClassifier)
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
# Encoding categorical features with preserving the missing values in incomplete features
from sklearn.preprocessing import (KBinsDiscretizer, LabelEncoder,
                                   OneHotEncoder, OrdinalEncoder,
                                   StandardScaler)
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from matplotlib import pyplot

import array
import random
import json



from math import sqrt

from deap import algorithms
from deap import base
from deap import benchmarks
from deap.benchmarks.tools import diversity, convergence, hypervolume
from deap import creator
from deap import tools


In [2]:

# Hyper-parameters for the genetic algorithm, gathered in a single dict so
# they can be looked up by name.
CONSTANTS_DICT = {
    "POPULATION_SIZE": 100, # number of individuals in each population
    "MAX_GENERATIONS": 250, # number of generations to run the algorithm
    "CROSSOVER_RATE": 1.0, # crossover rate should always be 100%, based on slides
    "MUTATION_RATE": 0.2, # mutation rate
    "ELITIST_PERCENTAGE": 0.05, # fraction of the best individuals to keep in the next generation
    "CLASSIFIER": KNeighborsClassifier(), # classifier to use (KNN with default hyper-parameters)
}


In [3]:
class DatasetPart3:
    """Base wrapper around a tabular dataset whose last column is the label.

    Subclasses provide ``constructFromFile`` to parse a specific file format
    into a DataFrame and hand it to this constructor.
    """

    def __init__(self, df):
        """Split *df* into feature columns and the label column.

        Args:
            df: a DataFrame-like object supporting ``.iloc`` and ``.shape``
                (a pandas DataFrame everywhere in this file). Every column
                except the last is treated as a feature; the last column is
                treated as the target.
        """
        self.df = df
        self.x = self.df.iloc[:, :-1]  # every column except the last
        self.y = self.df.iloc[:, -1]  # the last column
        self.M = self.df.shape[0]  # number of rows

    @classmethod
    def constructFromFile(cls, filePath):
        """Build a dataset from the file at *filePath*.

        The base class does not know any file format, so it refuses to guess
        instead of silently returning ``None``.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError(
            f"{cls.__name__} must implement constructFromFile()"
        )

class Vehicle(DatasetPart3):
    """Dataset loaded from a whitespace-delimited, header-less file."""

    def __init__(self, df):
        """Wrap *df*; the feature/label split is done by the base class."""
        super().__init__(df)

    @classmethod
    def constructFromFile(cls, filePath):
        """Read *filePath* and return a ``Vehicle`` instance.

        Columns are named ``f_0 .. f_{k-1}`` and the last column is named
        ``class``.

        Args:
            filePath: path to a whitespace-separated file without a header row.

        Returns:
            A new instance of ``cls`` wrapping the parsed DataFrame.
        """
        # `delim_whitespace=True` is deprecated since pandas 2.2; the
        # documented replacement is sep=r"\s+".
        df = pd.read_csv(filePath, header=None, sep=r"\s+")
        # Name the columns in one step: features first, label last.
        feature_names = [f"f_{i}" for i in range(df.shape[1] - 1)]
        df.columns = feature_names + ["class"]
        return cls(df)
    
class MuskClean(DatasetPart3):
    """Dataset loaded from a comma-separated, header-less file."""

    def __init__(self, df):
        """Wrap *df*; the feature/label split is done by the base class."""
        super().__init__(df)

    @classmethod
    def constructFromFile(cls, filePath):
        """Read *filePath* and return a ``MuskClean`` instance.

        The first two columns are discarded, the remaining columns are named
        ``f_0 .. f_{k-1}`` and the last one is then renamed to ``class``.
        """
        raw = pd.read_csv(filePath, header=None)
        # The first 2 columns are NOT numerical, so it is better to ignore them.
        frame = raw.drop([0, 1], axis=1)
        column_count = len(frame.columns)
        frame.columns = [f"f_{idx}" for idx in range(column_count)]
        # The last column holds the label.
        label_column = f"f_{column_count - 1}"
        frame = frame.rename(columns={label_column: "class"})
        return cls(frame)
# Load the vehicle dataset from disk; the bare expression on the last line
# makes the notebook render the resulting DataFrame as the cell output.
ds_vehicle = Vehicle.constructFromFile("./vehicle/vehicle.dat")
ds_vehicle.df


Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,class
0,95,48,83,178,72,10,162,42,20,159,176,379,184,70,6,16,187,197,van
1,91,41,84,141,57,9,149,45,19,143,170,330,158,72,9,14,189,199,van
2,104,50,106,209,66,10,207,32,23,158,223,635,220,73,14,9,188,196,saab
3,93,41,82,159,63,9,144,46,19,143,160,309,127,63,6,10,199,207,van
4,85,44,70,205,103,52,149,45,19,144,241,325,188,127,9,11,180,183,bus
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
841,93,39,87,183,64,8,169,40,20,134,200,422,149,72,7,25,188,195,saab
842,89,46,84,163,66,11,159,43,20,159,173,368,176,72,1,20,186,197,van
843,106,54,101,222,67,12,222,30,25,173,228,721,200,70,3,4,187,201,saab
844,86,36,78,146,58,7,135,50,18,124,155,270,148,66,0,25,190,195,saab


In [4]:
# Load the Musk "clean1" dataset from disk and render its DataFrame as the
# cell output.
# NOTE(review): the variable is spelled "mushclean" although the data is the
# Musk dataset — confirm before renaming, later cells may reference it.
ds_mushclean = MuskClean.constructFromFile("./musk/clean1.data")
ds_mushclean.df

Unnamed: 0,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,f_9,...,f_157,f_158,f_159,f_160,f_161,f_162,f_163,f_164,f_165,class
0,42,-198,-109,-75,-117,11,23,-88,-28,-27,...,-74,-129,-120,-38,30,48,-37,6,30,1.0
1,42,-191,-142,-65,-117,55,49,-170,-45,5,...,-302,60,-120,-39,31,48,-37,5,30,1.0
2,42,-191,-142,-75,-117,11,49,-161,-45,-28,...,-73,-127,-120,-38,30,48,-37,5,31,1.0
3,42,-198,-110,-65,-117,55,23,-95,-28,5,...,-302,60,-120,-39,30,48,-37,6,30,1.0
4,42,-198,-102,-75,-117,10,24,-87,-28,-28,...,-73,-127,51,128,144,43,-30,14,26,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,49,-199,-161,29,-95,-86,-48,2,112,-79,...,-246,-209,33,152,134,47,-43,-15,-10,0.0
472,38,-123,-139,30,-117,-88,214,-13,-74,-129,...,-226,-210,20,55,119,79,-28,4,74,0.0
473,43,-102,-20,-101,-116,200,-166,66,-222,-49,...,32,136,-15,143,121,55,-37,-19,-36,0.0
474,39,-58,27,31,-117,-92,85,21,-73,-68,...,-232,-206,13,45,116,79,-28,3,74,0.0


Set up the DEAP `creator`

In [None]:
# Register a two-objective fitness class named "FitnessMin" on DEAP's creator.
# Both objectives are to be minimised, hence the two negative weights (-1, -1).
creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0)) 