**Travaux dirigés No 5: régression symbolique**

In [1]:
## Installation du FrameWork DEAP
!pip3 install deap



In [2]:
#Import d'outils pythons
import random
import operator
import math
import numpy
import pandas as pd
##Import les outils deap
from deap import base
from deap import creator
from deap import tools
from deap import gp
from deap import algorithms
from sklearn.metrics import mean_squared_error

In [3]:
##Ajout des protecteurs sur les operations
def pro_div(*args):
    try:
        res = operator.truediv(*args)
        if not math.isnan(res) and not math.isinf(res):
            return res
    except ZeroDivisionError:
        pass
    return 1


def pr_log(arg):
    try : 
        res = math.log(arg)
        if not math.isnan(res) and not math.isinf(res):
            return res
    except ValueError :
        pass
    return 1

In [4]:
##Ajout des opérations  arithmétiques
pset = gp.PrimitiveSet("MAIN", 2)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(pro_div, 2)
pset.addPrimitive(pr_log, 1)
pset.addPrimitive(operator.neg, 1)

In [5]:
#Ajout des opérations trigonométriques
pset.addPrimitive(math.cos, 1)
pset.addPrimitive(math.sin, 1)

In [6]:
##Ajout de constante aleatoire ephemère
pset.addEphemeralConstant("rand101", lambda: random.randint(-1,1))
#Rename les arguments genérés par les opérateurs
pset.renameArguments(ARG0='x')

In [7]:
#Crée l'objet de fitness
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
#Crée l'objet individu en se basant sur un arbre 
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

In [8]:
#Création de notre Toolbox
toolbox = base.Toolbox()
#outils de creation d'un individu et d'une population
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

In [9]:
# En statistiques, l’erreur quadratique moyenne  est synonyme a Mean Squared Error (MSE) en
# Anglais.
# Cette fonction existe dans scki kit learn
# --> https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
toolbox.register("mse_opti", lambda ind, predy, ybool : mean_squared_error(ybool, predy))

In [10]:
## fct d'evaluation
def evalSymbReg(individual, xpoints,ybool):
    func = toolbox.compile(expr=individual)
    predy = [func(x1,x2) for x1, x2 in xpoints]
    res = toolbox.mse_opti(individual, ybool, predy)
    return (res,)

In [11]:
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

In [12]:
def main(toolbox):
    pop = toolbox.population(n=100)
    hof = tools.HallOfFame(1)
    
    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", numpy.mean)
    mstats.register("std", numpy.std)
    mstats.register("min", numpy.min)
    mstats.register("max", numpy.max)

    pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 80, stats=mstats,
                                   halloffame=hof, verbose=True)
    # print log
    return log, hof

In [13]:
#Importation du dataset de l'exemple dans la consigne
df =  pd.read_csv("dataset.tsv", sep="\t")
#Affichage du dataset
print(df)

    x1   x2    y
0  1.0  0.0  1.0
1  0.0  1.0  1.0
2  0.0  0.0  0.0
3  1.0  1.0  2.0
4  1.0  2.0  3.0
5  2.0  1.0  3.0


In [16]:
vl = list(zip(df["x2"],df["y"]))
toolbox.register("evaluate", evalSymbReg, xpoints = vl, ybool = df[['y']])

In [17]:
main(toolbox)

   	      	                    fitness                    	                      size                     
   	      	-----------------------------------------------	-----------------------------------------------
gen	nevals	avg    	gen	max    	min	nevals	std    	avg 	gen	max	min	nevals	std    
0  	100   	5.82953	0  	58.6667	0  	100   	9.50751	3.38	0  	6  	2  	100   	1.29445
1  	67    	2.50922	1  	23     	0  	67    	3.09581	3.41	1  	8  	1  	67    	1.32737
2  	49    	1.83162	2  	10.3333	0  	49    	2.18505	3.32	2  	8  	1  	49    	1.08517
3  	57    	1.74789	3  	16     	0  	57    	2.71331	3.21	3  	7  	1  	57    	1.00295
4  	55    	1.87543	4  	55.3333	0  	55    	5.77725	3.04	4  	6  	1  	55    	0.82365
5  	50    	1.34708	5  	16     	0  	50    	2.78139	2.89	5  	6  	1  	50    	1.10359
6  	47    	1.29549	6  	32.3333	0  	47    	3.97447	2.55	6  	7  	1  	47    	1.29132
7  	61    	0.721457	7  	16     	0  	61    	2.10571	2.16	7  	8  	1  	61    	1.41223
8  	59    	0.374935	8  	8.33333	0  	59    	1.25

([{'gen': 0, 'nevals': 100},
  {'gen': 1, 'nevals': 67},
  {'gen': 2, 'nevals': 49},
  {'gen': 3, 'nevals': 57},
  {'gen': 4, 'nevals': 55},
  {'gen': 5, 'nevals': 50},
  {'gen': 6, 'nevals': 47},
  {'gen': 7, 'nevals': 61},
  {'gen': 8, 'nevals': 59},
  {'gen': 9, 'nevals': 40},
  {'gen': 10, 'nevals': 55},
  {'gen': 11, 'nevals': 58},
  {'gen': 12, 'nevals': 56},
  {'gen': 13, 'nevals': 58},
  {'gen': 14, 'nevals': 57},
  {'gen': 15, 'nevals': 57},
  {'gen': 16, 'nevals': 66},
  {'gen': 17, 'nevals': 58},
  {'gen': 18, 'nevals': 60},
  {'gen': 19, 'nevals': 48},
  {'gen': 20, 'nevals': 61},
  {'gen': 21, 'nevals': 58},
  {'gen': 22, 'nevals': 50},
  {'gen': 23, 'nevals': 58},
  {'gen': 24, 'nevals': 52},
  {'gen': 25, 'nevals': 51},
  {'gen': 26, 'nevals': 51},
  {'gen': 27, 'nevals': 52},
  {'gen': 28, 'nevals': 46},
  {'gen': 29, 'nevals': 58},
  {'gen': 30, 'nevals': 53},
  {'gen': 31, 'nevals': 56},
  {'gen': 32, 'nevals': 58},
  {'gen': 33, 'nevals': 64},
  {'gen': 34, 'nevals':