In [27]:
import numpy as np
import matplotlib.pyplot as plt
import time


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Layer,Dense,Multiply,Input,Activation
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential

Create customized neuron and parameters

In [28]:
''' 
We create a single-neuron NN with a customized activation function.


Inputs are given by
- units:int = number of neurons in the NN (1 -> single neuron)
- nvar:int = number of input variables (number of columns in dataset, i.e. number of features)
- npar:int = number of numerical parameters appearing in program (number of float numbers used in program)
- minval:float = lower bound in float range
- maxval:float = upper bound in float range


Build initializes the neuron weights. 
Weights are trainable parameters to be used as float values in symbolic regression after training
- weights appear as a numpy array of shape (units,npar) 
- weights are initially uniformly sampled in the range [minval,maxval]
- weights are set as trainable


Call computes the neuron output:
- activation function is set to program_function that is defined outside the class  
- program_function(inputs, self.w, self.nvar, self.npar) contains the skeleton of the 
  binary tree created by symbolic regression, i.e. the functional dependence but 
  no predefined parameters. (See program_function)
'''

class ProgramNN(Layer):
    """Keras layer whose output is ``program_function`` applied to its inputs.

    The layer owns a single trainable weight matrix ``w`` of shape
    ``(units, npar)``; its entries play the role of the numerical constants
    of the symbolic-regression program and are fitted by gradient descent.

    Args:
        units: number of rows of the weight matrix (1 -> single neuron).
        nvar: number of input variables (columns of the input tensor).
        npar: number of numerical parameters appearing in the program.
        minval: lower bound of the uniform range used to initialize ``w``.
        maxval: upper bound of the uniform range used to initialize ``w``.
        **kwargs: standard ``Layer`` keyword arguments (``name``, ``dtype``,
            ``trainable``, ...), forwarded unchanged to the base class.
    """

    def __init__(self, units:int, nvar:int ,npar:int, minval:float, maxval:float, **kwargs):
        # Forward the base-class keyword arguments so the layer can be named
        # and can be rebuilt from the dictionary returned by get_config().
        super().__init__(**kwargs)
        self.units = units
        self.npar = npar
        self.nvar = nvar
        self.minval = minval
        self.maxval = maxval

    def build(self, input_shape):
        """Create the trainable parameter matrix ``w``.

        ``w`` has shape ``(units, npar)`` and is sampled uniformly in
        ``[minval, maxval]``.
        """
        w_init = tf.random_uniform_initializer(minval=self.minval, maxval=self.maxval)
        self.w = self.add_weight(name="w",
                                 shape=(self.units, self.npar),
                                 dtype='float32',
                                 regularizer=None,
                                 initializer=w_init,
                                 trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        """Return ``program_function(inputs, w, nvar, npar)``.

        ``program_function`` is looked up at call time and must be defined at
        module level before the layer is first called.
        """
        return program_function(inputs, self.w, self.nvar, self.npar)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            "units": self.units,
            "nvar": self.nvar,
            "npar": self.npar,
            "minval": self.minval,
            "maxval": self.maxval,
        })
        return config
        


In [47]:
'''
Previously, the symbolic regression created functions as:
  
  data <- dataset given by a pd.DataFrame with column names ['x_0','x_1','x_2']
  program.program = 'exp(-0.1 * x_0) + 1.2 * x_1 * x_2'
  
Now we need to create a function (program_function) based on the tree structure that takes as inputs:
  
  - X:tf.constant(data.to_numpy()) 
    dataset of features (dataset: [x[0],x[1],x[2],....]  x[i]=features[i]=i-th column)  
    
    !!!X is passed as argument in ProgramNN.call as input !!!
    
  - parameters:tf.constant([par[0],par[1],..])   
    array of parameters to be used in the function
    
    !!! parameters is initialized as w in ProgramNN.build !!!
    
  - nvar:int
    number of columns in dataset
    
  - npar:int 
    number of float numbers used in the program
    
    !!! nvar, npar are set in the ProgramNN.init   !!!


and does the following: 

- split the tensor X (dataset) in its columns  [x_0,x_1,x_2]-> [x_0], [x_1], [x_2]
  so that we can use each variable column independently
  
- split the tensor parameters in single values [par_1,par_2]-> [par_1], [par_2]
  so that we can use each parameter independently

- defines the function (that was given by program.program = 'exp(-0.1 * x_0) + 1.2 * x_1 * x_2') as:

  fun=tf.exp(par[0]*x_0)+par[1]* x_1 * x_2
  
   ######## 
   ##!!!!## np.fun -> tf.func   numpy functions need to be replaced by their tf equivalents
   ##!!!!## pd.DataFrame -> tf.constant   the dataset needs to be converted into a tf.constant
   ########

- returns fun

'''
def program_function(X:tf.constant,
                     parameters:tf.constant,
                     nvar:int,
                     npar:int):
    """Evaluate the program skeleton on a batch of inputs.

    ``X`` is split along axis 1 into ``nvar`` equal pieces and ``parameters``
    into ``npar`` equal pieces; the pieces are then combined as

        x[0]**2 + exp(par[0] * x[2]) - |par[1] * x[3]|

    The expression indexes input pieces 0, 2 and 3 and parameter pieces 0
    and 1, so it needs ``nvar >= 4`` and ``npar >= 2``.

    Args:
        X: tensor of input features, split along axis 1.
        parameters: tensor of program parameters, split along axis 1.
        nvar: number of pieces ``X`` is split into.
        npar: number of pieces ``parameters`` is split into.

    Returns:
        The tensor obtained by evaluating the expression above.
    """
    columns = tf.split(X, num_or_size_splits=nvar, axis=1)
    coefficients = tf.split(parameters, num_or_size_splits=npar, axis=1)

    squared_term = columns[0]**2
    exponential_term = tf.exp(coefficients[0]*columns[2])
    absolute_term = tf.abs(coefficients[1]*columns[3])

    return squared_term + exponential_term - absolute_term

Simulate a potential target (index)

In [48]:
'''
Simulating a target that has the same functional form as our program from the symbolic regression.
Of course, we need to fix the parameters to well defined values
inputs are given by 
X:tf.constant() = dataset transformed to tensorflow constant
nvar:int = number of columns in dataset (number of variables)
'''
def real_data(X:tf.constant,
              nvar:int):
    """Generate the synthetic target with fixed, known parameters.

    ``X`` is split along axis 1 into ``nvar`` equal pieces, which are
    combined as

        x[0]**2 + exp(1.4 * x[2]) - |0.8 * x[3]|

    Pieces 0, 2 and 3 are used, so ``nvar`` must be at least 4.

    Args:
        X: tensor of input features, split along axis 1.
        nvar: number of pieces ``X`` is split into.

    Returns:
        The tensor obtained by evaluating the expression above.
    """
    columns = tf.split(X, num_or_size_splits=nvar, axis=1)

    squared_term = columns[0]**2
    exponential_term = tf.exp(1.4*columns[2])
    absolute_term = tf.abs(0.8*columns[3])

    return squared_term + exponential_term - absolute_term

In [49]:
'''
Customised one step training: 
compute loss gradients
update model trainable weights using optimizer
return new loss value
'''
def one_training_step(model:"tf.keras.Model",
                      x:"tf.Tensor",
                      y_true:"tf.Tensor",
                      weights:"tf.Tensor",
                      opt:"tf.keras.optimizers.Optimizer",
                      loss_function:"tf.keras.losses.Loss"):
    """Run one custom gradient-descent step and return the loss.

    Args:
        model: model whose ``trainable_weights`` are updated in place.
        x: input batch, passed to ``model`` with ``training=True``.
        y_true: target values for the batch.
        weights: per-sample weights, forwarded to the loss as ``sample_weight``.
        opt: optimizer used to apply the gradients.
        loss_function: callable invoked as
            ``loss_function(y_true, y_pred, sample_weight=weights)``.

    Returns:
        The loss computed on the batch before the weight update.
    """
    # Record the forward pass so the loss can be differentiated with respect
    # to the trainable weights.
    with tf.GradientTape() as tape:
        y_pred = model(x, training=True)
        # Use the loss passed by the caller (the original referenced an
        # undefined global name here instead of this parameter).
        loss = loss_function(y_true, y_pred, sample_weight=weights)

    trainable = model.trainable_weights
    grads = tape.gradient(loss, trainable)
    opt.apply_gradients(zip(grads, trainable))

    return loss


In [75]:
'''
CREATING A FAKE DATASET THAT LOOKS LIKE OURS
'''
import pandas as pd

# Fixed seed so that "Restart & Run All" regenerates exactly the same fake data.
SEED = 42
N_SAMPLES = 1000
FEATURE_COLUMNS = ['x_0', 'x_1', 'x_2', 'x_3']

# Local generator: reproducible without touching NumPy's global random state.
rng = np.random.default_rng(SEED)

# One standard-normal column per feature.
df = pd.DataFrame(rng.normal(size=(N_SAMPLES, len(FEATURE_COLUMNS))),
                  columns=FEATURE_COLUMNS)
dataset_to_tensor = tf.constant(df.to_numpy(), dtype=tf.float32)

# The number of feature columns is passed explicitly, so it does not depend
# on the target/weights columns not having been added to df yet.
# real_data yields one value per row in a column tensor; flatten it to 1-D.
df['target'] = real_data(dataset_to_tensor, len(FEATURE_COLUMNS)).numpy().ravel()

# Uniform sample weights (float32, the dtype tf.ones would have produced).
df['weights'] = np.ones(N_SAMPLES, dtype=np.float32)
df.head()

Unnamed: 0,x_0,x_1,x_2,x_3,target,weights
0,-0.22143,1.085235,1.329246,-0.538409,6.048107,1.0
1,1.018574,-0.617236,-0.138118,0.365174,1.569535,1.0
2,-0.979786,0.143209,0.143652,1.312779,1.132519,1.0
3,1.364351,-0.926683,-0.114177,1.026624,1.892427,1.0
4,0.2673,-0.064395,0.527368,-2.955694,-0.2007,1.0


In [76]:
'''
Create a function to define how to split the dataset
(We probably already have one; in any case it just picks the chosen columns
 to separate the x features from the target and from the weights)
'''
def to_split_map(split_range):
    """Build a function that splits a 2-D tensor into column groups.

    Args:
        split_range: sequence of three ``[start_column, n_columns]`` pairs
            describing, in order, the features, the labels and the weights.
            It is read each time the returned function is called.

    Returns:
        A function ``split_window(df)`` returning the tuple
        ``(features, labels, weights)``; each element keeps every row of
        ``df`` and the columns selected by the corresponding pair.
    """
    def _take_columns(tensor, group):
        # begin=[0, start] with size=[-1, count] keeps every row and
        # `count` columns starting at column `start`.
        start = split_range[group][0]
        count = split_range[group][1]
        return tf.slice(tensor, begin=[0, start], size=[-1, count])

    def split_window(df):
        return _take_columns(df, 0), _take_columns(df, 1), _take_columns(df, 2)

    return split_window

In [97]:
'''
Train the network, then compute the parameters and the fitness


N.B. We could also decide to do this on a subset of the full dataset
'''

t=time.time()
nvar=4          #number of features
npar=2          #number of parameters in program
minval=-1.      #minimum value to initialize the parameters
maxval=1.       #maximum value to initialize the parameters
epochs=1        #number of epochs
# Number of data points per batch. This is the value actually handed to
# model.fit below; it is 1 (one optimizer step per sample) because that is
# what the fit call previously hard-coded while ignoring this setting.
batch_size=1

#CREATE TF VARIABLES FROM DATASET (to be changed, can be easier)
#columns related to features, target, weights as [start, n.columns];
#derived from nvar so the layout cannot drift from the feature count
split_range=[[0,nvar],[nvar,1],[nvar+1,1]]
split_map=to_split_map(split_range)               #split map
tfdf=tf.constant(df.to_numpy(),dtype=tf.float32)  #pandas DataFrame into tf.constant
X,y_true,weights=split_map(tfdf)                  #input, target, weights from map

#CREATE NEURON
inputs = Input(shape=[nvar], name="Input")
program = ProgramNN(units=1, nvar=nvar,npar=npar, minval=minval, maxval=maxval)(inputs)
model = Model(inputs=inputs, outputs=program)

#DEFINE LOSS FUNCTION AND OPTIMIZER
loss_mse = tf.keras.losses.MeanSquaredError()     #weights are passed to model.fit as sample_weight
opt= tf.keras.optimizers.Adam(learning_rate=1e-2)

#COMPILE THE MODEL
model.compile(loss=loss_mse, optimizer=opt,run_eagerly=False)

#TRAIN THE MODEL
history = model.fit(X, y_true, sample_weight=weights,
                    batch_size=batch_size, epochs=epochs, verbose=0)

#COMPUTE FINAL FITNESS AND PARAMETERS
fitness=(history.history['loss'][-1])             #loss of the last epoch
final_parameters=model.get_weights()[0][0]        #first row of the first weight array
# The sign of the second parameter is not identifiable: it only enters the
# program through tf.abs, hence the "(+/-)".
print('Real parameters: [1.4, (+/-) 0.8]')
print(f'Estimate parameters: {final_parameters}')
print(f'Final Loss: {fitness}')
print(f'Training time: {time.time()-t}')

2021-12-03 19:09:19.168318: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Real parameters: [1.4, (+/-) 0.8]
Estimate parameters: [ 1.4013735  -0.79195994]
Final Loss: 2.068160057067871
Training time: 2.1561927795410156
