# Task 2.4 (a)

The accompanying python code file CodeP2.5F23.ipynb also contains a skeleton program to train a neural <br>
network to predict the system output parameters [α, ηsys] for the input operating condition parameters <br>
[T1, γ, Qs]. This code CodeP2.5F23.ipynb differs from CodeP2.4F23.ipynb in that the weights and bias values <br>
for optimal fit are determined using a genetic algorithm. To do so, the machinery that facilitates the <br>
backpropagation calculations in CodeP2.4F23.ipynb is replaced by code that minimizes the loss function using a <br>
genetic algorithm. This code makes use of a special Python package, pygad, which is not part of the <br>
standard Python installation.

(a) The pygad package needs to be installed, and tips on how to do so are indicated below:

In [None]:
!pip install pygad
'''To use pygad, first import it.'''
import tensorflow.keras
import numpy as np
import pygad
print('pygad installed correctly if no error messages')

## Task 2.4 (b)

Once pygad is installed and you have copied the contents of file CodeP2.5F23 into one or two subsequent<br>
cells of a notebook, make the following changes to this program:

(i) Install the full data set (normalized with median values) that you used in CodeP2.4F23.ipynb in Tasks 2.1 and 2.2 into the data prep section of CodeP2.5F23.ipynb.

In [None]:
#CodeP2.5F23  ME249   V.P. Carey
# Import relevant packages
import json # Package for loading data
import tensorflow
import tensorflow.keras
import pygad.kerasga
import pygad
#the following 2 lines are only needed for Mac OS machines 
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# ====================================================
# Task 2.1 is on a separate .ipynb file. This section 
# is for loading the json files with the corresponding
# pre-process data
# ====================================================
# Load normalized data for xdata, ydata, and median values
# Read the four JSON files produced by the Task 2.1 pre-processing notebook
with open("n_xdata.json", "r") as xdata_file, \
        open("n_ydata.json", "r") as ydata_file, \
        open("x_medians.json", "r") as x_medians_file, \
        open("y_medians.json", "r") as y_medians_file:
    n_xdata_dict = json.load(xdata_file)
    n_ydata_dict = json.load(ydata_file)
    x_medians_dict = json.load(x_medians_file)
    y_medians_dict = json.load(y_medians_file)

# Each file keeps its payload under the "data" key
xdata, ydata = n_xdata_dict["data"], n_ydata_dict["data"]
x_medians, y_medians = x_medians_dict["data"], y_medians_dict["data"]

# Median values of the input variables (T1, gamma, Qs)
Tmed, gamed, qsmed = x_medians[0], x_medians[1], x_medians[2]

# Median values of the output variables (alpha, EffSys)
almed, efmed = y_medians[0], y_medians[1]

# Array forms of the data sets
xarray = np.array(xdata)    # columns: T1, gamma, Qs
yarray = np.array(ydata)    # columns: alpha, EffSys

# Inputs & outputs handed to the Keras model (separate arrays from xarray/yarray)
data_inputs = np.array(xdata)
data_outputs = np.array(ydata)


## Task 2.4 (b)

(ii) in the initializer, set minval = -0.9 and maxval = 0.9<br>
(iii) set num_generations = 1500

For all the cases to be tested, leave these parameters unchanged:<br>
num_solutions = 40<br>
num_parents_mating = 7<br>
sol_per_pop = 37 (batch size)<br>
parent_selection_type = "sss" (steady state replacement)<br>
keep_parents = 5 (number kept per generation)<br>
crossover_type = "single_point"<br>
mutation_type = "random"<br>
mutation_percent_genes = "default" (= 10%)

In [None]:
# Per-generation error history (MAE and MSE), appended to by the generation callback
mae_values, mse_values = [], []

#===define fitness function used in Genetic Algorithm - ga_instance input added as required by
#      PyGAD 2.20.0, in which the fitness function must accept 3 parameters:
#      1) The instance of the 'pygad.GA' class.
#      2) A solution to calculate its fitness value.
#      3) The solution's index within the population.
    
def fitness_func(ga_instance, solution, sol_idx):
    """Score one candidate weight vector for the genetic algorithm.

    The flat ``solution`` vector is converted to per-layer weights and loaded
    into the module-level ``model``, which then predicts on ``data_inputs``.
    The fitness is the reciprocal of the mean absolute error between
    ``data_outputs`` and those predictions, so a smaller error gives a larger
    fitness.

    ``ga_instance`` and ``sol_idx`` are accepted to match the signature PyGAD
    calls with; neither is used in the body.

    Side effect: the weights of the module-level ``model`` are overwritten.
    """
    layer_weights = pygad.kerasga.model_weights_as_matrix(
        model=model, weights_vector=solution)
    model.set_weights(weights=layer_weights)

    predicted = model.predict(data_inputs)

    # One MAE over all data points and both outputs; the tiny offset keeps the
    # reciprocal finite if the error is exactly zero.
    loss_fn = tensorflow.keras.losses.MeanAbsoluteError()
    meanabs_error = loss_fn(data_outputs, predicted).numpy() + 0.00000001

    return 1.0 / meanabs_error

#========================END

#===define callback that reports the best solution of each generation and records its
#      MAE and MSE so the error history can be examined after the run
def callback_generation(ga_instance):
    """Report progress and record the best solution's errors for one generation.

    Prints the number of completed generations and the best fitness, then
    loads the best solution's weights into the module-level ``model``,
    predicts on ``data_inputs`` and appends the resulting MAE and MSE
    (against ``data_outputs``) to ``mae_values`` and ``mse_values``.

    Side effect: the weights of the module-level ``model`` are overwritten.
    """
    # Look up the best solution once, reusing the fitness values PyGAD stored
    # for this generation. Calling best_solution() without pop_fitness makes
    # PyGAD re-run the fitness function (a model.predict per solution) over
    # the whole population on every call.
    solution, solution_fitness, solution_idx = ga_instance.best_solution(
        pop_fitness=ga_instance.last_generation_fitness)

    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness    = {fitness}".format(fitness=solution_fitness))

    # Evaluate the best solution on the full data set
    model_weights_matrix = pygad.kerasga.model_weights_as_matrix(model=model, weights_vector=solution)
    model.set_weights(weights=model_weights_matrix)
    predictions = model.predict(data_inputs)
    mae = tensorflow.keras.losses.MeanAbsoluteError()
    mse = tensorflow.keras.losses.MeanSquaredError()
    abs_error = mae(data_outputs, predictions).numpy()
    squared_error = mse(data_outputs, predictions).numpy()

    # Store MAE and MSE in the history lists
    mae_values.append(abs_error)
    mse_values.append(squared_error)
    
#========================END


#===defining a sequential Neural Network

#initialize weights with values between minval and maxval
# NOTE(review): the task text in this notebook asks for minval = -0.9 and
# maxval = 0.9, but this run uses +/-2.9 -- confirm this is the intended case.
init_minval = -2.9
init_maxval = 2.9


def _new_initializer():
    """Return a fresh RandomUniform initializer for a single layer."""
    return tensorflow.keras.initializers.RandomUniform(minval=init_minval,
                                                       maxval=init_maxval)


# Each layer gets its own initializer instance: per the Keras initializer
# documentation, an unseeded instance that is called more than once returns
# the same random values on each call, so sharing one instance across layers
# does not give independently drawn weights.
initializer = _new_initializer()  # original name kept; used by the first layer

model = tensorflow.keras.Sequential([
    tensorflow.keras.layers.Dense(4, activation='relu', input_shape=[3], kernel_initializer=initializer),
    tensorflow.keras.layers.Dense(5, activation='relu', kernel_initializer=_new_initializer()),
    tensorflow.keras.layers.Dense(4, activation='relu', kernel_initializer=_new_initializer()),
    tensorflow.keras.layers.Dense(3, activation='relu', kernel_initializer=_new_initializer()),
    tensorflow.keras.layers.Dense(2, kernel_initializer=_new_initializer())
  ])
#Print summary of model features
model.summary()


#===flatten all the weights of the Neural Network model into a single vector
weights_vector = pygad.kerasga.model_weights_as_vector(model=model)


#===KerasGA info
# KerasGA is part of the PyGAD library for training Keras models using the
# genetic algorithm (GA). The KerasGA project has a single module named
# kerasga.py which has a class named KerasGA for preparing an initial
# population of Keras model parameters.
#
# PyGAD is an open-source Python library for building the genetic algorithm
# and training machine learning algorithms.
# Library documentation (Read The Docs): https://pygad.readthedocs.io

#===KerasGA object whose genes are the weights of the NN model; it is created
#      with 40 solutions and is only used below to supply the initial population
keras_ga = pygad.kerasga.KerasGA(
    model=model,
    num_solutions=40,
)




#===GA run settings: number of generations and how many solutions mate per generation
num_generations = 1500
num_parents_mating = 7      # number of solutions selected as parents

# Number of solutions per population requested from pygad.GA.
# NOTE(review): the PyGAD docs state that sol_per_pop has no effect when an
# initial_population is supplied (one is built below from keras_ga, which was
# created with num_solutions=40) -- confirm which population size is in effect.
sol_per_pop = 37

# Steady State Selection: in every generation a few chromosomes (good - with
# high fitness) are selected for creating new offspring. Then some chromosomes
# (bad - with low fitness) are removed and the new offspring take their place.
# The rest of the population survives to the new generation.
parent_selection_type = "sss"

# Number of parents to keep in the next population:
#   -1 (the PyGAD default) keeps all parents, 0 keeps none, and a value
#   greater than 0 keeps that many parents.
# NOTE(review): the PyGAD docs say keep_parents only takes effect when
# keep_elitism=0; keep_elitism is not set in this notebook -- verify against
# the installed PyGAD version.
keep_parents = 5

# Type of crossover operation. Supported types are single_point, two_points,
# uniform and scattered (scattered from PyGAD 2.9.0). Defaults to single_point.
crossover_type = "single_point"

# Type of mutation operation. Supported types are random, swap, inversion,
# scramble and adaptive. Defaults to random.
mutation_type = "random"

# Percentage of genes to mutate. The string "default" is later translated into
# the integer 10, i.e. 10% of the genes are mutated. It must be >0 and <=100.
# From this percentage the number of genes to mutate is deduced and assigned
# to the mutation_num_genes parameter.
mutation_percent_genes = "default"


#===use the population prepared by KerasGA as the GA's initial population
initial_population = keras_ga.population_weights


#===gather the settings defined above, then build the pygad GA object from them
ga_settings = dict(
    num_generations=num_generations,
    num_parents_mating=num_parents_mating,
    initial_population=initial_population,
    fitness_func=fitness_func,
    sol_per_pop=sol_per_pop,
    parent_selection_type=parent_selection_type,
    keep_parents=keep_parents,
    crossover_type=crossover_type,
    mutation_type=mutation_type,
    mutation_percent_genes=mutation_percent_genes,
    on_generation=callback_generation,   # called by PyGAD with the GA instance
)
ga_instance = pygad.GA(**ga_settings)


#===run the GA; this is the step that trains the NN
ga_instance.run()

#===the following just plots and prints results after the run for the specified number of generations
# After the generations complete, a plot is shown that summarizes how the fitness values evolve over generations.
ga_instance.plot_fitness(title="PyGAD & Keras - Iteration vs. Fitness", linewidth=4)

# Best solution of the finished run: weight vector, fitness, and population index
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print(f"Fitness value of the best solution = {solution_fitness}")
print(f"Index of the best solution : {solution_idx}")

# Load the best weights into the model and predict on the full data set
best_solution_weights = pygad.kerasga.model_weights_as_matrix(
    model=model, weights_vector=solution)
model.set_weights(best_solution_weights)
predictions = model.predict(data_inputs)
print("Predictions : \n", predictions)

print("data_outputs : \n", data_outputs)

# Mean absolute error of the best solution over all data points and outputs
mae = tensorflow.keras.losses.MeanAbsoluteError()
abs_error = mae(data_outputs, predictions).numpy()
print("Absolute Error : ", abs_error)
#========================END

In [None]:
#SETTING UP PLOT
%matplotlib inline
# importing the required module
import matplotlib.pyplot as plt 
plt.rcParams['figure.figsize'] = [8, 8]  # square canvas
#========

# Predicted-vs-data plots, one per network output.
# Each entry: (output column, plot title, lower axis limit, upper axis limit)
parity_plots = (
    (0, 'Genetic Algorithm training of Neural Network ==> output = α', 0.1, 10),
    (1, 'Genetic Algorithm training of Neural Network ==> output = ηsys', 0.8, 1.2),
)

for column, plot_title, axis_lo, axis_hi in parity_plots:
    plt.scatter(predictions[:, column], data_outputs[:, column])
    plt.title(plot_title)
    plt.xlabel('predicted output for NN (units)')
    plt.ylabel('data output (units)')
    plt.loglog()
    plt.xlim(xmax=axis_hi, xmin=axis_lo)
    plt.ylim(ymax=axis_hi, ymin=axis_lo)
    # Red y = x reference line spanning the axis range
    x_data = np.linspace(axis_lo, axis_hi, num=3)
    y_data = x_data
    plt.plot(x_data, y_data, color='red')
    plt.show()


In [None]:
# =======================================
# RMSE calculations of predicted vs. data
# =======================================
# Data-minus-prediction residuals; column 0 is alpha, column 1 is efficiency
residuals = data_outputs - predictions
rmse_alpha = np.sqrt(np.mean(residuals[:, 0] ** 2))
rmse_eff = np.sqrt(np.mean(residuals[:, 1] ** 2))

print("RMSE for Alpha:", rmse_alpha)
print("RMSE for Eff:", rmse_eff)


## Saving Neural Network Trained Models

In [None]:
# Write the model, with the weights currently set on it, to the named file.
# NOTE(review): the file name is hard-coded for one case/run; change it before
# saving another run, otherwise this file is presumably overwritten.
model.save("model_case_5_run_6.h5")

In [None]:
# Plot MAE vs. generation
import matplotlib.pyplot as plt

# Size the x-axis from the recorded history rather than num_generations: one
# MAE value is appended per generation callback, so the two lengths differ
# (and plt.plot fails on the mismatch) if the run was interrupted or
# num_generations was edited after the run.
generations = range(1, len(mae_values) + 1)
plt.plot(generations, mae_values)
plt.xlabel('Generation')
plt.ylabel('Mean Absolute Error (MAE)')
plt.title('Combined MAE vs. Generation')
plt.show()

In [None]:
# Plot MAE & MSE vs. generation
import matplotlib.pyplot as plt

# Size the x-axis from the recorded history rather than num_generations: MAE
# and MSE are appended together once per generation callback, so their length
# differs from num_generations (and plt.plot fails on the mismatch) if the run
# was interrupted or num_generations was edited after the run.
generations = range(1, len(mae_values) + 1)
plt.figure(figsize=(12, 6))
plt.plot(generations, mae_values, label='MAE')
plt.plot(generations, mse_values, label='MSE')
plt.xlabel('Generation')
plt.ylabel('Error')
plt.title('Combined MAE & MSE vs. Generation')
plt.legend()
plt.show()