# Task 6-bayesian_optimization by Clayton Christian

## Import Libraries

In [None]:
import tensorflow as tf
# !pip install GPyOpt
from GPyOpt.methods import BayesianOptimization
import matplotlib.pyplot as plt
import numpy as np
import csv

# Load the datasets:
We're using a complete collection of William Shakespeare's works, The Book of Mormon, The Gospel of Buddha, Marcus Aurelius' Meditations, The Old Testament, and the Koran. All files are uploaded as .txt files.

In [3]:
text_files = ['data/book_of_mormon.txt', 'data/gospel_of_buddha.txt', 'data/meditations.txt',
              'data/old_testament.txt', 'data/t8.shakespeare.txt', 'data/the_koran.txt']

# Read every file and keep all of them. Rebinding `text` inside the loop would
# discard each file as soon as the next one is read, leaving only the last
# file's contents for the rest of the notebook.
corpus_parts = []
for text_file in text_files:
    with open(text_file, 'r') as file:
        corpus_parts.append(file.read())

# Single training corpus; the newline separator keeps the last word of one
# work from running into the first word of the next.
text = '\n'.join(corpus_parts)


# Preprocess the Data

Here we convert the characters to integers and build the input-output sequences used to train our RNN.

In [4]:
# Vocabulary: every distinct character in the corpus, sorted so that the
# character -> integer id assignment is the same on every run.
chars = sorted(set(text))
char_to_int = {c: i for i, c in enumerate(chars)}

# Create input-output sequences
seq_length = 100

# Encode the whole corpus once instead of re-encoding each character for every
# window it appears in.
encoded = np.array([char_to_int[c] for c in text], dtype=np.int32)

# One sample per start offset i in range(len(text) - seq_length):
#   input  = ids of text[i : i + seq_length]
#   target = id  of text[i + seq_length]
num_sequences = max(len(encoded) - seq_length, 0)

# Keras needs NumPy arrays (not nested Python lists) to slice off a
# validation_split, so dataX / dataY are built as integer arrays.
if num_sequences > 0:
    windows = np.lib.stride_tricks.sliding_window_view(encoded, seq_length)
    # The view has one trailing window with no following target character;
    # drop it, and copy so dataX is an ordinary writable, contiguous array.
    dataX = np.ascontiguousarray(windows[:num_sequences])
else:
    # Corpus too short to form a single (input, target) pair.
    dataX = np.empty((0, seq_length), dtype=np.int32)
dataY = encoded[seq_length:seq_length + num_sequences]

## We're using a simple RNN model in TensorFlow, with the parameters defined by the school project.

In [5]:
def create_model(learning_rate, num_units, dropout_rate):
    """Build and compile the two-layer SimpleRNN next-character model.

    Args:
        learning_rate: Learning rate handed to the Adam optimizer.
        num_units: Used both as the embedding output size and as the number
            of units in each SimpleRNN layer.
        dropout_rate: Rate for the Dropout layer placed after each SimpleRNN.

    Returns:
        A compiled ``tf.keras.Sequential`` model whose softmax output has one
        entry per character in the module-level ``chars`` vocabulary.
    """
    vocab_size = len(chars)

    layer_stack = [
        tf.keras.layers.Embedding(vocab_size, num_units,
                                  input_length=seq_length),
        # First recurrent layer emits the full sequence so it can feed the
        # second recurrent layer.
        tf.keras.layers.SimpleRNN(num_units, return_sequences=True),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.SimpleRNN(num_units),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(vocab_size, activation='softmax'),
    ]
    model = tf.keras.Sequential(layer_stack)

    # Targets are integer character ids, hence the sparse cross-entropy loss.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

## Here we're defining the **objective function** for our **Bayesian optimization**. We're defining a function that **takes the hyperparameters as input and returns our validation loss**.

In [6]:
def objective_function(hyperparameters):
    """Train one model for a hyperparameter setting and score it.

    Args:
        hyperparameters: 2-D array-like with a single row holding
            ``(learning_rate, num_units, dropout_rate, batch_size)``; only
            row 0 is read.

    Returns:
        The lowest validation loss seen during training (lower is better).
    """
    # A fresh model is built on every call; clear Keras' global state first so
    # memory does not keep growing over the course of the optimization.
    tf.keras.backend.clear_session()

    learning_rate, num_units, dropout_rate, batch_size = hyperparameters[0]
    # The values arrive as floats; layer sizes and batch size must be ints.
    num_units = int(num_units)
    batch_size = int(batch_size)

    model = create_model(learning_rate, num_units, dropout_rate)

    # Use early stopping
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=3)

    # Train the model
    history = model.fit(dataX, dataY, validation_split=0.2, epochs=30,
                        batch_size=batch_size, verbose=0, callbacks=[early_stopping])

    # Score the trial by its best epoch. When early stopping fires, the final
    # epoch is `patience` epochs past the best one, so the last value would
    # systematically overstate the loss this configuration can reach.
    return min(history.history['val_loss'])

# Here we are running our **Bayesian Optimization**

In [None]:
# Search space explored by the optimizer, in the order objective_function
# unpacks it.
domain = [
    {'name': 'learning_rate', 'type': 'continuous', 'domain': (1e-4, 1e-2)},
    {'name': 'num_units', 'type': 'discrete', 'domain': (32, 64, 128, 256)},
    {'name': 'dropout_rate', 'type': 'continuous', 'domain': (0.1, 0.5)},
    {'name': 'batch_size', 'type': 'discrete', 'domain': (32, 64, 128)}
]

optimizer = BayesianOptimization(f=objective_function, domain=domain)

# The iteration budget belongs to run_optimization(); passed to the
# constructor it is not used as the budget, and run_optimization() would fall
# back to its default and stop after the initial design points.
optimizer.run_optimization(max_iter=30)

## Plot the convergence of the optimization process to observe how the **validation loss decreases** over time.

In [None]:
# Ask the finished optimizer to draw its built-in convergence plot.
optimizer.plot_convergence()
# Explicitly display any matplotlib figure that is still open.
plt.show()

# Save optimization report as .txt file

In [None]:
# Plain-text dump of the run: the evaluated hyperparameter matrix first, then
# the matching objective values, separated by a single newline.
with open('bayes_opt.txt', 'w') as report_file:
    report_file.write('\n'.join([str(optimizer.X), str(optimizer.Y)]))

# Save optimization report as .csv file

In [None]:
# Assuming optimizer is your BayesianOptimization object
# Assuming optimizer is your BayesianOptimization object
hyperparameters = optimizer.X  # Matrix of hyperparameters, one row per evaluation
# optimizer.Y is expected to hold one objective value per evaluation as a 2-D
# column. Flatten it so each CSV cell is a plain number ("1.234") rather than
# the text of a one-element array ("[1.234]").
objective_values = np.ravel(optimizer.Y)

# Define the header for the CSV file
header = ['learning_rate', 'num_units',
          'dropout_rate', 'batch_size', 'objective_value']

# newline='' lets the csv module control line endings itself.
with open('optimization_report.csv', 'w', newline='') as file:
    writer = csv.writer(file)

    # Write the header
    writer.writerow(header)

    # One row per evaluation: the hyperparameters followed by the objective value
    for params, objective_value in zip(hyperparameters, objective_values):
        writer.writerow(list(params) + [objective_value])