In [None]:
from tensorflow import keras
from keras import layers
from sklearn import preprocessing
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as display
from itertools import product

# Search-wide trackers, updated from inside sdae_reduction: the lowest
# training loss seen so far and the configuration that produced it.
best_hyperparameters = None
best_loss = float('inf')

source_dir = r'C:\Users\bartlomiej.kielan\Desktop\codes\azure-ml-internal-project\data\aggregated'
csv_files = [entry for entry in os.listdir(source_dir) if entry.endswith('.csv')]

# Read every aggregated CSV and stack them into one frame with a fresh 0..n-1 index.
dfs = [pd.read_csv(os.path.join(source_dir, csv_name)) for csv_name in csv_files]
combined_df = pd.concat(dfs, ignore_index=True)

# Set the country label aside (pop removes the column from the frame) so it
# can be re-attached to the encoded output later.
country_region = combined_df.pop('Country_Region')

# Only float64/int64 columns are fed to the model.
numeric_columns = combined_df.select_dtypes(include=['float64', 'int64']).columns
data_numeric = combined_df[numeric_columns]

# NOTE: despite its name, stand_scaler is a MinMaxScaler, not a standard scaler.
stand_scaler = preprocessing.MinMaxScaler()
data_scaled = stand_scaler.fit_transform(data_numeric)

def sdae_reduction(data_scaled, dim_num=2, hyperparameters=None):
    """Train a dense autoencoder on ``data_scaled`` and return its encoding.

    The encoder is a stack of Dense layers whose widths start at
    ``layer_dim`` and halve at every layer (never dropping below 2),
    followed by a ``dim_num``-unit bottleneck. The decoder mirrors the
    encoder and ends in a sigmoid layer as wide as the input. The model is
    trained one epoch at a time with MSE loss so that the loss curve can be
    redrawn and the global best tracked after every epoch.

    Side effects:
        * Redraws a live loss plot in the notebook output after each epoch.
        * Updates the module-level ``best_loss`` / ``best_hyperparameters``
          whenever an epoch's training loss beats the best seen so far, and
          saves the autoencoder at that moment to ``best_ae_model.keras``.

    Args:
        data_scaled: 2-D array-like (must expose ``.shape[1]``), used as
            both the input and the reconstruction target.
        dim_num: Width of the bottleneck, i.e. the encoding dimension.
        hyperparameters: 6-tuple ``(num_layers, layer_dim, activation,
            epochs, batch_size, learning_rate)``.

    Returns:
        The encoder's output for ``data_scaled`` after the final epoch,
        with ``dim_num`` values per input row.

    Raises:
        ValueError: If ``hyperparameters`` is not provided.
    """
    global best_loss
    global best_hyperparameters

    if hyperparameters is None:
        raise ValueError(
            "hyperparameters must be a 6-tuple: (num_layers, layer_dim, "
            "activation, epochs, batch_size, learning_rate)"
        )

    num_layers_values, layer_dim_values, activation_ch, epoch_num, batch_size_num, learning_rate_num = hyperparameters

    # Layer widths halve at each depth, floored at 2 units.
    neurons_per_layer = [
        max(int(layer_dim_values / (2**i)), 2)
        for i in range(num_layers_values)
    ]
    # The last computed width is replaced by the dim_num bottleneck, so the
    # encoder has num_layers - 1 hidden layers.
    hidden_dims = neurons_per_layer[:-1]
    input_dim = data_scaled.shape[1]

    encoder_input = layers.Input(shape=(input_dim,))
    encoder_output = encoder_input
    for dim in hidden_dims:
        encoder_output = layers.Dense(dim, activation=activation_ch)(encoder_output)

    encoded_layer = layers.Dense(dim_num, activation=activation_ch)(encoder_output)
    encoder = keras.models.Model(encoder_input, encoded_layer)

    decoder_input = layers.Input(shape=(dim_num,))
    decoder_output = decoder_input
    for dim in reversed(hidden_dims):
        decoder_output = layers.Dense(dim, activation=activation_ch)(decoder_output)
    decoder_output = layers.Dense(input_dim, activation='sigmoid')(decoder_output)
    decoder = keras.models.Model(decoder_input, decoder_output)

    # Encoder and autoencoder share layers, so training the autoencoder also
    # trains the encoder used for the returned encoding.
    autoencoder = keras.models.Model(encoder_input, decoder(encoded_layer))

    opt = keras.optimizers.Adam(learning_rate=learning_rate_num)
    autoencoder.compile(loss="mse", optimizer=opt)

    models_directory = r'C:\Users\bartlomiej.kielan\Desktop\codes\azure-ml-internal-project\pipeline\training\models'
    model_filename = os.path.join(models_directory, 'best_ae_model.keras')

    loss_values = []
    epoch_numbers = []

    # Fit one epoch per call so the plot and best-model bookkeeping can run
    # between epochs.
    for epoch in range(epoch_num):
        history = autoencoder.fit(
            data_scaled,
            data_scaled,
            epochs=1,
            batch_size=batch_size_num,
            verbose=1
        )
        epoch_loss = history.history['loss'][0]

        loss_values.append(epoch_loss)
        epoch_numbers.append(epoch + 1)

        # Reuse the current figure and replace the previous cell output so
        # the notebook shows a single, continuously updated curve.
        plt.clf()
        plt.plot(epoch_numbers, loss_values, label='Training Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        display.clear_output(wait=True)
        display.display(plt.gcf())

        if best_hyperparameters is None or epoch_loss < best_loss:
            best_loss = epoch_loss
            best_hyperparameters = {
                'num_layers_values': num_layers_values,
                'layer_dim_values': layer_dim_values,
                'activation': activation_ch,
                'epochs': epoch + 1,
                'batch_size': batch_size_num,
                'learning_rate': learning_rate_num
            }
            # Make sure the target folder exists so the first save cannot
            # fail on a fresh checkout.
            os.makedirs(models_directory, exist_ok=True)
            autoencoder.save(model_filename)

    display.clear_output()

    # Previously nothing was returned, leaving the caller with None instead
    # of the encoded data it expects.
    return encoder.predict(data_scaled, verbose=0)

# Full Cartesian grid: 8 depths x 5 widths x 3 activations x 10 epoch
# budgets x 1 batch size x 20 learning rates = 24,000 configurations.
hyperparameter_grid = product(
    [3, 4, 5, 6, 7, 8, 9, 10],
    [10, 15, 20, 25, 30],
    ["relu", "sigmoid", "tanh"],
    range(100, 1001, 100),
    [128],
    np.linspace(0.0001, 0.1, num=20)
)

# Encoding kept for export. Without this, the CSV would hold whatever the
# very last grid point produced, regardless of how well it trained.
best_encoded_data = None

for hyperparameters in hyperparameter_grid:
    print(f"Hyperparameters: {hyperparameters}")

    # sdae_reduction lowers the global best_loss whenever one of its epochs
    # beats it, so a drop across the call means this run set a new best.
    loss_before_run = best_loss
    encoded_data = sdae_reduction(data_scaled, dim_num=2, hyperparameters=hyperparameters)
    if best_encoded_data is None or best_loss < loss_before_run:
        # NOTE(review): this is the run's end-of-training output; the best
        # epoch of the run may have been earlier - confirm this is acceptable.
        best_encoded_data = encoded_data

    print(f'Best Loss: {best_loss}')
    if best_hyperparameters:
        print(f'Best Hyperparameters: {best_hyperparameters}')

encoded_df = pd.DataFrame(best_encoded_data, columns=['x', 'y'])
encoded_df['Country_Region'] = country_region

encoded_csv_directory = r'C:\Users\bartlomiej.kielan\Desktop\codes\azure-ml-internal-project\pipeline\training\hyperparameter_tuning'
# Create the output folder if needed so a long search does not fail at the
# final write.
os.makedirs(encoded_csv_directory, exist_ok=True)

encoded_csv_path = os.path.join(encoded_csv_directory, 'encoded_data.csv')
encoded_df.to_csv(encoded_csv_path, index=False)


In [None]:
from tensorflow import keras
from keras import layers
import pandas as pd
import os
import numpy as np
from itertools import product
import asyncio
import nest_asyncio

# Allow run_until_complete() to be called while the notebook's own event
# loop is already running.
nest_asyncio.apply()

# Reset the search trackers here so this cell does not depend on values
# left behind by (or missing from) an earlier cell.
best_loss = float('inf')
best_hyperparameters = None

source_dir = r'C:\Users\bartlomiej.kielan\Desktop\codes\azure-ml-internal-project\data\aggregated'
csv_files = [file for file in os.listdir(source_dir) if file.endswith('.csv')]

dfs = []

for filename in csv_files:
    filepath = os.path.join(source_dir, filename)
    temp_df = pd.read_csv(filepath)
    dfs.append(temp_df)

combined_df = pd.concat(dfs, ignore_index=True)
combined_df = combined_df.drop(columns=['Country_Region'])
numeric_columns = combined_df.select_dtypes(include=['float64', 'int64']).columns
data_numeric = combined_df[numeric_columns]

# Min-max scale every column into [0, 1]. The model below ends in a sigmoid
# and is trained with binary cross-entropy, which is only bounded below for
# targets in [0, 1]; feeding raw values makes the loss run off to large
# negative numbers. Constant columns get a range of 1 so they map to 0
# instead of dividing by zero.
column_min = data_numeric.min()
column_range = (data_numeric.max() - column_min).replace(0, 1)
data_scaled = ((data_numeric - column_min) / column_range).values

async def sdae_reduction(data_scaled, dim_num=2, hyperparameters=None):
    """Asynchronously train a dense autoencoder and track the best loss.

    This coroutine has the same name as the synchronous function defined in
    the earlier cell and replaces it once this cell has run.

    The network is a stack of Dense layers whose widths start at
    ``layer_dim`` and halve per layer (never below 2), a ``dim_num``-unit
    bottleneck, and a mirrored decoder ending in a sigmoid layer as wide as
    the input. It is compiled with binary cross-entropy and trained one
    epoch at a time; each ``fit`` call runs in a worker thread via
    ``asyncio.to_thread`` so other coroutines can progress meanwhile.

    Side effects:
        Updates the module-level ``best_loss`` / ``best_hyperparameters``
        whenever an epoch's training loss beats the best seen so far. Both
        globals must exist before the first call.

    Args:
        data_scaled: 2-D array-like (must expose ``.shape[1]``), used as
            both input and reconstruction target.
            NOTE(review): with a sigmoid output and binary cross-entropy the
            values should lie in [0, 1] - confirm the caller scales them.
        dim_num: Width of the bottleneck layer.
        hyperparameters: 6-tuple ``(num_layers, layer_dim, activation,
            epochs, batch_size, learning_rate)``.

    Returns:
        List with the training loss of each epoch, in order.

    Raises:
        ValueError: If ``hyperparameters`` is not provided.
    """
    global best_loss
    global best_hyperparameters

    if hyperparameters is None:
        raise ValueError(
            "hyperparameters must be a 6-tuple: (num_layers, layer_dim, "
            "activation, epochs, batch_size, learning_rate)"
        )

    num_layers_values, layer_dim_values, activation_ch, epoch_num, batch_size_num, learning_rate_num = hyperparameters

    # Layer widths halve at each depth, floored at 2 units.
    neurons_per_layer = [
        max(int(layer_dim_values / (2**i)), 2)
        for i in range(num_layers_values)
    ]
    # The last computed width is replaced by the dim_num bottleneck.
    hidden_dims = neurons_per_layer[:-1]
    input_dim = data_scaled.shape[1]

    encoder_input = layers.Input(shape=(input_dim,))
    encoder_output = encoder_input
    for dim in hidden_dims:
        encoder_output = layers.Dense(dim, activation=activation_ch)(encoder_output)

    encoded_layer = layers.Dense(dim_num, activation=activation_ch)(encoder_output)

    decoder_input = layers.Input(shape=(dim_num,))
    decoder_output = decoder_input
    for dim in reversed(hidden_dims):
        decoder_output = layers.Dense(dim, activation=activation_ch)(decoder_output)
    decoder_output = layers.Dense(input_dim, activation='sigmoid')(decoder_output)
    decoder = keras.models.Model(decoder_input, decoder_output)

    autoencoder = keras.models.Model(encoder_input, decoder(encoded_layer))

    opt = keras.optimizers.Adam(learning_rate=learning_rate_num)
    autoencoder.compile(loss="binary_crossentropy", optimizer=opt)

    loss_values = []

    for epoch in range(epoch_num):
        # Hand the blocking fit() to a worker thread; the bookkeeping below
        # runs back in the coroutine once the epoch has finished.
        history = await asyncio.to_thread(
            autoencoder.fit,
            data_scaled,
            data_scaled,
            epochs=1,
            batch_size=batch_size_num,
            verbose=0
        )
        epoch_loss = history.history['loss'][0]
        loss_values.append(epoch_loss)

        if best_hyperparameters is None or epoch_loss < best_loss:
            best_loss = epoch_loss
            best_hyperparameters = {
                'num_layers_values': num_layers_values,
                'layer_dim_values': layer_dim_values,
                'activation': activation_ch,
                'epochs': epoch + 1,
                'batch_size': batch_size_num,
                'learning_rate': learning_rate_num
            }

    return loss_values

async def main(max_concurrency=None):
    """Run the asynchronous grid search over all hyperparameter combinations.

    Every grid point becomes a task that awaits ``sdae_reduction``; results
    are reported through the module-level ``best_loss`` /
    ``best_hyperparameters`` that ``sdae_reduction`` maintains.

    Args:
        max_concurrency: Maximum number of configurations that may be
            building or training a model at once. Defaults to
            ``min(32, cpu_count + 4)``, the size of the default thread pool
            that ``asyncio.to_thread`` uses, so training parallelism is
            unchanged.
    """
    # 8 depths x 6 widths x 3 activations x 14 epoch budgets x 1 batch size
    # x 20 learning rates = 40,320 configurations.
    hyperparameter_grid = product(
        [3, 4, 5, 6, 7, 8, 9, 10],
        [10, 15, 20, 25, 30, 45],
        ["relu", "sigmoid", "tanh"],
        range(100, 1500, 100),
        [128],
        np.linspace(0.0001, 0.1, num=20)
    )

    if max_concurrency is None:
        max_concurrency = min(32, (os.cpu_count() or 1) + 4)
    limiter = asyncio.Semaphore(max_concurrency)

    async def _run_limited(hyperparameters):
        # A task builds its Keras model as soon as it first runs. Without the
        # limiter every grid point would hold a model in memory while waiting
        # for a free worker thread.
        async with limiter:
            return await sdae_reduction(data_scaled, dim_num=2, hyperparameters=hyperparameters)

    tasks = [
        asyncio.create_task(_run_limited(hyperparameters))
        for hyperparameters in hyperparameter_grid
    ]

    await asyncio.gather(*tasks)

if __name__ == '__main__':
    # Drive the whole search to completion on the current event loop, then
    # report that it finished.
    asyncio.get_event_loop().run_until_complete(main())
    print("Training completed.")


10/60 [====>.........................] - ETA: 4s - loss: -548773440782336.0000

KeyboardInterrupt: 



