# In [None]:
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os

# Column layout of the results table. add_data_to_table fills one row per
# trained model using exactly these keys.
column_names = [
    "Index", "Sample Size", "Learning Rate", "λ",
    "Train Erms", "Validate Erms", "Test Erms",
]
# Running row counter; add_data_to_table increments it before each insert.
table_index = 0
# Starts empty and is rebound to a longer frame every time a row is added.
table_df = pd.DataFrame(columns=column_names)

def build_and_train_model(train_x, train_y, val_x, val_y, learning_rate, lamda):
    """Build a one-hidden-layer network and fit it with SGD.

    The network has a 10-unit tanh hidden layer, whose kernel carries an L1
    penalty weighted by ``lamda``, followed by a single linear output unit.
    It is trained silently for 100 epochs on mean squared error, with
    ``(val_x, val_y)`` supplied as validation data.

    Args:
        train_x, train_y: Training inputs and targets.
        val_x, val_y: Validation inputs and targets.
        learning_rate: Learning rate handed to the SGD optimizer.
        lamda: L1 regularization coefficient for the hidden layer's kernel.

    Returns:
        ``(model, result)``: the fitted model and the object returned by
        ``model.fit``.
    """
    # Warnings raised while the layers are constructed are deliberately
    # silenced; everything after the ``with`` block warns normally.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        hidden_layer = tf.keras.layers.Dense(
            10,
            activation='tanh',
            kernel_regularizer=tf.keras.regularizers.L1(lamda),
            input_shape=(1,),
        )
        output_layer = tf.keras.layers.Dense(1, activation='linear')
        model = tf.keras.Sequential([hidden_layer, output_layer])

    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
        loss='mean_squared_error',
    )

    result = model.fit(
        train_x,
        train_y,
        validation_data=(val_x, val_y),
        epochs=100,
        verbose=0,
    )
    return model, result


def evaluate_model(model, x, y):
    """Score ``model`` on ``(x, y)`` and predict on ``x``, both silently.

    Returns:
        ``(loss, y_pred)``: whatever ``model.evaluate(x, y)`` returns, and
        whatever ``model.predict(x)`` returns.
    """
    # Tuple elements are evaluated left to right: evaluate first, then predict.
    return model.evaluate(x, y, verbose=0), model.predict(x, verbose=0)

def add_data_to_table(sample_size, learning_rate, lamda, train_erms, validate_erms, test_erms):
    """Append one experiment row to the module-level results table.

    Increments the global ``table_index`` and rebinds the global ``table_df``
    to a new frame that contains the extra row.

    Args:
        sample_size: Value stored in the "Sample Size" column.
        learning_rate: Value stored in the "Learning Rate" column.
        lamda: Value stored in the "λ" column.
        train_erms: Value stored in the "Train Erms" column.
        validate_erms: Value stored in the "Validate Erms" column.
        test_erms: Value stored in the "Test Erms" column.
    """
    global table_df, table_index

    table_index += 1
    new_row = {
        "Index": table_index,
        "Sample Size": sample_size,
        "Learning Rate": learning_rate,
        "λ": lamda,
        "Train Erms": train_erms,
        "Validate Erms": validate_erms,
        "Test Erms": test_erms,
    }
    # Warnings raised while building or concatenating the row are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        row_frame = pd.DataFrame(new_row, index=[0])
        table_df = pd.concat([table_df, row_frame], ignore_index=True)


def print_table():
    """Print the accumulated results table, omitting the frame's row index."""
    rendered = table_df.to_string(index=False)
    print(rendered)

def get_N(x):
    """Return the number of items in ``x`` as reported by ``len``."""
    return len(x)

def get_erms(y, t):
    """Return the root-mean-square error between predictions and targets.

    Fixes a silent broadcasting bug: the callers in this file pass
    predictions of shape ``(N, 1)`` together with 1-D targets of shape
    ``(N,)``. Subtracting those directly broadcasts to an ``N x N`` matrix,
    so the old result averaged the error over every prediction/target pair
    instead of the N matched pairs. When both inputs hold the same number
    of elements they are now flattened first so the comparison is
    element-by-element.

    Args:
        y: Predicted values (array-like).
        t: Target values (array-like).

    Returns:
        The RMS error as a NumPy float scalar.
    """
    predictions = np.asarray(y, dtype=float)
    targets = np.asarray(t, dtype=float)

    # Equal element counts mean the inputs are paired one-to-one, whatever
    # their shapes; flatten so (N, 1) vs (N,) cannot broadcast to (N, N).
    # Differing sizes (e.g. a scalar against an array) keep NumPy's normal
    # broadcasting, as before.
    if predictions.size == targets.size:
        predictions = predictions.ravel()
        targets = targets.ravel()

    return np.sqrt(np.mean(np.square(targets - predictions)))

def plot_loss(result, sample_size, learning_rate, lamda):
    """Plot the training and validation loss curves and show the figure.

    Args:
        result: Object whose ``history`` mapping holds the ``'loss'`` and
            ``'val_loss'`` sequences to plot.
        sample_size, learning_rate, lamda: Run settings, shown in the title.
    """
    history = result.history
    for key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
        plt.plot(history[key], label=curve_label)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')

    # ``!s`` forces str() conversion, matching plain string concatenation.
    plot_title = (
        f"MLFFNN Training and Validation Loss\nSampleSize = {sample_size!s}"
        f"\nLearningRate = {learning_rate!s}\nLamda = {lamda!s}"
    )
    plt.title(plot_title, fontsize=16, weight='bold')
    plt.legend()
    plt.show()

def plot_data(x, y, pred_y, x_color, y_color, scatter_label, plot_label, x_label, y_label, plot_title):
    """Show one figure with the data as star markers and the predictions as a dashed line.

    Args:
        x, y: Data points drawn as a scatter in ``x_color``.
        pred_y: Predicted values drawn against ``x`` as a line in ``y_color``.
        scatter_label, plot_label: Legend entries for the scatter and the line.
        x_label, y_label: Axis labels.
        plot_title: Figure title.
    """
    marker_style = {"marker": "*", "s": 50}
    plt.scatter(x, y, color=x_color, label=scatter_label, **marker_style)
    plt.plot(x, pred_y, color=y_color, label=plot_label, linestyle="--")

    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(plot_title, fontsize=16, weight='bold')
    plt.legend()
    plt.show()

def scatter_plot(x, y, x_label, y_label, plot_title, plot_color):
    """Scatter ``(x, y)`` as star markers on the current figure.

    ``plot_title`` is used both as the figure title and as the legend entry.
    The figure is not shown here, so further drawing can be layered on top.
    """
    marker_style = {"marker": "*", "s": 50}
    plt.scatter(x, y, color=plot_color, label=plot_title, **marker_style)
    plt.title(plot_title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()


def line_plot(polyline, y, plot_label, plot_color, lines):
    """Draw ``y`` against ``polyline`` on the current figure and refresh the legend.

    ``lines`` is the matplotlib line style. The figure is not shown here.
    """
    line_options = {"color": plot_color, "label": plot_label, "linestyle": lines}
    plt.plot(polyline, y, **line_options)
    plt.legend()

def plot_all_data(train_x, train_y, train_y_pred, val_x, val_y, val_y_pred, test_x, test_y, test_y_pred, sample_size, learning_rate, lamda):
    """Show three separate figures: training, validation and test fits.

    Each figure is drawn by ``plot_data`` with the split's points in red and
    the model's predictions as a line in a split-specific colour. The run
    settings are embedded in every title.
    """
    # ``!s`` forces str() conversion, matching plain string concatenation.
    title = (
        f"MLFFNN Approximated Function\nSampleSize = {sample_size!s}"
        f"\nLearningRate = {learning_rate!s}\nLamda = {lamda!s}\nOn "
    )

    # (inputs, targets, predictions, line colour, split name), in display order.
    panels = (
        (train_x, train_y, train_y_pred, 'b', 'Training Data'),
        (val_x, val_y, val_y_pred, 'g', 'Validation Data'),
        (test_x, test_y, test_y_pred, 'purple', 'Test Data'),
    )
    for xs, ys, preds, line_color, split_name in panels:
        plot_data(
            xs, ys, preds,
            'r', line_color,
            split_name, 'Approximated Function',
            'input', 'output',
            title + split_name,
        )
    
def plot_together(train_x, train_y, train_y_pred, val_x, val_y, val_y_pred, test_x, test_y, test_y_pred, sample_size, learning_rate, lamda):
    """Show a single figure with the training points and all three prediction curves.

    The training data is scattered first. The test, validation and training
    predictions are then overlaid as lines, in that order, and the figure is
    titled with the run settings. ``val_y`` and ``test_y`` are accepted but
    not drawn.
    """
    # ``!s`` forces str() conversion, matching plain string concatenation.
    title = (
        f"MLFFNN Approximated Function\nSampleSize = {sample_size!s}"
        f"\nLearningRate = {learning_rate!s}\nLamda = {lamda!s}"
    )

    scatter_plot(train_x, train_y, "input", "output", "Training", "blue")

    # (inputs, predictions, legend label, colour, line style), in drawing order.
    curves = (
        (test_x, test_y_pred, "Test approx fn", "green", "-."),
        (val_x, val_y_pred, "Validation approx fn", "red", ":"),
        (train_x, train_y_pred, "Training approx fn", "brown", "--"),
    )
    for xs, preds, curve_label, curve_color, curve_style in curves:
        line_plot(xs, preds, curve_label, curve_color, curve_style)

    # Replaces the "Training" title that scatter_plot set on the figure.
    plt.title(title, fontsize=16, weight='bold')
    plt.show()


def plot_graphs(train_x, train_y, val_x, val_y, test_x, test_y, sample_size, regularization_coefficients, learning_rate=0.1):
    """Train, evaluate, plot and tabulate one model per regularization coefficient.

    For every ``lamda`` in ``regularization_coefficients`` a fresh model is
    trained on the training split and evaluated on all three splits. The
    fits and the loss curves are then plotted, one row holding the
    ``get_erms`` values is added to the results table, and the three losses
    are printed.

    Args:
        train_x, train_y: Training inputs and targets.
        val_x, val_y: Validation inputs and targets.
        test_x, test_y: Test inputs and targets.
        sample_size: Training-set size, used for plot titles and the table.
        regularization_coefficients: Iterable of coefficients to try.
        learning_rate: Learning rate for every run (defaults to 0.1).
    """
    splits = (
        ("train", train_x, train_y),
        ("val", val_x, val_y),
        ("test", test_x, test_y),
    )

    for lamda in regularization_coefficients:
        model, result = build_and_train_model(train_x, train_y, val_x, val_y, learning_rate, lamda)

        # Loss and predictions per split, then the matching RMS errors.
        losses, preds = {}, {}
        for split, xs, ys in splits:
            losses[split], preds[split] = evaluate_model(model, xs, ys)
        erms = {split: get_erms(preds[split], ys) for split, _, ys in splits}

        # Both fit-plotting helpers take the same positional arguments.
        fit_args = (
            train_x, train_y, preds["train"],
            val_x, val_y, preds["val"],
            test_x, test_y, preds["test"],
            sample_size, learning_rate, lamda,
        )
        plot_all_data(*fit_args)
        plot_together(*fit_args)
        plot_loss(result, sample_size, learning_rate, lamda)

        add_data_to_table(sample_size, learning_rate, lamda, erms["train"], erms["val"], erms["test"])

        for split, heading in (("train", "Train Loss:"), ("val", "Validation Loss:"), ("test", "Test Loss:")):
            print(heading, losses[split])

def _load_sorted_xy(csv_path):
    """Read one dataset CSV and return its ``(x, y)`` columns ordered by ``x``.

    Column 1 of the file is taken as the input and column 2 as the target;
    column 0 is not used.
    """
    data = pd.read_csv(csv_path).to_numpy()
    # Rows are ordered by the input column, presumably so the line plots
    # drawn later trace the curve from left to right.
    ordered = data[data[:, 1].argsort()]
    return ordered[:, 1], ordered[:, 2]


def main(dataset_dir=None, folder_number="9"):
    """Run the regularization experiment on both training samples and print the results table.

    Loads the two training samples plus the validation and test sets. For
    each training sample it runs ``plot_graphs`` over every learning rate and
    regularization coefficient, then prints the accumulated table.

    Args:
        dataset_dir: Directory that contains the numbered dataset folders.
            Defaults to the location that was previously hard-coded, so
            ``main()`` behaves as before. Pass another directory (for example
            one built from ``os.getcwd()``) to run against a different copy
            of the data.
        folder_number: Name of the dataset sub-folder, also embedded in the
            CSV file names. Defaults to ``"9"``.

    Raises:
        FileNotFoundError: If any of the expected CSV files is missing
            (raised by ``pandas.read_csv``).
    """
    if dataset_dir is None:
        dataset_dir = (
            "/home/dipendu/programs/mtech_2023/ml/ass2/Datasets_for_A1/Regression/Dataset 1/"
        )
    folder_number = str(folder_number)
    folder = os.path.join(dataset_dir, folder_number)

    # Same files, read in the same order as before: the two training
    # samples, then validation, then test.
    train_samples = [
        _load_sorted_xy(
            os.path.join(folder, f"Train-{folder_number}-Sample-{sample_id}.csv")
        )
        for sample_id in (1, 2)
    ]
    val_x, val_y = _load_sorted_xy(os.path.join(folder, f"Val-{folder_number}.csv"))
    test_x, test_y = _load_sorted_xy(os.path.join(folder, f"Test-{folder_number}.csv"))

    learning_rates = [0.01]
    regularization_coefficients = [0.0, 0.0001, 1e-6, 1e-9]

    for train_x, train_y in train_samples:
        sample_size = get_N(train_x)
        for learning_rate in learning_rates:
            plot_graphs(
                train_x,
                train_y,
                val_x,
                val_y,
                test_x,
                test_y,
                sample_size,
                regularization_coefficients,
                learning_rate,
            )

    print_table()


# Run the experiment only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()