In [56]:
# Import libraries
import numpy as np
import sqlite3
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from datetime import datetime


In [57]:
#Import the data
def Import():
    num_vertices = 4
    k_max = 100

    database_name = '../data/{}Vertices_k{}.db'.format(num_vertices,k_max)
    conn = sqlite3.connect(database_name)
    return conn, k_max

# the following function is necessary to be sure the coordiantes are imported as integers.
def convert_to_int(lst):
    return [int(x) for x in lst.strip('[]').split(',')]


In [58]:
def PrepTrainData(conn):
    # Prepare the training data
    train_df = pd.read_sql_query('SELECT coordinates, volume FROM Data', conn)
    train_df['coordinates'] = train_df['coordinates'].apply(convert_to_int)
    train_df['volume'] = pd.to_numeric(train_df['volume'], errors='coerce')

    X = np.stack(train_df['coordinates'].values)
    y = train_df['volume'].values

    X_train = X
    Y_train = y
    print(len(X_train))
    return X_train, Y_train

In [59]:
def CreateAndTrainModel(number_of_nodes, epochs, X_train, Y_train):
    # Define the model architecture
    Number_hidden_nodes = number_of_nodes

    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(Number_hidden_nodes, activation='relu', input_shape=(len(X_train[0]),)),
        tf.keras.layers.Dense(1, activation='linear')
    ])

    # Define the optimizer
    optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.01)

    # Compile the model with the optimizer
    model.compile(optimizer=optimizer, loss='mae', metrics=['accuracy'])

    # Train the model
    History = model.fit(X_train, Y_train, epochs=epochs, verbose=0)
    return History, model

In [60]:
def CreateMAEs(conn, k_max, model):
  mae_list = []

  for i in range(0,k_max+1):
    test_df = pd.read_sql_query('SELECT coordinatesB_{0}, volume FROM Data'.format(i), conn)
    test_df['new_coordinates'] = test_df['coordinatesB_{0}'.format(i)].apply(convert_to_int)
    test_df['volume'] = pd.to_numeric(test_df['volume'], errors='coerce')

    # We compute the MAE with respect the new database.

    # Preparing the data
    X_test = np.stack(test_df['new_coordinates'].values)
    Y_test = test_df['volume'].values

    # Make predictions on the new input data
    new_y_pred = model.predict(X_test)
    mae = mean_absolute_error(Y_test, new_y_pred)
    mae_list.append(mae)
  
  return mae_list

In [61]:
def Stats():
    # Define the input data as a list
    data = [i for i in range(0,k_max+1)]

    # Convert the input data to a numpy array
    x = np.array(data)

    # Define the output data as a list
    output = mae_list

    # Convert the output data to a numpy array
    y = np.array(output)

    # Perform linear regression on the data
    slope, intercept = np.polyfit(x, y, 1)
    return slope, intercept

In [62]:
def Evaluate():
    
    f = open("EvaluationResults.csv", "a")
    f.write("nodes,epochs,slope,intercept,avg_mae,low_mae,high_mae,time")
    f.close()

    nodes = [1, 10, 25, 50, 100, 250, 500, 750, 1000]
    epochs = [1, 10, 25, 50, 100, 250, 500, 750, 1000]

    for n in nodes:
        for e in epochs:
            start = datetime.now()
            f = open("../results/EvaluationResults.csv", "a")
            conn, k_max = Import()
            X_train, Y_train = PrepTrainData(conn)
            History, model = CreateAndTrainModel(n, e, X_train, Y_train)
            mae_list = CreateMAEs(conn, k_max, model)
            slope, intercept = Stats()
            end = datetime.now()
            time = end - start
            line = n + "," + e + "," + slope + "," + intercept + "," + avg(mae_list) + "," + min(mae_list) + ","+ max(mae_list) + "," + time
            f.write(line)
            f.close()

In [63]:
Evaluate()

42246
   1/1321 [..............................] - ETA: 12s

KeyboardInterrupt: 