In [None]:
import os

import numpy as np
import pandas as pd
import scipy
import scipy.stats  # load the stats submodule explicitly; scipy.stats.ttest_rel is used below
from tensorflow import keras

from utils import dataframe_to_tensor_dataset, BaselineRepeatLast, LinearBaseline, R2_Score, train_model

In [None]:
# Read the normalised training and testing CSVs from the "Data" directory
# into dataframes (training file first, then testing file).
train_df, test_df = (
    pd.read_csv(os.path.join("Data", csv_name))
    for csv_name in ("training_normalised.csv", "testing_normalised.csv")
)

In [None]:
# Baseline 1: score the BaselineRepeatLast model on the testing set.

# Build the TensorFlow dataset from the test dataframe. The last positional
# argument (32) is the batch size; 11, 7, 7 are passed straight through to
# dataframe_to_tensor_dataset (their meaning is defined in utils).
test_dataset = dataframe_to_tensor_dataset(test_df, 11, 7, 7, 32)

# Create the baseline and compile it with MSE as the loss and MAE as a metric
baseline_1 = BaselineRepeatLast()
baseline_1.compile(
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanAbsoluteError()],
)

# evaluate() returns the loss followed by the compiled metrics: [MSE, MAE]
evaluation_results = baseline_1.evaluate(test_dataset, verbose=0)
mse, mae = evaluation_results[0], evaluation_results[1]

# Flatten the predictions to 1-D, then reshape to a single column for R2_Score
predictions_1 = baseline_1.predict(test_dataset, verbose=0)
prediction_vector_1 = np.squeeze(predictions_1).flatten()
prediction_batched_1 = prediction_vector_1[:, np.newaxis]

# Collect the targets of every batch and give them the same 1-D / single-column
# layout as the predictions. `targets` and `targets_batched` are reused by later cells.
# NOTE(review): this relies on test_dataset yielding batches in the same order on
# every pass (predict above vs. this iteration) - confirm it is not shuffled.
targets = np.concatenate(
    [batch_targets for _, batch_targets in test_dataset], axis=0
).squeeze().flatten()
targets_batched = targets[:, np.newaxis]

r2 = R2_Score(prediction_batched_1, targets_batched)

print(f"When evaluated on testing set, the Baseline 1 model achieves MSE of {mse:.3f}, MAE of {mae:.3f} and R2 score of {r2:.3f}")



In [None]:
# Baseline 2: score the LinearBaseline model on the testing set.
# Reuses `test_dataset` and `targets_batched` created in the Baseline 1 cell.

# Create the baseline and compile it with MSE as the loss and MAE as a metric.
# NOTE(review): the model is evaluated without a fit() call in this notebook -
# presumably LinearBaseline needs no training; confirm in utils.
baseline_2 = LinearBaseline()
baseline_2.compile(
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanAbsoluteError()],
)

# evaluate() returns the loss followed by the compiled metrics: [MSE, MAE]
evaluation_results = baseline_2.evaluate(test_dataset, verbose=0)
mse, mae = evaluation_results[0], evaluation_results[1]

# Flatten the predictions to 1-D, then reshape to a single column for R2_Score.
# `prediction_vector_2` is also used by the LSTM cell for the paired t-test.
predictions_2 = baseline_2.predict(test_dataset, verbose=0)
prediction_vector_2 = np.squeeze(predictions_2).flatten()
prediction_batched_2 = prediction_vector_2[:, np.newaxis]

r2 = R2_Score(prediction_batched_2, targets_batched)

print(f"When evaluated on testing set, the Baseline 2 model achieves MSE of {mse:.3f}, MAE of {mae:.3f} and R2 score of {r2:.3f}")



In [None]:
# Evaluation of 4 LSTM models

# Hyperparameter sets of the 4 best models. Each dict carries the training
# settings (hidden_units, batch_size, epochs) plus the optional dataset arguments
# (audio_features_id, num_audio_features, followers_id); None leaves an option unset.
model_1 = dict(
    hidden_units=32,
    batch_size=32,
    epochs=130,
    audio_features_id=None,
    num_audio_features=None,
    followers_id=None,
)

model_2 = dict(
    hidden_units=128,
    batch_size=32,
    epochs=40,
    audio_features_id=3,
    num_audio_features=7,
    followers_id=None,
)

model_3 = dict(
    hidden_units=32,
    batch_size=32,
    epochs=150,
    audio_features_id=None,
    num_audio_features=None,
    followers_id=2,
)

model_4 = dict(
    hidden_units=256,
    batch_size=64,
    epochs=50,
    audio_features_id=3,
    num_audio_features=7,
    followers_id=2,
)


# Iterate over all 4 models. For each one: train it, calculate its MSE, MAE and R2
# scores on the test set, and run a one-sided paired t-test of its absolute errors
# against the Baseline 2 absolute errors.
#
# The datasets built here use loop-specific names so the baseline `test_dataset`
# (and the `targets` derived from it) are not overwritten, and the targets used for
# scoring a model are read from the same dataset that produced its predictions.
# NOTE(review): training is stochastic and no random seed is set in this notebook,
# so the printed numbers will vary between runs.
for model_number, config in enumerate([model_1, model_2, model_3, model_4], start=1):

    batch_size = config["batch_size"]
    hidden_units = config["hidden_units"]
    epochs = config["epochs"]

    audio_features_id = config["audio_features_id"]
    num_audio_features = config["num_audio_features"]
    followers_id = config["followers_id"]

    # Train / test datasets with this model's batch size and optional extra inputs
    lstm_train_dataset = dataframe_to_tensor_dataset(
        train_df, 11, 7, 7, batch_size,
        audio_features_id=audio_features_id,
        num_audio_features=num_audio_features,
        followers_id=followers_id,
    )
    lstm_test_dataset = dataframe_to_tensor_dataset(
        test_df, 11, 7, 7, batch_size,
        audio_features_id=audio_features_id,
        num_audio_features=num_audio_features,
        followers_id=followers_id,
    )

    # Train the model with the best chosen hyperparameter set
    _, lstm_model = train_model(hidden_units, epochs, lstm_train_dataset)
    evaluation_results = lstm_model.evaluate(lstm_test_dataset, verbose=0)

    # Flatten the predictions to 1-D, then reshape to a single column for R2_Score
    predictions = lstm_model.predict(lstm_test_dataset, verbose=0)
    prediction_vector = predictions.squeeze().flatten()
    prediction_batched = np.expand_dims(prediction_vector, axis=1)

    # Read the targets from the dataset the predictions came from, so both always
    # cover the same samples whatever batch size / features this model uses.
    lstm_targets = np.concatenate(
        [batch_targets for _, batch_targets in lstm_test_dataset], axis=0
    ).squeeze().flatten()
    lstm_targets_batched = np.expand_dims(lstm_targets, axis=1)

    if prediction_vector.shape != lstm_targets.shape:
        raise ValueError(
            f"Model {model_number}: {prediction_vector.size} predictions do not match "
            f"{lstm_targets.size} targets from the same test dataset"
        )

    mse = evaluation_results[0]
    mae = evaluation_results[1]
    r2 = R2_Score(prediction_batched, lstm_targets_batched)

    print(f"When evaluated on testing set, the model {model_number} achieves MSE of {mse:.3f}, MAE of {mae:.3f} and R2 score of {r2:.3f}")

    # The paired test compares errors sample by sample, so the model and Baseline 2
    # must have been scored on the same number of test samples.
    if prediction_vector.shape != prediction_vector_2.shape:
        raise ValueError(
            f"Model {model_number} produced {prediction_vector.size} test predictions but "
            f"Baseline 2 produced {prediction_vector_2.size}; the paired t-test needs "
            "both evaluated on the same test samples"
        )

    # Absolute error of the model and of Baseline 2, each against the targets of the
    # dataset its own predictions were made on
    loss_model = np.absolute(prediction_vector - lstm_targets)
    loss_baseline = np.absolute(prediction_vector_2 - targets)

    # Paired t-test; alternative="less" tests whether the model's mean absolute
    # error is lower than the Baseline 2 mean absolute error
    t_statistic, p_value = scipy.stats.ttest_rel(loss_model, loss_baseline, alternative="less")

    print(f"Testing model {model_number} loss to baseline 2 loss achieves the statistics below:")
    print("t-Statistic:", t_statistic)
    print("p-Value:", p_value)
    