In [None]:
# Mount Google Drive so the project directory under /content/drive is reachable
# (Colab-only; this cell has no effect outside a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd '/content/drive/MyDrive/popularity_pred'

/content/drive/MyDrive/popularity_pred


In [None]:
# !rm -rf models_3

In [None]:
import numpy as np
import pandas as pd
import xgboost as XGB
from prettytable import PrettyTable
from sklearn.cluster import KMeans, DBSCAN
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, f1_score, silhouette_score

from preprocess import standard_scaler
from evaluator import spearmanr, reg_eval_metrics
from feature_gen import gen_text_features, gen_date_features

import warnings
warnings.simplefilter("ignore")

import tensorflow.keras as keras
import tensorflow as tf
import numpy as np
import time
import tensorflow.keras.backend as K

import os

In [None]:
def cluster_data(df, num_clusters, start_day='Day01', end_day='Day30'):
    """Cluster Day30-normalised popularity curves with k-means.

    Every value in the columns ``start_day`` .. ``end_day`` (inclusive label
    slice) is divided by the same row's ``Day30`` value, and the resulting row
    vectors are clustered.

    Args:
        df: DataFrame containing the day columns and a ``Day30`` column.
        num_clusters: number of k-means clusters to fit.
        start_day: label of the first day column of the window.
        end_day: label of the last day column of the window.

    Returns:
        (kmeans, labels): the fitted ``KMeans`` object and the cluster label of
        each row, in the row order of ``df``.
    """
    # Build a new normalised frame in one step instead of assigning column by
    # column into a .loc slice of `df` (chained assignment: depending on the
    # pandas version that warns or may write back into `df`).
    # NOTE(review): rows with Day30 == 0 would produce inf/NaN here and make
    # KMeans fail -- confirm the data never contains them.
    curves = df.loc[:, start_day:end_day].div(df['Day30'], axis=0)
    X = curves.values
    #X = np.log(X+ 0.001)

    kmeans = KMeans(n_clusters = num_clusters, init = 'k-means++', max_iter =300, n_init = 10, random_state = 0)
    kmeans.fit(X)
    labels = kmeans.labels_
    # Silhouette score on the same data the model was fitted on.
    aucc = silhouette_score(X, labels, metric='euclidean')
    print ('wcss: ', kmeans.inertia_, 'silhouette score: ', aucc)

    return kmeans, labels

In [None]:
def drop_columns(X, cols=('URL', 'Title', 'Description',
                          'Tags', 'Country', 'Unnamed: 0_y',
                          'Unnamed: 0_x', 'Camera', 'UserId',
                          'Username', 'FlickrId', 'DatePosted',
                          'DateTaken', 'DateCrawl', 'Latitude', 'Longitude')
                 ):
    """Return a copy of ``X`` without the given columns.

    Args:
        X: input DataFrame (left unmodified).
        cols: iterable of column labels to remove. The default is an immutable
            tuple so the shared default can never be mutated between calls.

    Returns:
        A new DataFrame with ``cols`` removed.

    Raises:
        KeyError: if any label in ``cols`` is not a column of ``X``.
    """
    # A single drop call copies the frame once instead of once per column.
    return X.drop(columns=list(cols))

def median_rmse(rmse_values):
    """Return the median of the given error values."""
    median_value = np.median(rmse_values)
    return median_value


def trmse_median(rmse_values):
    """Median and mean of the values lying strictly between Q1 and Q3.

    The input is copied and sorted, the 25th and 75th percentiles are
    computed, and only values strictly greater than Q1 and strictly smaller
    than Q3 are kept.

    Args:
        rmse_values: sequence of per-sample error values.

    Returns:
        (median, mean) of the retained values. If no value falls strictly
        inside the interquartile range the retained set is empty.
    """
    ordered = np.sort(np.array(rmse_values))
    lower_q = np.percentile(ordered, 25)
    upper_q = np.percentile(ordered, 75)
    # Boolean mask of the values inside the open interval (Q1, Q3).
    inside_iqr = (ordered > lower_q) & (ordered < upper_q)
    trimmed = ordered[inside_iqr]
    return median_rmse(trimmed), trimmed.mean()

In [None]:
# Load the four training tables (paths are relative to the working directory).
users_df = pd.read_csv('data/users_TRAIN.csv')
image_info_df = pd.read_csv('data/img_info_TRAIN.csv')
headers_df = pd.read_csv('data/headers_TRAIN.csv')
popularity_df = pd.read_csv('data/popularity_TRAIN.csv')

print(users_df.shape, image_info_df.shape, headers_df.shape, popularity_df.shape)

# Join headers + image info on FlickrId, then user info on UserId, then the
# popularity table on FlickrId (default inner joins).
cdf = pd.merge(headers_df, image_info_df, on='FlickrId')
combined_df = pd.merge(cdf, users_df, on='UserId' )
combined_df = pd.merge(combined_df, popularity_df, on='FlickrId')
print(f"Final data: {combined_df.shape}")

# Keep one row per URL; the index is not reset, so later code relies on
# positional (.values) indexing rather than index labels.
combined_df = combined_df.drop_duplicates('URL')
print(f"Dropped final data: {combined_df.shape}")

# NOTE(review): num_clusters is not passed to cluster_data below -- the calls
# hard-code 3, 3 and 10 clusters -- but it is printed as the "cluster" value in
# the summary tables further down. Confirm which number is intended.
num_clusters = 2
# One k-means model per 10-day period of the popularity sequence.
kmeans_period_1, labels_period_1 = cluster_data(combined_df, 3, 'Day01', 'Day10')
kmeans_period_2, labels_period_2 = cluster_data(combined_df, 3, 'Day11', 'Day20')
kmeans_period_3, labels_period_3 = cluster_data(combined_df, 10, 'Day21', 'Day30')

(23046, 11) (20337, 14) (28383, 7) (20337, 32)
Final data: (21950, 61)
Dropped final data: (20337, 61)
wcss:  4107.478268296869 silhouette score:  0.4067942500651291
wcss:  1887.5760776459317 silhouette score:  0.5392313466861997
wcss:  236.6461874782201 silhouette score:  0.5715522856718139


In [None]:
# Feature source: every column from FlickrId through GroupsAvgPictures.
X = combined_df.loc[:, 'FlickrId': 'GroupsAvgPictures']

# Derived text/date features come from the project-local feature_gen helpers.
X = gen_text_features(X, 'Title')
X = gen_date_features(X, 'DatePosted')
X = gen_text_features(X, 'Description')

# NOTE(review): ' '.join(x) joins the *elements* of x. If Tags values are plain
# strings (as read from CSV) rather than lists, this puts a space between every
# character -- confirm the column type.
X['Tags'] = X['Tags'].map(lambda x: ' '.join(x))
X = gen_text_features(X, 'Tags')

# Remove identifier / raw-text / date / location columns (see drop_columns defaults).
X = drop_columns(X)

# Ratio features; the +0.001 keeps the denominator non-zero.
X['views_by_contact'] = X['MeanViews'] / (X['Contacts'] + 0.001)
X['views_by_num_grps'] = X['MeanViews'] / (X['NumGroups'] + 0.001)
X['views_by_photocount'] = X['MeanViews'] / (X['PhotoCount'] + 0.001)
X['views_by_grpavg'] = X['MeanViews'] / (X['GroupsAvgPictures'] + 0.001)
X['views_by_avg_grp_mem'] = X['MeanViews'] / (X['AvgGroupsMemb'] + 0.001)

# Keep the column labels: X is rebound below to the output of standard_scaler.
col_names = X.columns
print(col_names)

# From here on X is whatever standard_scaler returns for the raw value array
# (a DataFrame no longer); the scaler is fitted on all rows, before any
# train/test split.
X = standard_scaler(X.values)
Y_scale = combined_df['Day30'].values

# Y_scale = np.log(Y_scale/30.0 + 0.1)
# Regression target: double-log transform of Day30, log(log(y + 1) + 1).
Y_scale = np.log(np.log(Y_scale + 1) + 1)

Index(['Size', 'NumSets', 'NumGroups', 'AvgGroupsMemb', 'AvgGroupPhotos',
       'Ispro', 'HasStats', 'Contacts', 'PhotoCount', 'MeanViews',
       'GroupsCount', 'GroupsAvgMembers', 'GroupsAvgPictures',
       'Title_word_count', 'Title_num_chars', 'Title_avg_word_len',
       'Title_num_uppercase', 'Title_num_title_case', 'DatePosted_year',
       'DatePosted_day', 'DatePosted_hour', 'DatePosted_day_of_week',
       'Description_word_count', 'Description_num_chars',
       'Description_avg_word_len', 'Description_num_uppercase',
       'Description_num_title_case', 'Tags_word_count', 'Tags_num_chars',
       'Tags_avg_word_len', 'Tags_num_uppercase', 'Tags_num_title_case',
       'views_by_contact', 'views_by_num_grps', 'views_by_photocount',
       'views_by_grpavg', 'views_by_avg_grp_mem'],
      dtype='object')


In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Keras loss: square root of the mean squared difference between
    ``y_pred`` and ``y_true`` (mean taken over all elements)."""
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

class Regressor_RESNET:
    """1-D residual CNN regressor: three residual blocks, global average
    pooling and a dense head with a single linear output.

    The model is compiled with the ``root_mean_squared_error`` loss defined in
    this file. Training callbacks (LR reduction, best-model checkpoint, early
    stopping) are created in ``build_model`` and stored on ``self.callbacks``.
    """

    def __init__(self, input_shape, feature_maps, file_name, verbose=True, build=True):
        """Create the wrapper and, when ``build`` is true, build the model.

        Args:
            input_shape: shape of one input sample, passed to ``keras.layers.Input``.
            feature_maps: base number of Conv1D filters (doubled in blocks 2 and 3).
            file_name: path the ModelCheckpoint callback saves the best model to.
            verbose: print the model summary and checkpoint messages.
            build: when false no model is built (e.g. to only call ``predict``,
                which loads a saved model from disk).
        """
        # Always define these attributes so an instance created with
        # build=False has a consistent set of fields.
        self.verbose = verbose
        self.model = None
        self.callbacks = []
        if build:
            self.model = self.build_model(input_shape, feature_maps, file_name)
            if verbose:
                self.model.summary()

            # Side effect kept from the original: snapshot of the initial
            # weights in the current working directory.
            self.model.save_weights('model_init.hdf5')

    @staticmethod
    def _conv_bn(inputs, filters, kernel_size, activate=True):
        """Conv1D ('same' padding) -> BatchNormalization -> optional ReLU."""
        x = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(inputs)
        x = keras.layers.BatchNormalization()(x)
        if activate:
            x = keras.layers.Activation('relu')(x)
        return x

    def build_model(self, input_shape, n_feature_maps, file_name):
        """Build and compile the network and create the training callbacks.

        Layers are created in the same order as before the refactoring into
        ``_conv_bn``, so the resulting graph is unchanged.

        Returns:
            The compiled ``keras.models.Model``. ``self.callbacks`` is set as a
            side effect.
        """
        # n_feature_maps = 64 * 2
        wide = n_feature_maps * 2

        input_layer = keras.layers.Input(input_shape)

        # BLOCK 1
        conv_x = self._conv_bn(input_layer, n_feature_maps, 8)
        conv_y = self._conv_bn(conv_x, n_feature_maps, 3)
        conv_z = self._conv_bn(conv_y, n_feature_maps, 3, activate=False)

        # expand channels for the sum
        shortcut_y = self._conv_bn(input_layer, n_feature_maps, 1, activate=False)

        output_block_1 = keras.layers.add([shortcut_y, conv_z])
        output_block_1 = keras.layers.Activation('relu')(output_block_1)

        # BLOCK 2 (with a 1x1 bottleneck of wide // 16 filters)
        conv_x = self._conv_bn(output_block_1, wide, 3)
        conv_y = self._conv_bn(conv_x, wide // 16, 1)
        conv_y = self._conv_bn(conv_y, wide // 16, 1)
        conv_z = self._conv_bn(conv_y, wide, 3, activate=False)

        # expand channels for the sum
        shortcut_y = self._conv_bn(output_block_1, wide, 1, activate=False)

        output_block_2 = keras.layers.add([shortcut_y, conv_z])
        output_block_2 = keras.layers.Activation('relu')(output_block_2)

        # BLOCK 3: three parallel branches (kernel sizes 1, 3, 5) that are summed
        conv_x = self._conv_bn(output_block_2, wide, 8)

        # r= 4 worked better
        branches = []
        for kernel_size in (1, 3, 5):
            branch = self._conv_bn(conv_x, wide // 16, kernel_size)
            branch = self._conv_bn(branch, wide // 4, kernel_size)
            branches.append(branch)
        conv_y = keras.layers.add(branches)

        conv_z = self._conv_bn(conv_y, wide, 3, activate=False)

        # no need to expand channels because they are equal
        shortcut_y = keras.layers.BatchNormalization()(output_block_2)

        output_block_3 = keras.layers.add([shortcut_y, conv_z])
        output_block_3 = keras.layers.Activation('relu')(output_block_3)

        # FINAL
        gap_layer = keras.layers.GlobalAveragePooling1D()(output_block_3)
        gap_layer = keras.layers.Dropout(0.5)(gap_layer)
        gap_layer = keras.layers.Dense(256, activation='relu')(gap_layer)
        gap_layer = keras.layers.Dropout(0.5)(gap_layer)
        gap_layer = keras.layers.Dense(128, activation='linear')(gap_layer)

        output_layer = keras.layers.Dense(1, activation='linear')(gap_layer)

        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
        model.compile(
            loss=root_mean_squared_error,
            metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")],
            optimizer=keras.optimizers.Adam()
        )

        # NOTE(review): the LR schedule watches the training loss while the
        # checkpoint and early stopping watch val_loss -- confirm this is intended.
        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=10, min_lr=0.0001)

        model_checkpoint = keras.callbacks.ModelCheckpoint(
                              file_name,
                              verbose=self.verbose,
                              monitor='val_loss',
                              save_best_only=True,
                              mode='auto'
                          )

        early_stopping = keras.callbacks.EarlyStopping(
                            monitor="val_loss",
                            min_delta=0.000001,
                            patience=20,
                            verbose=0,
                            mode="auto",
                        )

        self.callbacks = [reduce_lr, model_checkpoint, early_stopping]
        return model

    def fit_predict(self, x_train, y_train, x_val, y_val, file_name, nb_epochs):
        """Train the model; the best checkpoint is written by the callback.

        Args:
            x_train, y_train: training data.
            x_val, y_val: validation data monitored by the callbacks.
            file_name: unused here; the checkpoint path is the one given when
                the model was built.
            nb_epochs: maximum number of epochs.

        Returns:
            None. Prediction on the validation data is disabled; call
            ``predict`` with the checkpoint path instead.

        Raises:
            RuntimeError: if TensorFlow reports no GPU device.
        """
        # The original test was `not tf.test.is_gpu_available` (no call), which
        # is always False, so the guard never fired. Raise instead of exit()
        # so a notebook kernel is not killed.
        if not tf.config.list_physical_devices('GPU'):
            raise RuntimeError('No GPU available for training')

        batch_size = 128
        # At most 128, roughly a tenth of the training set, but never 0.
        mini_batch_size = max(1, int(min(x_train.shape[0] / 10, batch_size)))

        self.model.fit(
            x_train, y_train,
            epochs=nb_epochs,
            verbose=True,
            batch_size=mini_batch_size,
            validation_data=(x_val, y_val),
            callbacks=self.callbacks
        )

        # self.model.save(file_name)
        keras.backend.clear_session()

        return None

    def predict(self, x_test, file_name):
        """Load the saved model at ``file_name`` and predict on ``x_test``.

        The model is loaded with ``compile=False`` (it was compiled with the
        custom loss defined in this file). Returns the predictions with axis 1
        squeezed out.
        """
        model_path =  file_name
        model = keras.models.load_model(model_path, compile=False)
        y_pred = model.predict(x_test)

        return np.squeeze(y_pred, axis=1)

In [None]:
class Classifier_RESNET:
    """1-D residual CNN classifier: three residual blocks, global average
    pooling and a dense head with a single sigmoid output.

    The model is compiled with binary cross-entropy, so it distinguishes two
    classes only.
    NOTE(review): targets with more than two classes would need a softmax
    output and a categorical loss -- confirm against the labels used.
    """

    def __init__(self, input_shape, feature_maps, file_name, verbose=True, build=True):
        """Create the wrapper and, when ``build`` is true, build the model.

        Args:
            input_shape: shape of one input sample, passed to ``keras.layers.Input``.
            feature_maps: base number of Conv1D filters (doubled in blocks 2 and 3).
            file_name: path the ModelCheckpoint callback saves the best model to.
            verbose: print the model summary and checkpoint messages.
            build: when false no model is built (e.g. to only call ``predict``,
                which loads a saved model from disk).
        """
        # Always define these attributes so an instance created with
        # build=False has a consistent set of fields.
        self.verbose = verbose
        self.model = None
        self.callbacks = []
        if build:
            self.model = self.build_model(input_shape, feature_maps, file_name)
            if verbose:
                self.model.summary()

            # Side effect kept from the original: snapshot of the initial
            # weights in the current working directory.
            self.model.save_weights('model_init.hdf5')

    @staticmethod
    def _conv_bn(inputs, filters, kernel_size, activate=True):
        """Conv1D ('same' padding) -> BatchNormalization -> optional ReLU."""
        x = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(inputs)
        x = keras.layers.BatchNormalization()(x)
        if activate:
            x = keras.layers.Activation('relu')(x)
        return x

    def build_model(self, input_shape, n_feature_maps, file_name):
        """Build and compile the network and create the training callbacks.

        Layers are created in the same order as before the refactoring into
        ``_conv_bn``, so the resulting graph is unchanged.

        Returns:
            The compiled ``keras.models.Model``. ``self.callbacks`` is set as a
            side effect.
        """
        # n_feature_maps = 64 * 2
        wide = n_feature_maps * 2

        input_layer = keras.layers.Input(input_shape)

        # BLOCK 1
        conv_x = self._conv_bn(input_layer, n_feature_maps, 8)
        conv_y = self._conv_bn(conv_x, n_feature_maps, 3)
        conv_z = self._conv_bn(conv_y, n_feature_maps, 3, activate=False)

        # expand channels for the sum
        shortcut_y = self._conv_bn(input_layer, n_feature_maps, 1, activate=False)

        output_block_1 = keras.layers.add([shortcut_y, conv_z])
        output_block_1 = keras.layers.Activation('relu')(output_block_1)

        # BLOCK 2 (with a 1x1 bottleneck of wide // 16 filters)
        conv_x = self._conv_bn(output_block_1, wide, 3)
        conv_y = self._conv_bn(conv_x, wide // 16, 1)
        conv_y = self._conv_bn(conv_y, wide // 16, 1)
        conv_z = self._conv_bn(conv_y, wide, 3, activate=False)

        # expand channels for the sum
        shortcut_y = self._conv_bn(output_block_1, wide, 1, activate=False)

        output_block_2 = keras.layers.add([shortcut_y, conv_z])
        output_block_2 = keras.layers.Activation('relu')(output_block_2)

        # BLOCK 3: three parallel branches (kernel sizes 1, 3, 5) that are summed
        conv_x = self._conv_bn(output_block_2, wide, 8)

        # r= 4 worked better
        branches = []
        for kernel_size in (1, 3, 5):
            branch = self._conv_bn(conv_x, wide // 16, kernel_size)
            branch = self._conv_bn(branch, wide // 4, kernel_size)
            branches.append(branch)
        conv_y = keras.layers.add(branches)

        conv_z = self._conv_bn(conv_y, wide, 3, activate=False)

        # no need to expand channels because they are equal
        shortcut_y = keras.layers.BatchNormalization()(output_block_2)

        output_block_3 = keras.layers.add([shortcut_y, conv_z])
        output_block_3 = keras.layers.Activation('relu')(output_block_3)

        # FINAL
        gap_layer = keras.layers.GlobalAveragePooling1D()(output_block_3)
        gap_layer = keras.layers.Dropout(0.5)(gap_layer)
        gap_layer = keras.layers.Dense(256, activation='relu')(gap_layer)
        gap_layer = keras.layers.Dropout(0.5)(gap_layer)
        gap_layer = keras.layers.Dense(128, activation='linear')(gap_layer)
        output_layer = keras.layers.Dense(1, activation='sigmoid')(gap_layer)

        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
        model.compile(
            loss='binary_crossentropy',
            metrics=['accuracy'],
            optimizer=keras.optimizers.Adam()
        )

        # NOTE(review): the LR schedule watches the training loss while the
        # checkpoint and early stopping watch val_loss -- confirm this is intended.
        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=10, min_lr=0.0001)

        model_checkpoint = keras.callbacks.ModelCheckpoint(
                              file_name,
                              verbose=self.verbose,
                              monitor='val_loss',
                              save_best_only=True,
                              mode='auto'
                          )

        early_stopping = keras.callbacks.EarlyStopping(
                            monitor="val_loss",
                            min_delta=0.000001,
                            patience=20,
                            verbose=0,
                            mode="auto",
                        )

        self.callbacks = [reduce_lr, model_checkpoint, early_stopping]
        return model

    def fit_predict(self, x_train, y_train, x_val, y_val, file_name, nb_epochs):
        """Train the model; the best checkpoint is written by the callback.

        Args:
            x_train, y_train: training data.
            x_val, y_val: validation data monitored by the callbacks.
            file_name: unused here; the checkpoint path is the one given when
                the model was built.
            nb_epochs: maximum number of epochs.

        Returns:
            None. Prediction on the validation data is disabled; call
            ``predict`` with the checkpoint path instead.

        Raises:
            RuntimeError: if TensorFlow reports no GPU device.
        """
        # The original test was `not tf.test.is_gpu_available` (no call), which
        # is always False, so the guard never fired. Raise instead of exit()
        # so a notebook kernel is not killed.
        if not tf.config.list_physical_devices('GPU'):
            raise RuntimeError('No GPU available for training')

        batch_size = 128
        # At most 128, roughly a tenth of the training set, but never 0.
        mini_batch_size = max(1, int(min(x_train.shape[0] / 10, batch_size)))

        self.model.fit(
            x_train, y_train,
            epochs=nb_epochs,
            verbose=True,
            batch_size=mini_batch_size,
            validation_data=(x_val, y_val),
            callbacks=self.callbacks,
        )

        # self.model.save(file_name)
        keras.backend.clear_session()

        return None

    def predict(self, x_test, file_name):
        """Load the saved model at ``file_name`` and return hard 0/1 labels.

        Sigmoid outputs above 0.5 become 1, all others 0; axis 1 is squeezed
        out of the result.
        """
        model_path =  file_name
        model = keras.models.load_model(model_path)
        y_pred = model.predict(x_test)

        # Threshold in place: first mark positives, then everything still <= 0.5.
        y_pred[y_pred > 0.5] = 1
        y_pred[y_pred <= 0.5] = 0

        return np.squeeze(y_pred, axis=1)

In [None]:
# Hyper-parameters referenced by the (currently commented-out) ResNet training
# path in classifier() / regressor().
train_epochs = 500  # maximum number of training epochs
feature_maps = 64   # base number of Conv1D filters

# Bare expression: displays the value as the cell output.
feature_maps

64

In [None]:
# Directory where trained model checkpoints are stored.
path_to_model = '/content/drive/MyDrive/popularity_pred/models_1'

# exist_ok=True replaces the check-then-create pattern (no race between the
# existence test and the creation) and makedirs also creates missing parents.
os.makedirs(path_to_model, exist_ok=True)

In [None]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [None]:
def classifier(X_train, y_train,
               X_test, y_test, col_names=None, period=1, run=1,
               random_state=42):
    """Fit a random-forest classifier and score it on train and test data.

    Args:
        X_train, y_train: training features and cluster labels.
        X_test, y_test: held-out features and cluster labels.
        col_names, period, run: unused by the random-forest path; kept so the
            call signature stays compatible with the earlier ResNet-based
            variant (Classifier_RESNET), which used them to name checkpoints.
        random_state: seed for the forest so that repeated runs give the same
            result (the model was previously unseeded).

    Returns:
        (y_pred, fold_score): predicted test labels and the weighted F1 score
        on the test data.
    """
    clf = RandomForestClassifier(random_state=random_state)
    clf.fit(X_train, y_train)

    # Training-set score, printed only to show how far the forest overfits.
    train_score = f1_score(y_train, clf.predict(X_train), average='weighted')
    print ("train data f1 score: ", train_score)

    y_pred = clf.predict(X_test)
    fold_score = f1_score(y_test, y_pred, average='weighted')
    print ("classifier f1 score: ", fold_score)

    return y_pred, fold_score

In [None]:
def _print_reg_metrics(y_true, y_hat):
    """Print RMSE, MAE, R2 and Spearman correlation as a one-row table."""
    (rmse, mae, r2) = reg_eval_metrics(y_true, y_hat)
    table = PrettyTable(['RMSE', 'MAE', 'R2', 'Spearmanr'])
    table.add_row([rmse, mae, r2, spearmanr(y_true, y_hat)])
    print(table)


def regressor(X_train_folds_reg, y_train_folds_reg,
              X_test_fold_reg, y_test_fold_reg, col_names=None, period=1, run=1,
              random_state=42):
    """Fit a random-forest regressor and report train and test metrics.

    Args:
        X_train_folds_reg, y_train_folds_reg: training features and targets.
        X_test_fold_reg, y_test_fold_reg: held-out features and targets.
        col_names, period, run: unused by the random-forest path; kept so the
            call signature stays compatible with the earlier ResNet-based
            variant (Regressor_RESNET), which used them to name checkpoints.
        random_state: seed for the forest so that repeated runs give the same
            result (the model was previously unseeded).

    Returns:
        Predictions for ``X_test_fold_reg``.
    """
    clf = RandomForestRegressor(random_state=random_state)
    clf.fit(X_train_folds_reg, y_train_folds_reg)

    # First table: fit on the training data; second table: held-out data.
    _print_reg_metrics(y_train_folds_reg, clf.predict(X_train_folds_reg))

    ypred = clf.predict(X_test_fold_reg)
    _print_reg_metrics(y_test_fold_reg, ypred)

    return ypred

In [None]:
def overall_eval_metric(cluster_org_labels, cluster_centers, gt_scale, final_gt,
                        double_log_trans=True):
    """Score reconstructed sequences against the ground truth, per sample.

    For sample ``i`` the predicted sequence is the centre of its cluster
    multiplied by the back-transformed scale value, and it is compared with
    ``final_gt[i]`` via ``reg_eval_metrics``.

    Args:
        cluster_org_labels: cluster label per sample (true or predicted); may
            hold floats, each label is cast to ``int`` before indexing.
        cluster_centers: indexable of cluster-centre vectors.
        gt_scale: transformed scale value per sample (true or predicted).
        final_gt: ground-truth sequence per sample.
        double_log_trans: if true, invert ``log(log(y + 1) + 1)``; otherwise
            use the single-log variant ``(exp(s) - 1) * 30``.

    Returns:
        (median_trmse, mean_trmse, median_tmae, mean_tmae): median and mean of
        the per-sample RMSE and MAE values as computed by ``trmse_median``.
    """
    per_sample_rmse, per_sample_mae = [], []

    for i in range(len(gt_scale)):
        # Cast in BOTH branches: the single-log branch used to index with the
        # raw label, which fails for float labels.
        center = cluster_centers[int(cluster_org_labels[i])]
        # applied log transform now redo the operation
        if double_log_trans:
            pred = center * (np.exp(np.exp(gt_scale[i])-1)-1)
        else:
            pred = center * (np.exp(gt_scale[i])-1) * 30

        # Only RMSE and MAE are reported. The per-sample Spearman correlation
        # and the running totals it fed were never returned, so they are no
        # longer computed.
        rmse, mae, _ = reg_eval_metrics(final_gt[i], pred)
        per_sample_rmse.append(rmse)
        per_sample_mae.append(mae)

    median_trmse, mean_trmse = trmse_median(per_sample_rmse)
    median_tmae, mean_tmae = trmse_median(per_sample_mae)

    return median_trmse, mean_trmse, median_tmae, mean_tmae

In [None]:
def get_results(X, Y, Y_scale, kmeans, train_index, test_index, start_day, end_day, col_names=None, scale_pos_weight=1, period=1, run=1):
    """Train and evaluate the classifier and regressor on one fold.

    Four label/scale combinations are scored with ``overall_eval_metric``:
      A: true labels,      true scale
      B: predicted labels, true scale
      C: true labels,      predicted scale
      D: predicted labels, predicted scale

    The ground-truth sequences are read from the notebook-level ``combined_df``
    (columns ``start_day`` .. ``end_day``). ``scale_pos_weight`` is accepted
    but not used.

    Returns:
        A 16-tuple: (median, mean) of the RMSE values for cases A, B, C, D,
        followed by (median, mean) of the MAE values for cases A, B, C, D.
    """
    # X = np.expand_dims(X, axis=1)
    print(X.shape)

    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = Y[train_index], Y[test_index]

    # Cluster-label classification
    y_pred, fold_score = classifier(X_train_fold, y_train_fold,
                                    X_test_fold, y_test_fold, col_names, period, run)
    print (confusion_matrix(y_test_fold, y_pred))

    # Scale Reg
    y_train_reg, y_test_reg = Y_scale[train_index], Y_scale[test_index]
    scale_pred = regressor(X_train_fold, y_train_reg,
                           X_test_fold, y_test_reg, col_names, period, run)

    gt_ = combined_df.loc[:, start_day:end_day].values[test_index]

    # (labels, scale) pairs for cases A, B, C, D in that order.
    case_inputs = (
        (y_test_fold, y_test_reg),
        (y_pred, y_test_reg),
        (y_test_fold, scale_pred),
        (y_pred, scale_pred),
    )

    rmse_stats, mae_stats = [], []
    for case_labels, case_scale in case_inputs:
        rmse_median, rmse_mean, mae_median, mae_mean = overall_eval_metric(
            cluster_org_labels=case_labels,
            cluster_centers=kmeans.cluster_centers_,
            gt_scale=case_scale,
            final_gt=gt_)
        rmse_stats.extend((rmse_median, rmse_mean))
        mae_stats.extend((mae_median, mae_mean))

    return tuple(rmse_stats + mae_stats)

In [None]:
# Per-fold accumulators for the RMSE-based scores.
# Naming: p<period><case>_avg_<stat>, with period 1-3 and case a-d
# (the four label/scale combinations returned by get_results).
# Re-running this cell resets all accumulators and tables.
p1a_avg_mean, p1a_avg_median = [], []
p2a_avg_mean, p2a_avg_median = [], []
p3a_avg_mean, p3a_avg_median = [], []

p1b_avg_mean, p1b_avg_median = [], []
p2b_avg_mean, p2b_avg_median = [], []
p3b_avg_mean, p3b_avg_median = [], []

p1c_avg_mean, p1c_avg_median = [], []
p2c_avg_mean, p2c_avg_median = [], []
p3c_avg_mean, p3c_avg_median = [], []

p1d_avg_mean, p1d_avg_median = [], []
p2d_avg_mean, p2d_avg_median = [], []
p3d_avg_mean, p3d_avg_median = [], []

##############################################################

# Same layout for the MAE-based scores.
p1a_mae_avg_mean, p1a_mae_avg_median = [], []
p2a_mae_avg_mean, p2a_mae_avg_median = [], []
p3a_mae_avg_mean, p3a_mae_avg_median = [], []

p1b_mae_avg_mean, p1b_mae_avg_median = [], []
p2b_mae_avg_mean, p2b_mae_avg_median = [], []
p3b_mae_avg_mean, p3b_mae_avg_median = [], []

p1c_mae_avg_mean, p1c_mae_avg_median = [], []
p2c_mae_avg_mean, p2c_mae_avg_median = [], []
p3c_mae_avg_mean, p3c_mae_avg_median = [], []

p1d_mae_avg_mean, p1d_mae_avg_median = [], []
p2d_mae_avg_mean, p2d_mae_avg_median = [], []
p3d_mae_avg_mean, p3d_mae_avg_median = [], []

##############################################################

# One table per case (a-d); each fold adds one row.
tablea = PrettyTable(["p1_mean", "p2_mean", "p3_mean", "p1_median", "p2_median", "p3_median"])
tableb = PrettyTable(["p1_mean", "p2_mean", "p3_mean", "p1_median", "p2_median", "p3_median"])
tablec = PrettyTable(["p1_mean", "p2_mean", "p3_mean", "p1_median", "p2_median", "p3_median"])
tabled = PrettyTable(["p1_mean", "p2_mean", "p3_mean", "p1_median", "p2_median", "p3_median"])

##############################################################

# MAE counterparts of the four tables above.
tablea_mae = PrettyTable(["p1_mae_mean", "p2_mae_mean", "p3_mae_mean", "p1_mae_median", "p2_mae_median", "p3_mae_median"])
tableb_mae = PrettyTable(["p1_mae_mean", "p2_mae_mean", "p3_mae_mean", "p1_mae_median", "p2_mae_median", "p3_mae_median"])
tablec_mae = PrettyTable(["p1_mae_mean", "p2_mae_mean", "p3_mae_mean", "p1_mae_median", "p2_mae_median", "p3_mae_median"])
tabled_mae = PrettyTable(["p1_mae_mean", "p2_mae_mean", "p3_mae_mean", "p1_mae_median", "p2_mae_median", "p3_mae_median"])

##############################################################

# Seeded, shuffled stratified 3-fold split.
n_splits = 3
skfolds = StratifiedKFold(n_splits=n_splits, random_state=42, shuffle=True)

# The split is stratified on each period's own cluster labels, so the three
# periods get different train/test partitions of the same rows.
period_1_test_indices = []
period_1_train_indices = []
for train_index, test_index in skfolds.split(X, labels_period_1):
    period_1_train_indices.append(train_index)
    period_1_test_indices.append(test_index)

period_2_test_indices = []
period_2_train_indices = []
for train_index, test_index in skfolds.split(X, labels_period_2):
    period_2_train_indices.append(train_index)
    period_2_test_indices.append(test_index)

period_3_test_indices = []
period_3_train_indices = []
for train_index, test_index in skfolds.split(X, labels_period_3):
    period_3_train_indices.append(train_index)
    period_3_test_indices.append(test_index)

In [None]:
# Width multiplier for the "&&&&" banner lines printed before each run.
n = 10

# One iteration per fold: evaluate all three periods, then record the scores.
# NOTE: this cell appends to the accumulators and tables created in the setup
# cell, so re-running it without re-running that cell duplicates rows.
for i in range(n_splits):
    print ("&&&&"*n, f"Run {i+1} Period1 ", "&&&&"*n)
    # get_results returns 16 values: (median, mean) RMSE for cases a-d, then
    # (median, mean) MAE for cases a-d.
    p1a_median, p1a_mean, p1b_median, p1b_mean, p1c_median, p1c_mean, p1d_median, p1d_mean, p1a_mae_median, p1a_mae_mean, p1b_mae_median, p1b_mae_mean, p1c_mae_median, p1c_mae_mean, p1d_mae_median, p1d_mae_mean \
      = get_results(X, labels_period_1, Y_scale, kmeans_period_1, period_1_train_indices[i], period_1_test_indices[i], 'Day01', 'Day10', col_names, 1, period=1, run=i+1)
    print ("&&&&"*n, f"Run {i+1} Period2 ", "&&&&"*n)
    p2a_median, p2a_mean, p2b_median, p2b_mean, p2c_median, p2c_mean, p2d_median, p2d_mean, p2a_mae_median, p2a_mae_mean, p2b_mae_median, p2b_mae_mean, p2c_mae_median, p2c_mae_mean, p2d_mae_median, p2d_mae_mean \
      = get_results(X, labels_period_2, Y_scale, kmeans_period_2, period_2_train_indices[i], period_2_test_indices[i], 'Day11', 'Day20', col_names, 1, period=2, run=i+1)
    print ("&&&&"*n, f"Run {i+1} Period3 ", "&&&&"*n)
    p3a_median, p3a_mean, p3b_median, p3b_mean, p3c_median, p3c_mean, p3d_median, p3d_mean, p3a_mae_median, p3a_mae_mean, p3b_mae_median, p3b_mae_mean, p3c_mae_median, p3c_mae_mean, p3d_mae_median, p3d_mae_mean \
      = get_results(X, labels_period_3, Y_scale, kmeans_period_3, period_3_train_indices[i], period_3_test_indices[i], 'Day21', 'Day30', col_names, 1, period=3, run=i+1)


    # RMSE-based scores: one table row per fold and case.
    tablea.add_row([p1a_mean, p2a_mean, p3a_mean, p1a_median, p2a_median, p3a_median])
    tableb.add_row([p1b_mean, p2b_mean, p3b_mean, p1b_median, p2b_median, p3b_median])
    tablec.add_row([p1c_mean, p2c_mean, p3c_mean, p1c_median, p2c_median, p3c_median])
    tabled.add_row([p1d_mean, p2d_mean, p3d_mean, p1d_median, p2d_median, p3d_median])


    # Keep the per-fold values for the summary cells further down.
    p1a_avg_mean.append(p1a_mean)
    p1a_avg_median.append(p1a_median)
    p2a_avg_mean.append(p2a_mean)
    p2a_avg_median.append(p2a_median)
    p3a_avg_mean.append(p3a_mean)
    p3a_avg_median.append(p3a_median)

    p1b_avg_mean.append(p1b_mean)
    p1b_avg_median.append(p1b_median)
    p2b_avg_mean.append(p2b_mean)
    p2b_avg_median.append(p2b_median)
    p3b_avg_mean.append(p3b_mean)
    p3b_avg_median.append(p3b_median)

    p1c_avg_mean.append(p1c_mean)
    p1c_avg_median.append(p1c_median)
    p2c_avg_mean.append(p2c_mean)
    p2c_avg_median.append(p2c_median)
    p3c_avg_mean.append(p3c_mean)
    p3c_avg_median.append(p3c_median)

    p1d_avg_mean.append(p1d_mean)
    p1d_avg_median.append(p1d_median)
    p2d_avg_mean.append(p2d_mean)
    p2d_avg_median.append(p2d_median)
    p3d_avg_mean.append(p3d_mean)
    p3d_avg_median.append(p3d_median)

    ############################################################################

    # MAE-based scores: same bookkeeping as above.
    tablea_mae.add_row([p1a_mae_mean, p2a_mae_mean, p3a_mae_mean, p1a_mae_median, p2a_mae_median, p3a_mae_median])
    tableb_mae.add_row([p1b_mae_mean, p2b_mae_mean, p3b_mae_mean, p1b_mae_median, p2b_mae_median, p3b_mae_median])
    tablec_mae.add_row([p1c_mae_mean, p2c_mae_mean, p3c_mae_mean, p1c_mae_median, p2c_mae_median, p3c_mae_median])
    tabled_mae.add_row([p1d_mae_mean, p2d_mae_mean, p3d_mae_mean, p1d_mae_median, p2d_mae_median, p3d_mae_median])


    p1a_mae_avg_mean.append(p1a_mae_mean)
    p1a_mae_avg_median.append(p1a_mae_median)
    p2a_mae_avg_mean.append(p2a_mae_mean)
    p2a_mae_avg_median.append(p2a_mae_median)
    p3a_mae_avg_mean.append(p3a_mae_mean)
    p3a_mae_avg_median.append(p3a_mae_median)

    p1b_mae_avg_mean.append(p1b_mae_mean)
    p1b_mae_avg_median.append(p1b_mae_median)
    p2b_mae_avg_mean.append(p2b_mae_mean)
    p2b_mae_avg_median.append(p2b_mae_median)
    p3b_mae_avg_mean.append(p3b_mae_mean)
    p3b_mae_avg_median.append(p3b_mae_median)

    p1c_mae_avg_mean.append(p1c_mae_mean)
    p1c_mae_avg_median.append(p1c_mae_median)
    p2c_mae_avg_mean.append(p2c_mae_mean)
    p2c_mae_avg_median.append(p2c_mae_median)
    p3c_mae_avg_mean.append(p3c_mae_mean)
    p3c_mae_avg_median.append(p3c_mae_median)

    p1d_mae_avg_mean.append(p1d_mae_mean)
    p1d_mae_avg_median.append(p1d_mae_median)
    p2d_mae_avg_mean.append(p2d_mae_mean)
    p2d_mae_avg_median.append(p2d_mae_median)
    p3d_mae_avg_mean.append(p3d_mae_mean)
    p3d_mae_avg_median.append(p3d_mae_median)

&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& Run 1 Period1  &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
(20337, 37)


KeyboardInterrupt: ignored

In [None]:
# Per-fold RMSE tables for cases a-d (one row per fold).
print(tablea)
print(tableb)
print(tablec)
print(tabled)

# MAE tables are filled by the cross-validation loop but not printed.
# print(tablea_mae)
# print(tableb_mae)
# print(tablec_mae)
# print(tabled_mae)

+--------------------+--------------------+---------------------+--------------------+--------------------+--------------------+
|      p1_mean       |      p2_mean       |       p3_mean       |     p1_median      |     p2_median      |     p3_median      |
+--------------------+--------------------+---------------------+--------------------+--------------------+--------------------+
| 2.062178944722352  | 1.2527595519956747 |  0.3807142487857374 | 1.5356280371879159 | 0.9447758342698133 | 0.3224860062204702 |
| 2.156862959483312  | 1.2021750869841272 | 0.36206910751740545 | 1.6137378508370015 | 0.893397389592911  | 0.3008840576225813 |
| 2.1641869709129735 | 1.244868173890592  | 0.37200296974484476 | 1.6488597278905823 | 0.9434717707979464 | 0.2994800421866507 |
+--------------------+--------------------+---------------------+--------------------+--------------------+--------------------+
+-------------------+--------------------+--------------------+--------------------+-------------

In [None]:
def get_std(a, b, c):
    """Return the mean and standard deviation of the position-wise average of three sequences.

    The inputs are walked in parallel with ``zip`` (so iteration stops at the
    shortest one). Each aligned triple is averaged, and the mean and standard
    deviation (``np.std`` with its default ``ddof=0``) of those averages are
    returned.

    Args:
        a, b, c: iterables of numbers, combined position by position.

    Returns:
        tuple: ``(mean, std)`` of the per-position averages.
    """
    per_position_avg = [(x + y + z) / 3 for x, y, z in zip(a, b, c)]
    return np.mean(per_position_avg), np.std(per_position_avg)

In [None]:
# RMSE summary: print one single-row table per case (A-D).
#
# For each case, the three per-period lists of mean values and of median values
# (presumably one entry per run, filled by the run loop earlier in the notebook
# -- TODO confirm) are averaged per period, and get_std() supplies the
# "mean ± std" of the three-period average for the two Final_* columns.
#
# The four cases share one loop instead of four copy-pasted stanzas, and every
# numeric column is rounded to 3 decimals (previously only p1/p2 means were
# rounded, so the row mixed 3-decimal and full-precision floats).
summary_columns = ["cluster", "p1_avg_mean", "p2_avg_mean", "p3_avg_mean",
                   "p1_median", "p2_median", "p3_median", "Final_mean", "Final_median"]

# (label, (p1, p2, p3) mean lists, (p1, p2, p3) median lists) for each case.
rmse_cases = [
    ("CASE A", (p1a_avg_mean, p2a_avg_mean, p3a_avg_mean), (p1a_avg_median, p2a_avg_median, p3a_avg_median)),
    ("CASE B", (p1b_avg_mean, p2b_avg_mean, p3b_avg_mean), (p1b_avg_median, p2b_avg_median, p3b_avg_median)),
    ("CASE C", (p1c_avg_mean, p2c_avg_mean, p3c_avg_mean), (p1c_avg_median, p2c_avg_median, p3c_avg_median)),
    ("CASE D", (p1d_avg_mean, p2d_avg_mean, p3d_avg_mean), (p1d_avg_median, p2d_avg_median, p3d_avg_median)),
]

# The loop targets reuse the original working names, so after the cell finishes
# they are bound to the CASE D values exactly as before.
for case_label, (p1_avg_mean, p2_avg_mean, p3_avg_mean), (p1_avg_median, p2_avg_median, p3_avg_median) in rmse_cases:
    print(case_label)
    table = PrettyTable(summary_columns)

    f_mean, f_std = get_std(p1_avg_mean, p2_avg_mean, p3_avg_mean)
    f_median, f_median_std = get_std(p1_avg_median, p2_avg_median, p3_avg_median)

    # Column order must match summary_columns: three means, then three medians.
    period_stats = [p1_avg_mean, p2_avg_mean, p3_avg_mean,
                    p1_avg_median, p2_avg_median, p3_avg_median]
    table.add_row(
        [num_clusters]
        + [round(np.mean(values), 3) for values in period_stats]
        + [f"{round(f_mean, 3)} ± {round(f_std, 3)}", f"{round(f_median, 3)} ± {round(f_median_std, 3)}"]
    )
    print(table)

CASE A
+---------+-------------+-------------+---------------------+--------------------+--------------------+--------------------+---------------+---------------+
| cluster | p1_avg_mean | p2_avg_mean |     p3_avg_mean     |     p1_median      |     p2_median      |     p3_median      |   Final_mean  |  Final_median |
+---------+-------------+-------------+---------------------+--------------------+--------------------+--------------------+---------------+---------------+
|    2    |    2.128    |    1.233    | 0.37159544201599587 | 1.5994085386384997 | 0.9272149982202236 | 0.3076167020099007 | 1.244 ± 0.012 | 0.945 ± 0.014 |
+---------+-------------+-------------+---------------------+--------------------+--------------------+--------------------+---------------+---------------+
CASE B
+---------+-------------+-------------+--------------------+-------------------+-------------------+--------------------+---------------+---------------+
| cluster | p1_avg_mean | p2_avg_mean |    p3_a

In [None]:
### MAE
# MAE summary: print one single-row table per case (A-D).
#
# For each case, the three per-period lists of mean MAE and of median MAE
# (appended to by the run loop earlier in the notebook, e.g.
# p1c_mae_avg_mean.append(p1c_mae_mean)) are averaged per period, and get_std()
# supplies the "mean ± std" of the three-period average for the two Final_* columns.
#
# The four cases share one loop instead of four copy-pasted stanzas, and every
# numeric column is rounded to 3 decimals (previously only p1/p2 means were
# rounded, so the row mixed 3-decimal and full-precision floats).
mae_summary_columns = ["cluster", "p1_avg_mean", "p2_avg_mean", "p3_avg_mean",
                       "p1_median", "p2_median", "p3_median", "Final_mean", "Final_median"]

# (label, (p1, p2, p3) mean lists, (p1, p2, p3) median lists) for each case.
mae_cases = [
    ("CASE A", (p1a_mae_avg_mean, p2a_mae_avg_mean, p3a_mae_avg_mean),
     (p1a_mae_avg_median, p2a_mae_avg_median, p3a_mae_avg_median)),
    ("CASE B", (p1b_mae_avg_mean, p2b_mae_avg_mean, p3b_mae_avg_mean),
     (p1b_mae_avg_median, p2b_mae_avg_median, p3b_mae_avg_median)),
    ("CASE C", (p1c_mae_avg_mean, p2c_mae_avg_mean, p3c_mae_avg_mean),
     (p1c_mae_avg_median, p2c_mae_avg_median, p3c_mae_avg_median)),
    ("CASE D", (p1d_mae_avg_mean, p2d_mae_avg_mean, p3d_mae_avg_mean),
     (p1d_mae_avg_median, p2d_mae_avg_median, p3d_mae_avg_median)),
]

# The loop targets reuse the original working names, so after the cell finishes
# they are bound to the CASE D values exactly as before.
for case_label, (p1_avg_mean, p2_avg_mean, p3_avg_mean), (p1_avg_median, p2_avg_median, p3_avg_median) in mae_cases:
    print(case_label)
    table = PrettyTable(mae_summary_columns)

    f_mean, f_std = get_std(p1_avg_mean, p2_avg_mean, p3_avg_mean)
    f_median, f_median_std = get_std(p1_avg_median, p2_avg_median, p3_avg_median)

    # Column order must match mae_summary_columns: three means, then three medians.
    period_stats = [p1_avg_mean, p2_avg_mean, p3_avg_mean,
                    p1_avg_median, p2_avg_median, p3_avg_median]
    table.add_row(
        [num_clusters]
        + [round(np.mean(values), 3) for values in period_stats]
        + [f"{round(f_mean, 3)} ± {round(f_std, 3)}", f"{round(f_median, 3)} ± {round(f_median_std, 3)}"]
    )
    print(table)

CASE A
+---------+-------------+-------------+--------------------+--------------------+--------------------+---------------------+--------------+---------------+
| cluster | p1_avg_mean | p2_avg_mean |    p3_avg_mean     |     p1_median      |     p2_median      |      p3_median      |  Final_mean  |  Final_median |
+---------+-------------+-------------+--------------------+--------------------+--------------------+---------------------+--------------+---------------+
|    2    |    1.822    |    1.088    | 0.2846814033877334 | 1.3463105835241906 | 0.8047868017841268 | 0.23727714511582476 | 1.065 ± 0.01 | 0.796 ± 0.013 |
+---------+-------------+-------------+--------------------+--------------------+--------------------+---------------------+--------------+---------------+
CASE B
+---------+-------------+-------------+--------------------+--------------------+-------------------+---------------------+---------------+---------------+
| cluster | p1_avg_mean | p2_avg_mean |    p3_avg_

[]