In [None]:
import pandas as pd
import numpy as np
import json
import joblib
import tensorflow as tf
import glob
import math
from sklearn.metrics import mean_absolute_error
from threading import Thread

In [None]:
from package.cluster_hundler import *
from package.miscellaneous import *
from package.model_hundler import *
from package.process_data import *

In [None]:
def revert_norm(norm_df, params):
    """Return a copy of ``norm_df`` with x, y and z scaled back by their maxima.

    The x and y columns (named by ``params['x_col']`` / ``params['y_col']``)
    are multiplied by ``params['x_max']`` / ``params['y_max']``. The ``'z'``
    column is multiplied by ``params['z_max'][g]`` where ``g`` is the ``'group'``
    value of the first row, cast to ``int``; that single factor is applied to
    every row. The input frame is not modified.
    """
    restored = norm_df.copy()

    # x and y each use one scale factor taken straight from params.
    for col_key, max_key in (('x_col', 'x_max'), ('y_col', 'y_max')):
        column = params[col_key]
        restored.loc[:, column] = restored[column] * params[max_key]

    # z uses the factor of the group found on the first row only.
    z_norm = restored['z']
    group_id = int(restored.iloc[0]['group'])
    restored.loc[:, 'z'] = z_norm * params['z_max'][group_id]

    return restored

def plot_df(data_df, params, z_col, title, plot_limits = True, w = 20, h = 20, dpi = 60):
    """Show a 3D scatter of ``z_col`` over the x/y columns named in ``params``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Must contain the columns ``params['x_col']``, ``params['y_col']`` and
        ``z_col``.
    params : dict
        ``'x_col'`` and ``'y_col'`` are always read. When ``plot_limits`` is
        true, ``'x_k'`` and ``'y_k'`` (coordinates of the key point) are read
        as well. ``'x_step'`` / ``'y_step'`` are no longer needed: the guide
        lines are straight, so they are drawn from their two endpoints.
    z_col : str
        Column used for the z axis and for the point colours.
    title : str
        Title placed on the axes.
    plot_limits : bool, default True
        Draw five dashed black guide lines: one vertical line from z = 0 up to
        the key point, and lines from the x and y axes to the key point at
        z = 0 and at the key point's z value.
    w, h, dpi :
        Forwarded to ``figure(figsize=(w, h), dpi=dpi)``.

    Raises
    ------
    IndexError
        If ``plot_limits`` is true and no row has x == ``x_k`` and y == ``y_k``.
    """
    x_col, y_col = params['x_col'], params['y_col']

    # Resolve the key point before any figure exists, so a missing point does
    # not leave a half-drawn figure behind.
    if plot_limits:
        x_k, y_k = params['x_k'], params['y_k']
        at_key = data_df[(data_df[x_col] == x_k) & (data_df[y_col] == y_k)]
        if at_key.empty:
            raise IndexError(
                "plot_df: no row with %s == %r and %s == %r; cannot draw limits"
                % (x_col, x_k, y_col, y_k)
            )
        z_p = at_key[z_col].iloc[0]

    figure(figsize=(w, h), dpi=dpi)
    ax = plt.axes(projection='3d')

    ax.scatter(data_df[x_col], data_df[y_col], data_df[z_col], c=data_df[z_col])
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_zlabel(z_col)

    if plot_limits:
        # Each guide is a straight segment given as (start, end) in (x, y, z).
        # Using only the endpoints keeps the three coordinate arrays the same
        # length by construction and lets the vertical guide end exactly at z_p.
        guide_segments = (
            ((x_k, y_k, 0), (x_k, y_k, z_p)),    # vertical line at x = x_k, y = y_k
            ((x_k, 0, 0), (x_k, y_k, 0)),        # x = x_k, z = 0
            ((0, y_k, 0), (x_k, y_k, 0)),        # y = y_k, z = 0
            ((x_k, 0, z_p), (x_k, y_k, z_p)),    # x = x_k, z = z_p
            ((0, y_k, z_p), (x_k, y_k, z_p)),    # y = y_k, z = z_p
        )
        for start, end in guide_segments:
            xs, ys, zs = np.array([start, end], dtype=float).T
            ax.plot(xs, ys, zs, 'k--', alpha=1, linewidth=2.5)

    ax.set_title(title)

    plt.show()

In [None]:
# Directory of the trained model bundle and the sub-model folder inside it.
# Later cells build file names as path + "/" + sub_path + "/" + <name>.
path = "./models/fit_wtfilters_SA_NA_c15/"
sub_path = "val_sub_models_15"

# Parameters saved next to the sub-models. Reading inside a `with` block
# closes the file handle as soon as the JSON has been parsed.
with open(path+"/"+sub_path+"/model_params.json", "r") as params_file:
    params = dict(json.load(params_file))
database = load_db(path+"/database.json")

In [None]:
# Normalised fit and validation tables, plus the non-normalised combined table.
fit_norm_submodels_df = pd.read_csv("./data/fit_norm_submodels_df.csv")
val_norm_submodels_df = pd.read_csv("./data/val_norm_submodels_df.csv")
fit_val_data = pd.read_csv("./data/fit_val_data_submodels.csv")

# Stack the two normalised tables and order rows by filter, then x, then y.
fit_val_norm_data = pd.concat(
    objs=[fit_norm_submodels_df, val_norm_submodels_df],
).sort_values(
    by=['filter_', params['x_col'], params['y_col']],
)

### Error Comparison

In [None]:
# Per-filter views of the non-normalised data and of the normalised fit data,
# plus the per-filter column maxima used to clip the predictions below.
fit_val_g = fit_val_data.groupby(by="filter_")
fit_norm_g = fit_norm_submodels_df.groupby(by='filter_')
fit_val_max = fit_val_g.max()

# Rebuild database['data_arr'] with its keys in ascending order.
database['data_arr'] = dict(sorted(database['data_arr'].items(), key=lambda item: item[0]))

describe_results = []

# Sub-model names = saved file names without directory and without the
# trailing ".h5". Only the extension is removed (not every ".h5" occurrence in
# the path), and an empty directory yields an empty list instead of an error.
model_files = glob.glob(path+"/"+sub_path+"/*.h5")
model_names = np.array(
    [model_file.replace('\\', '/').rsplit('/', 1)[-1][:-len(".h5")] for model_file in model_files],
    dtype=str,
)

# Evaluate only filters that have both fit data and a saved sub-model.
filters = np.intersect1d(list(fit_norm_g.indices.keys()), model_names)

for filter_ in filters:
    fit_val_f = fit_val_g.get_group(filter_) #original data non normalized

    sub_model = tf.keras.models.load_model(path+"/"+sub_path+"/"+filter_+".h5")
    group = int(fit_norm_g.get_group(filter_).iloc[0]['group']) #get the group used to fit the model
    df_pred = predict_regression(sub_model, database, params, group) #inference already in real values

    # Keep only predictions inside the x/y range covered by this filter's data.
    in_range = (
        (df_pred[params['x_col']] <= fit_val_max.loc[filter_, params['x_col']])
        & (df_pred[params['y_col']] <= fit_val_max.loc[filter_, params['y_col']])
    )
    df_pred = df_pred[in_range]

    # NOTE(review): mean_absolute_error pairs the two series by position, so
    # this assumes the clipped prediction has the same rows, in the same
    # order, as fit_val_f -- confirm against predict_regression.
    describe_results.append({
        "filter_": filter_,
        "group": group,
        "overall_mae": mean_absolute_error(fit_val_f.z, df_pred.z_pred)
    })

    # Drop Keras' global state for the model just used; without this, loading
    # one model per filter accumulates state across iterations.
    tf.keras.backend.clear_session()

# Fixed column list so the frame has the expected columns even with no results.
describe_results_df = pd.DataFrame(describe_results, columns=["filter_", "group", "overall_mae"])

In [None]:
# Summary statistics of the per-filter results (displayed as the cell output).
describe_results_df.describe()

### Plot Results

In [None]:
# Display the parameters loaded from model_params.json for reference.
params

In [None]:
# Disabled alternative kept from the original notebook:
#cutted_compare = cut_flattening_filters(df=fit_val_data, params=params, filter_c='filter_', dz_per=0.25)

# Per-filter views of the raw data, the normalised fit data and the MAE table.
fit_val_g = fit_val_data.groupby(by="filter_")
fit_norm_g = fit_norm_submodels_df.groupby(by='filter_')
describe_g = describe_results_df.groupby(by="filter_")

# Rebuild database['data_arr'] with its keys in ascending order.
database['data_arr'] = dict(sorted(database['data_arr'].items(), key=lambda item: item[0]))

for filter_ in filters:

    #Plot original data
    fit_val_f = fit_val_g.get_group(filter_)
    plot_df(data_df=fit_val_f, params = params, z_col = 'z', title="original: "+filter_, plot_limits =True, w = 10, h = 10, dpi=60)

    #make prediction
    model_file = path+"/"+sub_path+"/"+filter_+".h5"
    sub_model = tf.keras.models.load_model(model_file)
    group = int(fit_norm_g.get_group(filter_).iloc[0]['group']) #get the group used to fit the model
    df_pred = predict_regression(sub_model, database, params, group)

    # MAE computed for this filter in the error-comparison cell, shown in the title.
    overall_mae = describe_g.get_group(filter_).iloc[0]['overall_mae']
    plot_df(data_df=df_pred, params = params, z_col = 'z_pred', title="prediction: "+filter_+" over_all error: "+str(overall_mae), w = 10, h = 10, dpi=60)

    # Drop Keras' global state for the model just used; without this, loading
    # one model per filter accumulates state across iterations.
    tf.keras.backend.clear_session()
