In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from glob import glob
from scipy.special import erf
from scipy.optimize import curve_fit
import json
from sklearn.metrics import mean_absolute_error
from package.miscellaneous import export_params, load_db, plot_df, nbits
from package.process_data import norm_encode_bycluster, cut_flattening_filters, slice_df
import warnings
warnings.filterwarnings("ignore")

In [None]:
from package.acc_models import *

# Fit Curves

In [None]:
# Folder (under ./models/) that holds the database and parameter files of this fit.
subpath = "fit_wtfilters_SA_NA_c"
model_dir = "./models/" + subpath

database = load_db(model_dir + "/database.json")

# Read the base parameters through a context manager so the file handle is
# closed as soon as parsing finishes instead of being left to the garbage collector.
with open(model_dir + "/base_model_params.json", "r") as params_file:
    params = json.load(params_file)

# Notebook-level overrides of the x_c / y_c entries before the parameters are exported.
params['x_c'] = 25_000
params['y_c'] = 25

export_params(model_dir + "/submodels_params.json", params)

In [None]:
# Load the fit/validation data used for the submodels.
fit_val_csv_path = "./data/fit_val_data_submodels.csv"
fit_val_df = pd.read_csv(fit_val_csv_path)

In [None]:
# Step 1: run the project's flattening-filter cut on the loaded data (dz_per = 0.25).
top_cutted_df = cut_flattening_filters(
    df=fit_val_df,
    params=params,
    filter_c='filter_',
    dz_per=0.25,
)

# Step 2: normalise/encode by cluster; norm_xy=False is passed, so x and y are
# presumably left un-normalised (confirm against norm_encode_bycluster).
fit_val_norm_df = norm_encode_bycluster(
    df=top_cutted_df,
    database=database,
    params=params,
    filter_c_name="filter_",
    norm_xy=False,
)

# Step 3: split into the fit and validation frames, with y_min set to y_c + 3.
print("Slice")
slice_y_min = params['y_c'] + 3
fit_norm_df, val_norm_df = slice_df(
    df=fit_val_norm_df,
    params=params,
    filter_col="filter_",
    y_min=slice_y_min,
    isnorm=False,
)

print("Shape of fit_norm_df: ", np.shape(fit_norm_df))
print("Shape of val_norm_df: ", np.shape(val_norm_df))

**Compare before fit**

In [None]:
# Group the loaded data and the cut data by filter so that the same filter can be
# plotted from both frames.
fit_val_g = fit_val_df.groupby(by="filter_")
fit_val_c_g = top_cutted_df.groupby(by="filter_")

filters = list(fit_val_g.indices.keys())

i = 5  # position in `filters` of the filter to inspect

# Identical plot for both frames: first the data as loaded, then after the cut.
for grouped in (fit_val_g, fit_val_c_g):
    plot_df(grouped.get_group(filters[i]),
            x_col=params['x_col'],
            y_col=params['y_col'],
            z_col="z",
            title="t",
            w=10,
            h=6)

**Fit Models**

In [None]:
def xmodel_prediction(func, model_args, params):
    """Predict z on a regular (x, y) grid using one parameter set per x value.

    The grid is built with ``np.arange`` from 0 up to ``x_max`` / ``y_max`` in
    steps of ``x_step`` / ``y_step`` (all read from ``params``). For every x of
    the grid, ``func`` is evaluated on ``y / y_max`` with the arguments stored
    in ``model_args[x]``.

    @func: callable ``func(y_normalised, *args)`` returning one z per y.
    @model_args: mapping from each x grid value to the positional arguments of ``func``.
    @params: dict with the keys x_col, y_col, x_max, y_max, x_step and y_step.

    Returns a DataFrame with the y column, the x column and "z", sorted by (x, y).
    """
    x_name = params['x_col']
    y_name = params['y_col']

    y_grid = np.arange(0, params['y_max'] + params['y_step'], params['y_step'])
    x_grid = np.arange(0, params['x_max'] + params['x_step'], params['x_step'])

    # Vector of ones used to repeat the current x for every y of the grid.
    unit = np.ones(len(y_grid))

    per_x_frames = [
        pd.DataFrame({y_name: y_grid,
                      x_name: unit * x_value,
                      "z": func(y_grid / params['y_max'], *model_args[x_value])})
        for x_value in x_grid
    ]

    return pd.concat(per_x_frames).sort_values(by=[x_name, y_name])
    
def ymodel_prediction(func, model_args, params):
    """Predict z on a regular (x, y) grid using one parameter set per y value.

    Counterpart of ``xmodel_prediction``: here the curve runs along x, so for
    every y of the grid ``func`` is evaluated on ``x / x_max`` with the
    arguments stored in ``model_args[y]``.

    @func: callable ``func(x_normalised, *args)`` returning one z per x.
    @model_args: mapping from each y grid value to the positional arguments of ``func``.
    @params: dict with the keys x_col, y_col, x_max, y_max, x_step and y_step.

    Returns a DataFrame with the x column, the y column and "z", sorted by (x, y).
    """
    x_col = params['x_col']
    y_col = params['y_col']

    y_arr = np.arange(0, params['y_max'] + params['y_step'], params['y_step'])  # grid values of y
    x_arr = np.arange(0, params['x_max'] + params['x_step'], params['x_step'])  # grid values of x
    ones = np.ones(len(x_arr))  # helper to repeat the current y for every x

    # The independent variable of these curves is x, so it has to be scaled by
    # x_max. The previous code divided by y_max (copied from xmodel_prediction).
    x_norm = x_arr / params['x_max']

    frames = [
        pd.DataFrame({x_col: x_arr,
                      y_col: ones * y,  # broadcast the current y
                      "z": func(x_norm, *model_args[y])})
        for y in y_arr  # one curve per grid value of y
    ]

    return pd.concat(frames).sort_values(by=[x_col, y_col])

In [None]:
def build_submodels(model_func, fit_znorm_df, filter_col, params):
    """Build the submodels used to predict z for 0 <= x <= x_max and 0 <= y <= y_max.

    Steps:
      1. Fit models with the roles of x and y swapped (``build_models`` on a
         copy of ``params`` whose x/y column names and maxima are exchanged).
      2. Use those models to predict z, keep only the rows with x > x_k and
         append them to the fit data.
      3. Fit and return the final models on the extended data with the
         original ``params``.

    @model_func: distribution function handed to ``build_models`` / ``ymodel_prediction``.
    @fit_znorm_df: fit data; only z can be normalized.
    @filter_col: name of the column identifying the filter of each row.
    @params: parameter dict; this function reads x_col, y_col, x_max, y_max, x_k and y_k.
    """

    print("building yk models")
    p_y = params.copy()
    p_y['x_col'] = params['y_col']
    p_y['y_col'] = params['x_col']
    p_y['y_max'] = params['x_max']
    p_y['x_max'] = params['y_max']
    yk_acc_models = build_models(model_func, fit_znorm_df, filter_col, p_y)  # models keyed by y values

    #--------- TODO predict to x > x_k TODO ---------------------------------
    print("Predicting from 0 <= x <= x_max")
    # The prediction must use the ORIGINAL column names (the x > x_k filter
    # below reads params['x_col']), so the y_max limit goes on a copy of
    # `params`. The previous code built this copy but then wrote the limit
    # into the swapped `p_y` and predicted with that instead.
    p = params.copy()
    p['y_max'] = params['y_k']  # limit y_max because the models have no keys for y > y_k

    yk_pred_frames = []  # one prediction frame per filter

    for filter_ in fit_znorm_df.groupby(by=filter_col).indices.keys():
        pred_df_filter = ymodel_prediction(func=model_func,
                                           model_args=yk_acc_models[filter_],
                                           params=p)
        # Tag the rows with their filter under the caller-supplied column name
        # (previously hard-coded to "filter_").
        pred_df_filter[filter_col] = filter_
        yk_pred_frames.append(pred_df_filter)

    if yk_pred_frames:
        # pd.concat stacks the per-filter frames; pd.DataFrame(list_of_frames)
        # does not produce a frame with the prediction columns.
        yk_pred_df = pd.concat(yk_pred_frames)
        yk_pred_df = yk_pred_df[yk_pred_df[params['x_col']] > params['x_k']]  # keep only x > x_k
        fit_znorm_df = pd.concat([fit_znorm_df, yk_pred_df])

    print("building xmodels")
    return build_models(model_func, fit_znorm_df, filter_col, params)  # fit with x values as keys

In [None]:
# Distribution used for the fit: the CD_logo_normal method of CommonDistribution.
common_dist = CommonDistribution()
model_func = common_dist.CD_logo_normal

# Fit the submodels on the normalised fit/validation frame.
acc_models = build_submodels(model_func, fit_val_norm_df, "filter_", params)

**Export Model**

In [None]:
# Replace the NumPy arrays stored one level down in acc_models by plain lists
# (in place) before exporting — presumably so export_params can write JSON.
# isinstance is used instead of comparing type objects, so dict / ndarray
# subclasses are handled as well.
for submodel in acc_models.values():
    if not isinstance(submodel, dict):
        continue  # top-level entries that are not dicts are exported as they are
    for key, value in submodel.items():
        if isinstance(value, np.ndarray):
            submodel[key] = value.tolist()

export_params("./models/"+subpath+"/"+model_func.__name__+".json", acc_models)

In [None]:
# Filter names present in the normalised data, and the "func" entry stored in acc_models.
norm_groups = fit_val_norm_df.groupby(by="filter_")
filters = list(norm_groups.indices.keys())

func = acc_models["func"]

**Get overall error**

In [None]:
def overall_error(acc_models, database, norm_df, filter_col):
    """Compute the mean absolute error between the data and the model, per filter.

    @acc_models: dict with the fitted models. It is read at the keys "func"
                 (name of the CommonDistribution method), "params" (parameter
                 dict) and one entry per filter, which holds the per-x
                 arguments and a "group" entry.
    @database: dictionary with the database used to define the different groups
               of the dataset; ``database['z_max'][group]`` is used to scale z.
    @norm_df: the dataset to be used to compare
    @filter_col: name of the column of ``norm_df`` that identifies the filter.

    Returns a DataFrame with the columns "filter_" and "mae_error".
    """
    # Prepare the data to iterate over.
    g_df = norm_df.groupby(by=filter_col)
    filters = list(g_df.indices.keys())

    common_dist = CommonDistribution()  # instance with the supported distributions
    model_func = getattr(common_dist, acc_models['func'])  # look the function up by name

    max_df = g_df.max()
    base_params = acc_models["params"]

    over_all_error = []
    for filter_ in filters:
        print(filter_)

        # Start from the base parameters on EVERY iteration. The previous code
        # rescaled one shared copy in place, so x_max / y_max were multiplied
        # again for each filter and the scaling compounded across filters.
        p = base_params.copy()
        p['x_max'] = max_df.loc[filter_, p['x_col']] * base_params['x_max']
        p['y_max'] = max_df.loc[filter_, p['y_col']] * base_params['y_max']

        pred_df_filter = xmodel_prediction(model_func, acc_models[filter_], p)  # prediction for this filter

        df_filter = g_df.get_group(filter_)  # data of this filter

        # Revert the z normalisation on local Series instead of writing into
        # the group slice, so norm_df is never modified by this function.
        z_max = database['z_max'][int(acc_models[filter_]['group'])]
        z_pred = pred_df_filter['z'] * z_max
        z_true = df_filter['z'] * z_max

        # NOTE(review): the two Series are compared positionally; this assumes
        # the data rows of the filter line up with the prediction grid — confirm.
        over_all_error.append({
            "filter_": filter_,
            "mae_error": mean_absolute_error(z_true, z_pred)
        })

    return pd.DataFrame(over_all_error)

In [None]:
# NOTE(review): the third parameter of overall_error is named norm_df and its z
# column is multiplied by z_max inside the function, yet the frame passed here
# is fit_val_df (as read from the CSV), not fit_val_norm_df — confirm this is intended.
error_summary_df = overall_error(acc_models=acc_models,
                                 database=database,
                                 norm_df=fit_val_df,
                                 filter_col="filter_")

**Plot results**

In [None]:
# Compare, for one filter, the data with the curves predicted by the fitted models.
g_df = fit_val_norm_df.groupby(by="filter_")
filters = list(g_df.indices.keys())
filter_ = filters[1]

df_filter = g_df.get_group(filter_)  # data of the selected filter
y_arr = np.arange(0, params['y_max'] + params['y_step'], params['y_step'])  # grid values of y
ones = np.ones(len(y_arr))  # helper to repeat the current x for every y

# One predicted curve per x present in the data. The frames are collected in a
# list and concatenated once, instead of growing a DataFrame inside the loop
# (which copies all previous rows on every iteration).
pred_frames = [
    pd.DataFrame({params['y_col']: y_arr,
                  params['x_col']: ones * x,
                  "z": common_dist.CD_logo_normal(y_arr / params['y_max'], *acc_models[filter_][x])})
    for x in df_filter[params['x_col']].unique()
]
dataframe = pd.concat(pred_frames, axis=0).sort_values(by=[params['x_col'], params['y_col']])

# Revert the z normalisation of both frames. assign() returns new frames, so the
# slice handed out by groupby is not written to in place.
z_max = database['z_max'][df_filter.iloc[0]['group']]
dataframe = dataframe.assign(z=dataframe['z'] * z_max)
df_filter = df_filter.assign(z=df_filter['z'] * z_max)

plot_df(data_df = df_filter,
        x_col = params['x_col'],
        y_col = params['y_col'],
        z_col = "z",
        title = "Original: "+filter_, w = 10, h = 6)

plot_df(data_df = dataframe,
        x_col = params['x_col'],
        y_col = params['y_col'],
        z_col = "z",
        title = "Logo Normal: "+filter_, w = 10, h = 6)
