In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
from classificationtools import labellingdata, plotconfusionmat
from ginis import splitsets, ginigain
import matplotlib.pyplot as plt
from sklearn.metrics import balanced_accuracy_score, f1_score, cohen_kappa_score
from fpdf import FPDF

In [None]:
# Load the labelled dataset and index it by its 'Date' column.
df = pd.read_excel('results/dataclass_new.xlsx', parse_dates=True)
# Alternative input file, kept so the dataset can be switched by hand:
# df = pd.read_excel('results/dataclass_onlyseism.xlsx', parse_dates=True)
df = df.set_index('Date')

# Columns that receive the log2 transform and feed the row-wise mean below.
seismdata = ['rmsSTRG','rmsSTRE','rmsSTRC','rmsSTRA','rmsSTR4','rmsSTR1']

for seis in seismdata:
    # log2(0) yields -inf; turn infinities into NaN so they are ignored by the
    # mean below. This is done in a single assignment on purpose: the chained
    # form df[seis][mask] = np.nan writes to a temporary object when pandas
    # copy-on-write is active, silently leaving the infinities in df.
    df[seis] = np.log2(df[seis]).replace([np.inf, -np.inf], np.nan)

# Row-wise mean of the transformed columns (NaN entries are skipped by default).
df['meanrmsSeismic'] = df[seismdata].mean(axis=1)

def myf1score(a, b):
    """Return the macro-averaged F1 score of ``b`` against ``a``.

    Both arguments are forwarded positionally to ``f1_score`` (``a`` first),
    with ``average='macro'``.
    """
    macro_f1 = f1_score(a, b, average='macro')
    return macro_f1

        
def myginigain(a, b):
    """Return ``ginigain`` evaluated on the result of ``splitsets(a, b)``.

    NOTE(review): presumably ``splitsets`` partitions ``a`` by the labels in
    ``b`` -- confirm against the ``ginis`` module.
    """
    return ginigain(splitsets(a, b))


# Registry of objective functions, keyed by the name used in output file names
# and report text. Each callable is invoked as metric(true_labels, predicted).
metrics = dict([
    ('balanced_accuracy', balanced_accuracy_score),
    ('f1_score', myf1score),
    ('gini_gain', myginigain),
])

# Display name for each integer class label (0, 1, 2), in label order.
dow = dict(enumerate(("Giallo", "Arancione", "Rosso")))


In [None]:
def _score_threshold_grid(values, target, grid, metric):
    """Score every ordered pair of thresholds taken from ``grid``.

    Returns a square array in which entry ``[j, i]`` (only for ``j > i``) holds
    ``metric(target, labels)``, where ``labels`` is what ``labellingdata``
    produces for ``values`` with thresholds ``[grid[i], grid[j]]`` and classes
    ``[0, 1, 2]``. Every other entry is NaN.
    """
    size = len(grid)
    scores = np.full((size, size), np.nan)
    for i in range(size - 1):
        for j in range(i + 1, size):
            labels = labellingdata(values, [grid[i], grid[j]], [0, 1, 2])
            scores[j, i] = metric(target, labels)
    return scores


def _add_report_page(pdf, iddata, met, optthrs, bestper):
    """Append one report page for the (column, metric) pair to ``pdf``.

    The four PNG files referenced here must already have been written to
    ``results/`` by the caller.
    """
    pdf.add_page()
    pdf.set_xy(0, 0)
    pdf.set_font('arial', 'B', 12)
    pdf.cell(0, 10, "Analisi di " + iddata + " con obiettivo max "+met, 0, 2, 'C')
    pdf.image("results/optimizing"+iddata+met+".png", x = None, y = None, w = 100, h = 0, type = '', link = '')
    pdf.set_xy(100, 40)
    pdf.set_font('arial', '', 12)
    pdf.cell(60, 10, 'Soglie Ottime: '+str(np.round(optthrs,1)), 0, 0, 'C')
    pdf.set_xy(0, 78)
    pdf.image('results/timeseries'+iddata+met+'.png', x = None, y = None, w = 200, h = 70, type = '', link = '')
    pdf.set_xy(0, 145)
    pdf.image('results/class'+iddata+met+'.png', x = None, y = None, w = 200, h = 70, type = '', link = '')
    pdf.set_xy(30, 210)
    pdf.image('results/confmat_'+iddata+met+'.png', x = None, y = None, w = 80, h = 0, type = '', link = '')
    pdf.set_xy(100, 230)
    pdf.set_font('arial', '', 12)
    pdf.cell(80, 10, 'best '+met+' '+str(np.round(bestper,3)), 0, 0, 'C')


# For every column after the first one: grid-search the two thresholds that
# maximise each metric against 'CLASS', save the diagnostic figures, and write
# a per-column PDF report with one page per metric.
# The first column is skipped as in the original cell (presumably it is
# 'CLASS' -- TODO confirm); 'CLASS' is also excluded explicitly so the loop
# cannot try to optimise the target against itself.
for iddata in [col for col in list(df.columns)[1:] if col != 'CLASS']:
    data = df[['CLASS',iddata]].dropna()

    if data.empty:
        # Nothing to optimise: no row has both a class and a value.
        print('Skipping ' + str(iddata) + ': no rows with both CLASS and data')
        continue

    # Named lo/hi rather than min/max so the builtins are not shadowed in the
    # notebook's global namespace.
    lo = data[iddata].min()
    hi = data[iddata].max()

    n = 100
    s = np.linspace(lo, hi, n)

    # Hoisted out of the grid search: these do not change per threshold pair.
    values = data[iddata].values
    target = data['CLASS'].values

    pdf = FPDF()

    for met in metrics:

        perform = _score_threshold_grid(values, target, s, metrics[met])

        # find the optimal thresholds (maximum gain); on ties the first match
        # in row-major order is used
        indx = np.where(perform == np.nanmax(perform))

        # Columns of `perform` index the lower (orange) threshold, rows the
        # upper (red) one.
        optthrs = [s[indx[1][0]], s[indx[0][0]]]

        fig = px.imshow(perform, origin='lower', x=s, y=s, labels={'x':'Orange Thr', 'y':'Red Thr', 'color':met})

        fig.update_layout(
            title={
                'text': iddata,
                'y':0.93,
                'x':0.5,
                'xanchor': 'center',
                'yanchor': 'top'},
            paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(0,0,0,0)')

        fig.show()

        fig.write_image("results/optimizing"+iddata+met+".png")

        # Time series of the raw column with the two optimal thresholds.
        ts_fig = plt.figure(figsize=(15,8))
        plt.plot(df[iddata], 'o-')
        plt.plot([df[iddata].index[0], df[iddata].index[-1]], [optthrs[0], optthrs[0]], color='orange', lw=2)
        plt.plot([df[iddata].index[0], df[iddata].index[-1]], [optthrs[1], optthrs[1]], color='red', lw=2)
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=16)
        plt.savefig('results/timeseries'+iddata+met+'.png')
        # The figure lives on as a PNG; close it so figures do not pile up
        # across the (column x metric) iterations.
        plt.close(ts_fig)

        # Predicted class for every non-missing sample vs. the reference class.
        ally = df[[iddata]].dropna()
        ally[iddata] = labellingdata(ally[iddata].values, optthrs, [0,1,2])
        cls_fig = plt.figure(figsize=(15,8))
        plt.plot(df['CLASS'], color='black', lw=3)
        plt.scatter(ally[iddata].index, ally[iddata].values, s=60, facecolors='none', edgecolors='b')
        plt.xticks(fontsize=16)
        plt.yticks([0,1,2], ['giallo', 'arancione', 'rosso'],fontsize=16, rotation=90)
        plt.savefig('results/class'+iddata+met+'.png')
        plt.close(cls_fig)

        y = labellingdata(values, optthrs, [0,1,2])
        # Explicit copy: adding a column to a slice of `data` would trigger
        # SettingWithCopyWarning.
        out = data[['CLASS']].copy()
        out['predicted'] = y

        out['predicted'] = out['predicted'].map(dow)
        out['CLASS'] = out['CLASS'].map(dow)

        confplt = plotconfusionmat(out['CLASS'].values, out['predicted'].values, list(dow.values()), figsize=(5,4))
        confplt.savefig('results/confmat_'+iddata+met+'.png')
        # The type returned by plotconfusionmat is not visible here, so release
        # whatever pyplot still tracks rather than closing a specific handle.
        plt.close('all')

        bestper = metrics[met](out['CLASS'].values, out['predicted'].values)

        _add_report_page(pdf, iddata, met, optthrs, bestper)

    pdf.output('results/'+iddata+'.pdf', 'F')
