# Testing on synthetic data
Models evaluation on synthetic rhythmic data with:
- multiple component (n_components=1,2,3)
- Different levels of noise (0.3, 0.6, 0.9)
- Replicates as 1 since ARS oly work when there isn't replicates
- We create 10000 line by file

In [None]:
#import
from rda_package import rda 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
N_TEST=100
REPLICATES=1

In [None]:
#3h
for i in [1,2,3]:
    for j in [1,2,3]:
        FILENAME=f"c{i}_n0{3*j}.csv"
        NOISE=float(0.3*j)
        N_COMPONENTS=int(i)
        rda.synt_rhythmic_data(FILENAME,half_rnd=True,n_test=N_TEST,n_components=N_COMPONENTS,noise=NOISE,replicates=REPLICATES)
        rda.file_rda(FILENAME,metrics=True,half_rnd=True,n_components=N_COMPONENTS)

In [None]:
"""for i in [1,2,3]:
    for j in [1,2,3]:
        FILENAME=f"c{i}_n0{3*j}.csv"
        NOISE=float(0.3*j)
        N_COMPONENTS=int(i)
        rda.make_metrics(FILENAME,half_rnd=True)
        rda.plot_metrics(FILENAME)"""

In [None]:
import pandas as pd
df_after_ind= pd.DataFrame()
for i in [1,2,3]:
    for j in [1,2,3]:
        filename=f"c{i}_n0{3*j}.csv"
        df_after = pd.read_csv(f"Out/{filename[:-4]}/metrics_{filename[:-4]}.csv")
        df_after['filename']=filename
        df_after_ind =pd.concat([df_after_ind,df_after])
df_after_ind[df_after_ind['model']=='ARS']

In [None]:
#mcc models evaluation
ncols = 3
nrows = 3
fig, axes = plt.subplots(ncols = ncols, nrows = nrows, sharey=False)
axes = axes.flatten()         
fig.set_size_inches(15, 15)
filenames = []
for i in [1,2,3]:
    for j in [1,2,3]:
        filename = f"c{i}_n0{3*j}.csv"
        filenames.append(filename)
for ax, filename in zip(axes,filenames):
            df_metrics = pd.read_csv(f"Out/{filename[:-4]}/metrics_{filename[:-4]}.csv")
            sns.barplot(data=df_metrics, x='model', y='mcc', ax=ax, ci=68) # ci=68 --> standard error!
            ax.set_ylabel(f'n_components = {filename[1]}')
            ax.set_xlabel(f'noise = 0.{filename[-5]}')
plt.suptitle(f'Matthews Correlation Coefficient for models evaluation')
fig.subplots_adjust(top=0.95)
plt.savefig(f"Out/models_mcc.png", bbox_inches="tight", facecolor='white')
plt.show()

In [None]:
#models evaluation on 1 dataset
i=1
j=1
filename=f"c{i}_n0{3*j}.csv"
df_metrics = pd.read_csv(f"Out/{filename[:-4]}/metrics_{filename[:-4]}.csv")
ncols = 2
nrows = 3
fig, axes = plt.subplots(ncols = ncols, nrows = nrows, sharey=False)
axes = axes.flatten()         
fig.set_size_inches(10, 10)
metrics = ["precision", "f1", "recall", "accuracy", "auc", "mcc"]
for ax, metric in zip(axes, metrics):
    sns.barplot(data=df_metrics, x='model', y=metric, ax=ax, ci=68) # ci=68 --> standard error!
    ax.set_ylabel(metric)
plt.suptitle(f'Metrics  {filename[:-4]}')
fig.subplots_adjust(top=0.95)
plt.savefig(f"Out/{filename[:-4]}/{filename[:-4]}_metrics.png", bbox_inches="tight", facecolor='white')
plt.show()

In [None]:
#global models evaluation
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
df_after_ind= pd.DataFrame()
for i in [1,2,3]:
    for j in [1,2,3]:
        filename=f"c{i}_n0{3*j}.csv"
        df_after = pd.read_csv(f"Out/{filename[:-4]}/metrics_{filename[:-4]}.csv")
        df_after['filename']=filename
        df_after_ind =pd.concat([df_after_ind,df_after])
df_after_ind[df_after_ind['model']=='ARS']
ncols = 2
nrows = 3
models = ['ARS','JTK','LS','meta2d','Cosinor','Rain']
metrics = ["precision", "f1", "recall", "accuracy", "auc", "mcc"]
for model in models:
    fig, axes = plt.subplots(ncols = ncols, nrows = nrows, sharey=False)
    axes = axes.flatten()         
    fig.set_size_inches(16, 16)
    for ax, metric in zip(axes, metrics):
        #sns.barplot(data=df_after_ind, x='model', y=metric, ax=ax, ci=95) # ci=95 --> 95% confidence interval
        df=df_after_ind[df_after_ind['model']==model]
        print(df['filename'].to_numpy())
        sns.barplot(data=df, x='filename', y=metric, ax=ax, ci=68) # ci=68 --> standard error!
        ax.set_xticklabels(df['filename'].to_numpy(), rotation=45)
        #ax.set_xlabel("")
        #ax.set_title(metric)
        ax.set_ylabel(metric)
        #if i % ncols:
        #    ax.set_ylabel("")
        #else:
        #    ax.set_ylabel("Frequency")
    

    plt.suptitle(model)
    fig.subplots_adjust(top=0.95)

    #plt.savefig(folder_out+f"\\{label}_metrics_after_ind.pdf", bbox_inches="tight")
    plt.savefig(f"Out/{model}_metrics.png", bbox_inches="tight",facecolor='white')

    plt.show()

In [11]:
#global std
import pandas as pd
import numpy as np
import plotly.figure_factory as ff
df_after_ind= pd.DataFrame()
for i in [1,2,3]:
    for j in [1,2,3]:
        filename=f"c{i}_n0{3*j}.csv"
        df_after = pd.read_csv(f"Out/{filename[:-4]}/metrics_{filename[:-4]}.csv")
        df_after['filename']=filename
        df_after_ind =pd.concat([df_after_ind,df_after])
models = ['ARS','JTK','LS','meta2d','Cosinor','Rain']
metrics = ["precision", "f1", "recall", "accuracy", "auc"]
df = pd.DataFrame(columns = ['model','avg_std'], dtype=float)
for model in models:
    res=[]
    for metric in metrics:
        std = df_after_ind[df_after_ind['model']==model][metric].std()
        res.append(std)
        #print(model,metric,std°
    #print(res)
    df =df.append({'model':model,'avg_std': np.array(res).mean()}, ignore_index=True)
fig1 = ff.create_table(df.sort_values(by='avg_std'))
fig1.update_layout(width=500, height=300)
fig1.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [None]:
#std fixed number of components
import pandas as pd
import numpy as np
import plotly.figure_factory as ff
for i in [1,2,3]:
    df_after_ind= pd.DataFrame()
    df = pd.DataFrame(columns = ['model'], dtype=float)
    for j in [1,2,3]:
        filename=f"c{i}_n0{3*j}.csv"
        df_after = pd.read_csv(f"Out/{filename[:-4]}/metrics_{filename[:-4]}.csv")
        df_after['filename']=filename
        df_after_ind =pd.concat([df_after_ind,df_after])
    models = ['ARS','JTK','LS','meta2d','Cosinor','Rain']
    metrics = ["precision", "f1", "recall", "accuracy", "auc"]
    df = pd.DataFrame(columns = ['model'], dtype=float)
    #print(df_after_ind)
    for model in models:
        res=[]
        for metric in metrics:
            std = df_after_ind[df_after_ind['model']==model][metric].std()
            res.append(std)
            #print(model,metric,std)
        df =df.append({'model':model,f'avg_std_c{i}': np.array(res).mean()}, ignore_index=True)
    fig1 = ff.create_table(df.sort_values(by=f'avg_std_c{i}'))
    fig1.update_layout(width=1000, height=300)
    fig1.show()

In [None]:
#std fixed noise
import pandas as pd
import numpy as np
import plotly.figure_factory as ff

for j in [1,2,3]:
    df_after_ind= pd.DataFrame()
    df = pd.DataFrame(columns = ['model'], dtype=float)
    for i in [1,2,3]:
        filename=f"c{i}_n0{3*j}.csv"
        df_after = pd.read_csv(f"Out/{filename[:-4]}/metrics_{filename[:-4]}.csv")
        df_after['filename']=filename
        df_after_ind =pd.concat([df_after_ind,df_after])
    models = ['ARS','JTK','LS','meta2d','Cosinor','Rain']
    metrics = ["precision", "f1", "recall", "accuracy", "auc"]
    df = pd.DataFrame(columns = ['model'], dtype=float)
    #print(df_after_ind)
    for model in models:
        res=[]
        for metric in metrics:
            std = df_after_ind[df_after_ind['model']==model][metric].std()
            res.append(std)
            #print(model,metric,std)
        df =df.append({'model':model,f'avg_std_n0{3*j}': np.array(res).mean()}, ignore_index=True)
    fig1 = ff.create_table(df.sort_values(by=f'avg_std_n0{3*j}'))
    fig1.update_layout(width=1000, height=300)
    fig1.show()