In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
curdir = Path('./statoutput')


def load_eval_results(directory):
    """Load every ``eval*`` pickle found in ``directory`` into one DataFrame.

    Each pickle is read with ``pd.read_pickle`` and is expected to hold a
    DataFrame whose row labelled ``0`` carries the ``params.*`` values of the
    run. Two columns are added to every frame before concatenation:

    * ``category`` - a string built from batch, epochs, hidden output dim,
      learning rate, tolerance and action activation
      (``'sigmoid'`` when the ``params.action_activation`` column is absent);
    * ``filename`` - the name of the pickle the rows came from.

    Parameters
    ----------
    directory : pathlib.Path
        Directory scanned (non-recursively) for files matching ``eval*``.

    Returns
    -------
    pandas.DataFrame
        The frames concatenated in file-name order with their original index
        labels; an empty DataFrame when no file matches.
    """
    frames = []
    # sorted(): glob order depends on the filesystem, sorting keeps the row
    # order reproducible between runs.
    for f in sorted(directory.glob('eval*')):
        # NOTE: unpickling can execute arbitrary code - only load files that
        # were produced by a trusted training run.
        df = pd.read_pickle(f)
        if 'params.action_activation' in df.columns:
            activation = df.at[0, 'params.action_activation']
        else:
            # Runs saved without this parameter are labelled as sigmoid.
            activation = 'sigmoid'
        df['category'] = (
            f"B{df.at[0, 'params.batch']}"
            f"-E{df.at[0, 'params.epochs']}"
            f"-H{df.at[0, 'params.hidden_output_dim']}"
            f"-LR{df.at[0, 'params.learning_rate']}"
            f"-eps{df.at[0, 'params.tolerance']}"
            f"-ACT-{activation}"
        )
        df['filename'] = f.name
        frames.append(df)
    # One concat at the end instead of re-copying the accumulated frame on
    # every iteration; pd.concat rejects an empty list, hence the guard.
    return pd.concat(frames) if frames else pd.DataFrame()


# Evaluation results on the TEST SET
test_validation_data = load_eval_results(curdir)
print(test_validation_data.head())
print(test_validation_data.shape)

   action_accuracy  action_perplexity  attribute_accuracy  \
0         0.840467           4.647734            0.760981   
0         0.844543           4.939461            0.763818   
0         0.851399           3.448803            0.776654   
0         0.845655           4.842728            0.771190   
0         0.841949           4.911037            0.757051   

                                    confusion_matrix  params.batch  \
0  [[751.0, 53.0, 9.0, 7.0, 18.0], [23.0, 1084.0,...            12   
0  [[749.0, 46.0, 12.0, 10.0, 13.0], [26.0, 1081....            12   
0  [[746.0, 49.0, 10.0, 4.0, 11.0], [27.0, 1084.0...            12   
0  [[742.0, 45.0, 13.0, 13.0, 15.0], [29.0, 1086....            12   
0  [[750.0, 45.0, 16.0, 11.0, 12.0], [21.0, 1089....            12   

   params.epochs  params.hidden_output_dim  params.seed  params.learning_rate  \
0             10                       256      9555209               0.00005   
0             10                       256       9

In [2]:
# Training statistics: one `stats*` pickle per run, tagged with its file name.
stat_frames = []
# sorted(): glob order depends on the filesystem, sorting keeps the row order
# reproducible between runs.
for f in sorted(curdir.glob('stats*')):
    filename = f.name
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    df = pd.read_pickle(f)
    df['filename'] = filename
    stat_frames.append(df)
# One concat at the end instead of re-copying the accumulated frame on every
# iteration; pd.concat rejects an empty list, hence the guard.
training_stat = pd.concat(stat_frames) if stat_frames else pd.DataFrame()
print(training_stat.head())
print(training_stat.shape)

       Training Loss  Valid. Loss  Valid. Accur. class.  \
epoch                                                     
1           1.133720     1.064887              0.845147   
2           1.070569     1.071257              0.840307   
3           1.054355     1.063280              0.845147   
4           1.045294     1.057506              0.849986   
5           1.038272     1.064757              0.841731   

       Valid. Accur. mult.label Training Time Validation Time  \
epoch                                                           
1                      0.895531       0:08:33         0:00:28   
2                      0.906633       0:08:37         0:00:27   
3                      0.920011       0:08:34         0:00:28   
4                      0.921719       0:08:34         0:00:27   
5                      0.932252       0:08:34         0:00:27   

                                                 metrics  \
epoch                                                      
1      {'a

In [None]:
# Validation data collected during training: one `testdata*` pickle per run,
# tagged with its file name.
validation_frames = []
# sorted(): glob order depends on the filesystem, sorting keeps the row order
# reproducible between runs.
for f in sorted(curdir.glob('testdata*')):
    filename = f.name
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    df = pd.read_pickle(f)
    df['filename'] = filename
    validation_frames.append(df)
# One concat at the end instead of re-copying the accumulated frame on every
# iteration; pd.concat rejects an empty list, hence the guard.
validation_data = pd.concat(validation_frames) if validation_frames else pd.DataFrame()
print(validation_data.head())
print(validation_data.shape)

In [None]:
# Inspect the test-set results of the configuration chosen as case study, in
# order to locate its best execution.
# .copy() makes case_study an independent frame, so the score column added at
# the end of this cell is not written into a slice of test_validation_data
# (which would trigger pandas' SettingWithCopyWarning).
case_study = test_validation_data[
    test_validation_data['category'] == 'B12-E6-H768-LR5e-05-eps1e-08-ACT-softmax'
].copy()
print(case_study.shape)
# Series.max() skips NaN values and yields NaN, instead of raising, when no
# run matches the category.
print(f"MAX action accuracy: {case_study['action_accuracy'].max()}, MAX attribute accuracy: {case_study['attribute_accuracy'].max()}")
print(case_study)
print('Action accuracy order')
print(case_study.sort_values(by=['action_accuracy']))
print('Attribute accuracy order')
print(case_study.sort_values(by=['attribute_accuracy']))
print('Order by act_acc+att_acc-act_per')
# Combined score: the two accuracies are added, perplexity is subtracted at
# full weight.
arbitrary_choice = case_study['action_accuracy'] + case_study['attribute_accuracy'] - case_study['action_perplexity']
case_study['arbitrary_choice'] = arbitrary_choice
print(case_study.sort_values(by=['arbitrary_choice']))

In [None]:
# Pick the run with the highest combined score, then fetch the training and
# validation statistics saved for that same run.
choice = case_study['arbitrary_choice'].max()
df_test_validation = case_study[case_study['arbitrary_choice'] == choice]
if df_test_validation.empty:
    # Happens when case_study has no rows or every score is NaN.
    raise ValueError("case_study contains no run with a valid 'arbitrary_choice' score")
print(df_test_validation)
# .iloc[0] reads the first selected row by position. A label lookup ([0])
# returns a Series instead of a string when several selected rows share the
# index label 0, i.e. when two runs tie for the best score.
filename = df_test_validation['filename'].iloc[0]
# The three pickles of a run share everything after their prefix
# ('eval' / 'stats' / 'testdata').
run_suffix = filename[len('eval'):]
training_filename = f"stats{run_suffix}"
validation_filename = f"testdata{run_suffix}"
print(f"{training_filename} {validation_filename}")
df_training = training_stat[training_stat['filename'] == training_filename]
df_validation = validation_data[validation_data['filename'] == validation_filename]
print(f"Dimensioni statistiche training: {df_training.shape}")
print(f"Dimensioni statistiche validazione training: {df_validation.shape}")

In [None]:
# Show the rows of training_stat that belong to the selected run.
print(df_training)

In [None]:
# Show the rows of validation_data that belong to the selected run.
print(df_validation)

In [None]:
# Rank every test-set run by the combined "best" score, with perplexity
# weighted in hundredths (divided by 100).
df = test_validation_data.copy()
arbitrary_choice = (
    df['action_accuracy']
    .add(df['attribute_accuracy'])
    .sub(df['action_perplexity'].div(100))
)
df['arbitrary_choice'] = arbitrary_choice
report_columns = ['action_accuracy', 'action_perplexity', 'attribute_accuracy', 'arbitrary_choice', 'category']
ranked = df.sort_values(by='arbitrary_choice', ascending=False)
print(ranked[report_columns])

In [None]:
# Rank every test-set run by the combined "best" score, with the hundredths
# perplexity weight halved (divided by 200).
df = test_validation_data.copy()
arbitrary_choice = (
    df['action_accuracy']
    .add(df['attribute_accuracy'])
    .sub(df['action_perplexity'].div(200))
)
df['arbitrary_choice'] = arbitrary_choice
report_columns = ['action_accuracy', 'action_perplexity', 'attribute_accuracy', 'arbitrary_choice', 'category']
ranked = df.sort_values(by='arbitrary_choice', ascending=False)
print(ranked[report_columns])

In [None]:
# Rank every test-set run by the combined "best" score, with perplexity
# weighted in thousandths (divided by 1000).
df = test_validation_data.copy()
arbitrary_choice = (
    df['action_accuracy']
    .add(df['attribute_accuracy'])
    .sub(df['action_perplexity'].div(1000))
)
df['arbitrary_choice'] = arbitrary_choice
report_columns = ['action_accuracy', 'action_perplexity', 'attribute_accuracy', 'arbitrary_choice', 'category']
ranked = df.sort_values(by='arbitrary_choice', ascending=False)
print(ranked[report_columns])

In [None]:
import matplotlib.pyplot as plt
#% matplotlib inline

import seaborn as sns

# Seaborn dark-grid styling with a larger font scale. A single call is used
# because each sns.set() call resets the options it is not given to their
# defaults.
sns.set(style='darkgrid', font_scale=1.5)
plt.rcParams["figure.figsize"] = (12,6)

# Draw on an explicit figure so the cell never adds to a figure left open by
# another cell.
fig, ax = plt.subplots()

# Learning curves of the selected run.
ax.plot(df_training['Training Loss'], 'b-o', label="Training")
ax.plot(df_training['Valid. Loss'], 'g-o', label="Validation")

# Label the plot.
ax.set_title("Training & Validation Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()

# .iloc[0] reads the first selected row by position; the label lookup [0]
# returns a Series instead of a scalar when several rows share index label 0.
epochs = int(df_test_validation['params.epochs'].iloc[0])
# One tick per epoch, numbered from 1.
ax.set_xticks(list(range(1, epochs + 1)))

# savefig does not create missing directories, so make sure ./plots exists.
plots_dir = Path('./plots')
plots_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(plots_dir / f"loss-{df_test_validation['category'].iloc[0]}.png")


In [None]:
import matplotlib.pyplot as plt
#% matplotlib inline

import seaborn as sns

# Seaborn dark-grid styling with a larger font scale. A single call is used
# because each sns.set() call resets the options it is not given to their
# defaults.
sns.set(style='darkgrid', font_scale=1.5)
plt.rcParams["figure.figsize"] = (12,6)

# Each entry of the 'metrics' column is indexed by metric name; one entry per
# row of df_training.
act_acc = [m['action_accuracy'] for m in df_training['metrics']]
att_acc = [m['attribute_accuracy'] for m in df_training['metrics']]
x_ticks = list(range(len(act_acc)))

# Draw on an explicit figure so the cell never adds to a figure left open by
# another cell.
fig, ax = plt.subplots()

# Accuracy curves of the selected run.
ax.plot(act_acc, 'b-o', label="Actions")
ax.plot(att_acc, 'g-o', label="Attributes")

# Label the plot.
ax.set_title("Actions and attributes accuracy")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()
# The lists are plotted against positions 0..n-1; show them as epochs 1..n.
ax.set_xticks(x_ticks)
ax.set_xticklabels([str(x + 1) for x in x_ticks])

# savefig does not create missing directories, so make sure ./plots exists.
plots_dir = Path('./plots')
plots_dir.mkdir(parents=True, exist_ok=True)
# .iloc[0] reads the first selected row by position; the label lookup [0]
# returns a Series instead of a string when several rows share index label 0.
fig.savefig(plots_dir / f"accuracy-{df_test_validation['category'].iloc[0]}.png")

In [None]:
import matplotlib.pyplot as plt
#% matplotlib inline

import seaborn as sns

# Seaborn dark-grid styling with a larger font scale. A single call is used
# because each sns.set() call resets the options it is not given to their
# defaults.
sns.set(style='darkgrid', font_scale=1.5)
plt.rcParams["figure.figsize"] = (12,6)

# Each entry of the 'metrics' column is indexed by metric name; one entry per
# row of df_training.
act_per = [m['action_perplexity'] for m in df_training['metrics']]
x_ticks = list(range(len(act_per)))

# Draw on an explicit figure so the cell never adds to a figure left open by
# another cell.
fig, ax = plt.subplots()

# Perplexity curve of the selected run.
ax.plot(act_per, 'b-o')

# Label the plot.
ax.set_title("Actions perplexity")
ax.set_xlabel("Epoch")
ax.set_ylabel("Perplexity")
# The list is plotted against positions 0..n-1; show them as epochs 1..n.
ax.set_xticks(x_ticks)
ax.set_xticklabels([str(x + 1) for x in x_ticks])

# savefig does not create missing directories, so make sure ./plots exists.
plots_dir = Path('./plots')
plots_dir.mkdir(parents=True, exist_ok=True)
# .iloc[0] reads the first selected row by position; the label lookup [0]
# returns a Series instead of a string when several rows share index label 0.
fig.savefig(plots_dir / f"perplexity-{df_test_validation['category'].iloc[0]}.png")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sn
import plotly.figure_factory as ff

# Confusion matrix of the selected run. .iloc[0] reads the first selected row
# by position; the label lookup [0] returns a Series instead of a single
# matrix when several rows share index label 0.
znp = np.array(df_test_validation['confusion_matrix'].iloc[0])
print("original:")
print(znp)
# NOTE(review): the heatmap puts z rows on the y axis ("Real value") and z
# columns on the x axis ("Predicted value"). Transposing is therefore only
# consistent with those titles if the stored matrix is indexed
# [predicted][real] - confirm against the code that produced it.
z = znp.transpose()
print("transpose:")
print(z)
# Action labels, in the same order on both axes.
x = ['AddToCart', 'None', 'SearchDatabase', 'SearchMemory', 'SpecifyInfo']
y = list(x)

# Annotations must be strings: convert every cell of z.
z_text = [[str(cell) for cell in row] for row in z]

# Set up the annotated heatmap.
fig = ff.create_annotated_heatmap(z, x=x, y=y, annotation_text=z_text, colorscale='Viridis')

# Centered title.
fig.update_layout(title_text='<i><b>Actions confusion matrix</b></i>', title_x=0.5)

# Custom x-axis title, positioned in paper coordinates below the plot.
fig.add_annotation(dict(font=dict(color="black", size=14),
                        x=0.5,
                        y=-0.15,
                        showarrow=False,
                        text="Predicted value",
                        xref="paper",
                        yref="paper"))

# Custom y-axis title, rotated and positioned to the left of the plot.
fig.add_annotation(dict(font=dict(color="black", size=14),
                        x=-0.25,
                        y=0.5,
                        showarrow=False,
                        text="Real value",
                        textangle=-90,
                        xref="paper",
                        yref="paper"))

# Widen the left margin to make room for the y-axis title.
fig.update_layout(margin=dict(t=50, l=200))

# Show the colorbar next to the heatmap.
fig['data'][0]['showscale'] = True
fig.show()