In [1]:
import itertools
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import ConfusionMatrixDisplay, classification_report, confusion_matrix

from config import *

In [2]:
# Root folder that holds one sub-directory of result files per model configuration.
result_dir = os.path.join(
    'results',
    'problem_classification_stats',
)

In [3]:
# Gather one record per configuration (dimension, sample-count factor, instance
# count, fold, d_model, n_heads, n_layers) whose test classification report
# exists under result_dir; the last tuple element is the accuracy read from it.
accuracies = []
# itertools.product visits the grid in the same order as nested for-loops written
# in this argument order (the last argument varies fastest).
for dimension, sample_count_factor, instance_count, fold, d_model, n_heads, n_layers in itertools.product(
    [3, 5, 10, 20],   # dimension
    [50, 100],        # sample_count_factor
    [999],            # instance_count
    range(0, 10),     # fold
    [30, 50],         # d_model
    [1, 3],           # n_heads
    [1, 3],           # n_layers
):
    config_dir = f'dim_{dimension}_instances_{instance_count}_samples_{sample_count_factor}_fold_{fold}_n_heads_{n_heads}_n_layers_{n_layers}_d_model_{d_model}_d_k_None_d_v_None'
    test_report = os.path.join(result_dir, config_dir, 'test_classification_report.csv')
    if not os.path.isfile(test_report):
        # Report the missing configuration and keep going instead of aborting the sweep.
        print('not found', test_report)
        continue
    report = pd.read_csv(test_report, index_col=0)
    # The accuracy is taken from the 'support' row of the 'accuracy' column.
    accuracies.append(
        (dimension, sample_count_factor, instance_count, fold, d_model, n_heads, n_layers,
         report.loc['support', 'accuracy'])
    )



KeyboardInterrupt: 

In [None]:
# Tabulate the collected accuracy records and derive the label columns used for plotting.
accuracy_df = pd.DataFrame(
    accuracies,
    columns=['dimension', 'sample_count_factor', 'instance_count', 'fold', 'd_model', 'n_heads', 'n_layers', 'accuracy'],
)
# Build the "n_layers, n_heads" label column-wise. A row-wise apply(axis=1) passes each
# row as a Series with a single common dtype, so when integer columns sit next to a
# float accuracy column the integers are rendered as floats ("1.0, 3.0" instead of
# "1, 3"). The column-wise form also works when no records were collected.
accuracy_df['n_layers, n_heads'] = (
    accuracy_df['n_layers'].astype(str) + ', ' + accuracy_df['n_heads'].astype(str)
)
# Sample size label: the sample-count factor followed by 'd' (e.g. '50d').
accuracy_df['sample size'] = accuracy_df['sample_count_factor'].astype(str) + 'd'
# Alias of d_model under the name used as the x-axis label.
accuracy_df['model dimension'] = accuracy_df['d_model']

In [None]:
# Box plots of test accuracy against model dimension: one row of panels per
# 'dimension' value, one column per 'sample size', coloured by 'n_layers, n_heads'.
sns.set(font_scale=2)
# catplot is a figure-level function that creates its own figure, so no
# plt.figure(...) call precedes it: that would only leave an empty, unused figure.
g = sns.catplot(
    data=accuracy_df, x='model dimension', y='accuracy',
    col='sample size', row='dimension', hue='n_layers, n_heads', kind='box', palette=color_palette_4
)
# Break each facet title at '|' so its parts are stacked on separate lines;
# strip() removes the blanks that surrounded the separator.
for ax in g.axes.flat:
    ax.set_title('\n'.join(part.strip() for part in ax.get_title().split('|')))

# savefig does not create missing directories.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/configuration_accuracy.pdf')

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# For every problem dimension, pool the test predictions of all ten folds of the
# n_layers=1 / n_heads=1 / d_model=30 configuration (sample-count factor 50) and
# save the resulting confusion matrix as a heatmap.
sns.set(font_scale=1.8)
class_labels = list(range(1, 25))  # labels 1..24 fix the matrix rows and columns
confusion_matrix_dir = 'figures/static_problem_classification'
# savefig does not create missing directories.
os.makedirs(confusion_matrix_dir, exist_ok=True)

for dimension, sample_count_factor, instance_count, d_model, n_heads, n_layers in itertools.product(
    [3, 5, 10, 20], [50], [999], [30], [1], [1]
):
    # Read the per-fold prediction files first and concatenate once afterwards,
    # rather than growing a DataFrame inside the loop.
    fold_predictions = []
    for fold in range(0, 10):
        config_dir = f'dim_{dimension}_instances_{instance_count}_samples_{sample_count_factor}_fold_{fold}_n_heads_{n_heads}_n_layers_{n_layers}_d_model_{d_model}_d_k_None_d_v_None'
        test_predictions_file = os.path.join(result_dir, config_dir, 'test_ys_predictions.csv')
        if os.path.isfile(test_predictions_file):
            fold_predictions.append(pd.read_csv(test_predictions_file, index_col=0))
        else:
            print('not found', test_predictions_file)

    all_folds_predictions = pd.concat(fold_predictions) if fold_predictions else pd.DataFrame()
    # One-line summary instead of dumping the whole pooled frame.
    print(f'dimension {dimension}: {all_folds_predictions.shape[0]} pooled test predictions')
    if all_folds_predictions.shape[0] == 0:
        # Nothing to plot for this dimension.
        continue

    # Rows are the true labels ('ys'), columns the predicted labels ('predictions').
    cm = confusion_matrix(
        all_folds_predictions['ys'], all_folds_predictions['predictions'], labels=class_labels
    )
    cm = pd.DataFrame(cm, index=class_labels, columns=class_labels)

    # Heatmap of the confusion matrix, annotated with the integer counts.
    plt.figure(figsize=(19, 15))
    sns.heatmap(cm, annot=True, fmt="d", cmap="YlGnBu", cbar=True)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    plt.savefig(
        f'{confusion_matrix_dir}/confusion_matrix_dim_{dimension}_instances_{instance_count}_samples_{sample_count_factor}.pdf',
        bbox_inches='tight',
    )
                            

In [None]:
def get_accuracy_from_report_file(test_report):
    """Read the accuracy stored in a classification-report CSV file.

    Args:
        test_report: Path of the CSV file; its first column is used as the index.

    Returns:
        The cell at row 'support', column 'accuracy' of the file, or None when
        the file does not exist (the path is printed in that case).
    """
    # Guard clause: a missing report is printed and signalled with None.
    if not os.path.isfile(test_report):
        print(test_report)
        return None

    report_table = pd.read_csv(test_report, index_col=0)
    return report_table.loc['support', 'accuracy']

In [None]:
# Compare, per fold and problem dimension, the test accuracy read from result_dir
# (labelled 'all') with the accuracy of the runs stored under
# results_aggregations/... whose directory name ends in '_aggregations_<name>'
# for <name> in min / max / std / mean, then draw one box plot row per dimension.
agg_accuracies = []

for dimension, sample_count_factor, instance_count, fold, d_model, n_heads, n_layers in itertools.product(
    [3, 5, 10, 20], [50], [999], range(0, 10), [30], [1], [1]
):
    settings = (dimension, sample_count_factor, instance_count, fold, d_model, n_heads, n_layers)
    base_config_dir = f'dim_{dimension}_instances_{instance_count}_samples_{sample_count_factor}_fold_{fold}_n_heads_{n_heads}_n_layers_{n_layers}_d_model_{d_model}_d_k_None_d_v_None'

    # Baseline run: uses result_dir rather than repeating the hard-coded results path.
    test_report = os.path.join(result_dir, base_config_dir, 'test_classification_report.csv')
    # A missing report yields None, which becomes a missing value in the frame.
    agg_accuracies.append(settings + (get_accuracy_from_report_file(test_report), 'all'))

    for aggregation in ['min', 'max', 'std', 'mean']:
        # Same configuration name with the aggregation suffix appended.
        config_dir = f'{base_config_dir}_aggregations_{aggregation}'
        test_report = os.path.join('results_aggregations/problem_classification_stats', config_dir, 'test_classification_report.csv')
        agg_accuracies.append(settings + (get_accuracy_from_report_file(test_report), aggregation))


agg_accuracy_df = pd.DataFrame(
    agg_accuracies,
    columns=['dimension', 'sample_count_factor', 'instance_count', 'fold', 'd_model', 'n_heads', 'n_layers', 'accuracy', 'aggregation'],
)
sns.set(font_scale=1.3)
# NOTE(review): five aggregation labels are plotted with a palette whose name
# suggests four colours, and no hue is given - confirm the intended colouring.
g = sns.catplot(
    height=3, aspect=1.5, data=agg_accuracy_df, x='aggregation', y='accuracy', row='dimension', kind='box', palette=color_palette_4
)
plt.tight_layout()
# savefig does not create missing directories.
os.makedirs('figures', exist_ok=True)
plt.savefig('figures/aggregation_accuracy.pdf')