In [1]:
# Partition id shared by the cells below: it is interpolated into the dataset
# name passed to train.py (f'gtd{partition}') and into the file names read
# from / written to the results/ directory.
partition = 478

In [2]:
import sys
from train import main
from itertools import product  
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


In [3]:

# Exhaustive grid search: every combination of the value lists below is
# trained once through train.main(), which is driven via a synthetic sys.argv.
n_tree_values = [5, 10, 20, 50, 100, 150, 200, 300]
tree_depth_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
hidden_dim = [1024, 768]
batch_size_values = [256, 512, 1000]

# Original note: default 0.5 (presumably the train.py default — verify).
tree_feature_rates = [0.1, 0.2, 0.3, 0.4, 0.5]

# Original note: default 0.3 (presumably the train.py default — verify).
feat_dropouts = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]

# Original note: default 0.0001 (presumably the train.py default — verify).
lrs = [0.0001, 0.001, 0.01]

# Best result seen so far. These stay available to later cells even when the
# search is interrupted, because they are updated after every finished run.
best_score = 0
best_config = {}

# Total number of hyperparameter combinations in the grid (the name is kept
# for compatibility; it counts combinations, not dataset partitions).
number_of_partitions = len(n_tree_values) * len(tree_depth_values) * len(hidden_dim) * len(batch_size_values) * len(tree_feature_rates) * len(feat_dropouts) * len(lrs)


def _read_last_best_accuracy(path):
    """Return the accuracy from the last "Best Accuracy" line in ``path``.

    Only the last matching line is used so that, if the file accumulates
    lines from several runs, an older run's score is never credited to the
    configuration that has just finished. Returns None when no line matches.
    Raises FileNotFoundError if the file does not exist.
    """
    latest = None
    with open(path, "r") as f:
        for line in f:
            if "Best Accuracy" in line:
                # Expected shape: "Best Accuracy: <float>" -> third token.
                latest = float(line.split()[2])
    return latest


# train.main() is configured through sys.argv; remember the kernel's own
# value so it can be put back no matter how the loop ends.
_original_argv = sys.argv
i = 0
try:
    for i, (n_tree, t_depth, hd, batch_size, tfr, do, lr) in enumerate(
        product(n_tree_values, tree_depth_values, hidden_dim, batch_size_values, tree_feature_rates, feat_dropouts, lrs),
        start=1,
    ):
        print(f"\nRunning: n_tree={n_tree}, t_depth={t_depth}, hd={hd}, batch_size={batch_size}, tree_feature_rates={tfr}, feat_dropouts={do}, lrs={lr}")
        print(f"combination: {i} / {number_of_partitions}")
        sys.argv = [
            'train.py',
            '-dataset', f'gtd{partition}',
            '-n_class', '30',
            '-gpuid', '0',
            '-n_tree', str(n_tree),
            '-tree_depth', str(t_depth),
            '-batch_size', str(batch_size),
            '-hidden_dim', str(hd),
            '-epochs', '300',
            '-verbose', '0',
            '-tree_feature_rate', str(tfr),
            '-feat_dropout', str(do),
            '-lr', str(lr),
            '-jointly_training',
            '-searching', '1'
        ]

        complete = main()
        print(complete)

        # The score is read back from the result file for this partition.
        # NOTE(review): this relies on train.py writing the file on every
        # run; if a run does not write it, a stale score would be read.
        result_file = f"results/result_gtd{partition}"
        acc = _read_last_best_accuracy(result_file)
        if acc is None:
            print(f"Warning: no 'Best Accuracy' line found in {result_file}; skipping this combination.")
            continue

        if acc > best_score:
            best_score = acc
            best_config = {
                'n_tree': n_tree,
                'tree_depth': t_depth,
                'batch_size': batch_size,
                'hidden_dim': hd,
                'tree_feature_rate': tfr,
                'feat_dropout': do,
                'lr': lr
            }
finally:
    # Do not leave the kernel with a fake command line after the search.
    sys.argv = _original_argv

print("\nBest hyperparameter configuration:")
print(best_config)
print(f"Best accuracy: {best_score}")



Running: n_tree=5, t_depth=1, hd=1024, batch_size=256 tree_feature_rates=0.1, feat_dropouts=0.0, hd=1024, lrs=0.0001
combination: 1 / 51840
Use gtd478 dataset
Patience: 100


Training Epochs: 100%|██████████| 300/300 [00:40<00:00,  7.33it/s]



Best Accuracy: 0.216068
complete

Running: n_tree=5, t_depth=1, hd=1024, batch_size=256 tree_feature_rates=0.1, feat_dropouts=0.0, hd=1024, lrs=0.001
combination: 2 / 51840
Use gtd478 dataset
Patience: 100


Training Epochs:  66%|██████▋   | 199/300 [00:26<00:13,  7.44it/s]


Early stopping at epoch 200

Best Accuracy: 0.341816
complete

Running: n_tree=5, t_depth=1, hd=1024, batch_size=256 tree_feature_rates=0.1, feat_dropouts=0.0, hd=1024, lrs=0.01
combination: 3 / 51840
Use gtd478 dataset
Patience: 100


Training Epochs:  38%|███▊      | 113/300 [00:15<00:25,  7.28it/s]


Early stopping at epoch 114

Best Accuracy: 0.271457
complete

Running: n_tree=5, t_depth=1, hd=1024, batch_size=256 tree_feature_rates=0.1, feat_dropouts=0.1, hd=1024, lrs=0.0001
combination: 4 / 51840
Use gtd478 dataset
Patience: 100


Training Epochs: 100%|██████████| 300/300 [00:41<00:00,  7.25it/s]



Best Accuracy: 0.194611
complete

Running: n_tree=5, t_depth=1, hd=1024, batch_size=256 tree_feature_rates=0.1, feat_dropouts=0.1, hd=1024, lrs=0.001
combination: 5 / 51840
Use gtd478 dataset
Patience: 100


Training Epochs:  76%|███████▌  | 227/300 [00:30<00:09,  7.37it/s]


Early stopping at epoch 228

Best Accuracy: 0.306886
complete

Running: n_tree=5, t_depth=1, hd=1024, batch_size=256 tree_feature_rates=0.1, feat_dropouts=0.1, hd=1024, lrs=0.01
combination: 6 / 51840
Use gtd478 dataset
Patience: 100


Training Epochs:  34%|███▍      | 103/300 [00:14<00:27,  7.25it/s]


Early stopping at epoch 104

Best Accuracy: 0.300399
complete

Running: n_tree=5, t_depth=1, hd=1024, batch_size=256 tree_feature_rates=0.1, feat_dropouts=0.2, hd=1024, lrs=0.0001
combination: 7 / 51840
Use gtd478 dataset
Patience: 100


Training Epochs:   7%|▋         | 22/300 [00:03<00:39,  6.97it/s]


KeyboardInterrupt: 

In [None]:
# Retrain once with the best configuration found by the grid search, using a
# longer epoch budget and '-searching 0'.
if not best_config:
    # best_config starts as {} and is only filled after a search run has
    # finished; fail with a clear message instead of a KeyError below.
    raise RuntimeError(
        "best_config is empty: run the hyperparameter search cell until at "
        "least one combination has completed before the final training."
    )

# train.main() is configured through sys.argv; restore the kernel's own value
# afterwards so the fake command line does not leak into later cells.
_original_argv = sys.argv
sys.argv = [
    'train.py',
    '-dataset', f'gtd{partition}',
    '-n_class', '30',
    '-gpuid', '0',
    '-n_tree', str(best_config['n_tree']),
    '-tree_depth', str(best_config['tree_depth']),
    '-batch_size', str(best_config['batch_size']),
    '-hidden_dim', str(best_config['hidden_dim']),
    '-epochs', '1500',
    '-verbose', '0',
    '-tree_feature_rate', str(best_config['tree_feature_rate']),
    '-feat_dropout', str(best_config['feat_dropout']),
    '-lr', str(best_config['lr']),
    '-jointly_training',
    '-searching', '0'
]

try:
    # With '-searching 0' main() is unpacked into five values; preds, targets
    # and labels are consumed by the reporting cells below.
    best_model, preds, targets, labels, epoch_logs = main()
finally:
    sys.argv = _original_argv


In [None]:
# classification_report is imported here rather than in the notebook's import
# cell, so this cell must be run before any other use of the name.
from sklearn.metrics import classification_report

# Print scikit-learn's text report comparing the true labels (`targets`) with
# the predictions (`preds`) returned by main() in the final training cell.
print(classification_report(targets, preds))

In [None]:
def plot_confusion_matrix(y_true, y_pred, labels, partition):
    """Save a row-normalized confusion-matrix heatmap as a PNG file.

    Args:
        y_true: True class ids.
        y_pred: Predicted class ids.
        labels: Sequence of tick labels; its length defines the class ids
            ``0 .. len(labels) - 1`` used for the matrix rows and columns.
            (Assumes labels[k] is the display name of class id k — confirm
            against what train.main() returns.)
        partition: Partition id, used in the plot title and output file name.

    Returns:
        None. The figure is written to
        ``results/confusion_matrix_partition_gtd{partition}.png`` and closed
        without being displayed.
    """
    import os  # local import: `os` is not imported by the notebook's import cell

    class_ids = list(range(len(labels)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Normalize each row by the number of true samples of that class. Classes
    # that never occur in y_true have a row sum of 0; divide those rows by 1
    # instead so they stay all-zero rather than becoming NaN.
    row_totals = cm.sum(axis=1, keepdims=True)
    row_totals[row_totals == 0] = 1
    cm_normalized = cm.astype('float') / row_totals

    save_path = f"results/confusion_matrix_partition_gtd{partition}.png"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    fig = plt.figure(figsize=(18, 16))
    try:
        sns.heatmap(cm_normalized,
                    annot=True,
                    fmt=".2f",
                    xticklabels=labels,
                    yticklabels=labels,
                    cmap="viridis",
                    square=True,
                    linewidths=0.5,
                    cbar_kws={"shrink": 0.8})

        plt.title(f"Normalized Confusion Matrix (Partition gtd{partition})", fontsize=18)
        plt.xlabel("Predicted Label", fontsize=14)
        plt.ylabel("True Label", fontsize=14)
        plt.xticks(rotation=90)
        plt.yticks(rotation=0)
        plt.tight_layout()

        plt.savefig(save_path, dpi=300)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    print(f"Saved confusion matrix for partition gtd{partition} to {save_path}")



In [None]:
# Save the confusion matrix for the final model's predictions. Nothing is shown
# inline: the function writes a PNG under results/ and closes the figure.
plot_confusion_matrix(targets, preds, labels, partition)