In [11]:
import matplotlib.pyplot as plt
import csv
import numpy as np
import os

PLOT_SIZE = (10, 6)

def read_mlflow_csv(csv_path):
    """Collect metric series from an MLflow metrics CSV export.

    Expected header: run_id,key,value,step,timestamp.  The first row is
    skipped as the header.  For every following row the second column is
    the metric key (surrounding whitespace is ignored) and the third column
    is the value.  Only these keys are collected, in file order:
    acc_top1, acc_top1_train, acc_top1_val, losses_l_train, losses_u_train.
    Rows carrying any other key are ignored.

    Blank or truncated rows (fewer than three columns) are skipped, and a
    completely empty file yields five empty lists.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Tuple of float lists in this order:
        (acc_top1_train, acc_top1_val, acc_top1, losses_l_train, losses_u_train).

    Raises:
        ValueError: If a collected value cannot be converted to float.
    """
    series_by_key = {
        'acc_top1': [],
        'acc_top1_train': [],
        'acc_top1_val': [],
        'losses_l_train': [],
        'losses_u_train': [],
    }
    # newline='' is what the csv module asks for when it is handed a file object.
    with open(csv_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header; tolerate an empty file
        for row in csv_reader:
            if len(row) < 3:
                # csv.reader yields [] for blank lines; nothing to parse here.
                continue
            values = series_by_key.get(row[1].strip())
            if values is not None:
                values.append(float(row[2]))

    return (
        series_by_key['acc_top1_train'],
        series_by_key['acc_top1_val'],
        series_by_key['acc_top1'],
        series_by_key['losses_l_train'],
        series_by_key['losses_u_train'],
    )

def read_tensorboard_csvs(csv_paths):
    """Read the value column of several TensorBoard scalar CSV exports.

    Expected header of each file: Wall time, Step, Value.  The first row of
    every file is skipped as the header and the third column of each
    remaining row is parsed as a float.

    Blank or truncated rows (fewer than three columns) are skipped, an empty
    file contributes an empty series, and empty path segments (e.g. from a
    trailing ';') are ignored instead of being opened.

    Args:
        csv_paths: One or more file paths joined with ';'.

    Returns:
        A list with one list of floats per file, in the order the paths
        appear in ``csv_paths``.

    Raises:
        ValueError: If a value cannot be converted to float.
    """
    acc_arr_arr = []
    for csv_path in csv_paths.split(";"):
        if not csv_path:
            # A stray separator produces an empty segment; open('') would fail.
            continue
        # newline='' is what the csv module asks for when it is handed a file object.
        with open(csv_path, 'r', newline='') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            next(csv_reader, None)  # skip the header; tolerate an empty file
            acc_arr_arr.append(
                [float(row[2]) for row in csv_reader if len(row) >= 3]
            )
    return acc_arr_arr

def read_csv(csv_path):
    """Read per-epoch training metrics from a plain results CSV.

    The first row is skipped as a header.  Each following row supplies, by
    column position: training loss, training accuracy, test accuracy,
    validation accuracy.  Reading stops at the first row that has fewer than
    four columns.

    If the largest training accuracy is below 2, the accuracies are treated
    as fractions and all three accuracy series are multiplied by 100.

    An empty or header-only file yields four empty lists instead of raising.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Tuple of float lists ``(train_acc, val_acc, test_acc, train_loss)``.

    Raises:
        ValueError: If a cell cannot be converted to float.
    """
    train_acc_arr, val_acc_arr, test_acc_arr, train_loss_arr = [], [], [], []
    # newline='' is what the csv module asks for when it is handed a file object.
    with open(csv_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header; tolerate an empty file
        for row in csv_reader:
            if len(row) < 4:
                break
            train_loss_arr.append(float(row[0]))
            train_acc_arr.append(float(row[1]))
            test_acc_arr.append(float(row[2]))
            val_acc_arr.append(float(row[3]))

    # max() of an empty list raises, so only probe the scale when data exists.
    if train_acc_arr and max(train_acc_arr) < 2:
        train_acc_arr = [acc * 100 for acc in train_acc_arr]
        test_acc_arr = [acc * 100 for acc in test_acc_arr]
        val_acc_arr = [acc * 100 for acc in val_acc_arr]
    return train_acc_arr, val_acc_arr, test_acc_arr, train_loss_arr

def plot_train_val_test_mlflow(title, csv_path, save_path):
    """Plot accuracy and loss curves from an MLflow metrics CSV.

    Two figures are written:
      * ``save_path``: training, validation and test accuracy per epoch;
      * ``f"{save_path}_loss.png"``: unlabeled loss on the left y-axis and
        labeled loss on a twin right y-axis.  The suffix is appended to
        ``save_path`` verbatim, so a ``save_path`` that already ends in
        ".png" produces a name ending in ".png_loss.png".

    Every series is plotted against its own 1..N epoch index, so series of
    different lengths no longer abort plotting with a shape mismatch.

    Args:
        title: Title placed on both figures.
        csv_path: Path of the MLflow metrics CSV, read with read_mlflow_csv.
        save_path: Output path of the accuracy figure.

    Returns:
        The ``matplotlib.pyplot`` module; both figures are already closed.
    """
    train_acc_arr, val_acc_arr, test_acc_arr, loss_l_arr, loss_u_arr = read_mlflow_csv(csv_path)

    def epoch_axis(series):
        # 1-based epoch index sized to the series it accompanies.
        return np.arange(1, len(series) + 1)

    # Figure 1: accuracies.
    fig, ax1 = plt.subplots(figsize=PLOT_SIZE)

    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Accuracy')
    ax1.plot(epoch_axis(train_acc_arr), train_acc_arr, color='tab:blue', label='Training Accuracy')
    ax1.plot(epoch_axis(val_acc_arr), val_acc_arr, color='tab:green', label='Validation Accuracy')
    ax1.plot(epoch_axis(test_acc_arr), test_acc_arr, color='tab:orange', label='Test Accuracy')
    ax1.tick_params(axis='y')

    lines, labels = ax1.get_legend_handles_labels()
    plt.legend(lines, labels, loc='upper left')

    plt.title(title)
    plt.savefig(save_path)
    plt.close()

    # Figure 2: unlabeled loss (left axis) and labeled loss (right axis).
    fig, ax1 = plt.subplots(figsize=PLOT_SIZE)

    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Unlabeled Loss')
    ax1.plot(epoch_axis(loss_u_arr), loss_u_arr, color="tab:orange", label='Unlabeled Loss')
    ax1.tick_params(axis='y')

    ax2 = ax1.twinx()
    ax2.set_ylabel('Labeled Loss')
    ax2.plot(epoch_axis(loss_l_arr), loss_l_arr, color='tab:blue', label='Labeled Loss')
    ax2.tick_params(axis='y')

    # Merge the handles of both axes so a single legend lists both losses.
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    plt.legend(lines + lines2, labels + labels2, loc='upper right')

    plt.title(title)
    plt.savefig(f"{save_path}_loss.png")
    plt.close()
    return plt


def plot_train_val_test_tensorboard(titles, csv_path, save_path):
    """Plot several TensorBoard accuracy curves on one figure.

    ``csv_path`` and ``titles`` are both ';'-joined lists; the i-th title
    labels the curve read from the i-th CSV.  Pairing uses zip, so surplus
    titles or surplus curves are dropped.  The figure title is fixed to
    "SimCLR Contrastive Learning".

    Each curve is plotted against an x-axis sized to its own length, so
    exports with different numbers of points no longer abort plotting.

    Args:
        titles: ';'-joined legend labels.
        csv_path: ';'-joined CSV paths, read with read_tensorboard_csvs.
        save_path: Output path of the figure.

    Returns:
        The ``matplotlib.pyplot`` module; the figure is already closed.
    """
    acc_arr_arr = read_tensorboard_csvs(csv_path)
    title_arr = titles.split(";")
    fig, ax1 = plt.subplots(figsize=PLOT_SIZE)

    ax1.set_xlabel('Epochs')
    ax1.set_ylabel('Accuracy')
    ax1.tick_params(axis='y')

    for title, acc_arr in zip(title_arr, acc_arr_arr):
        # NOTE(review): the 300 / 1000 factor presumably converts the exported
        # point index into epochs -- confirm against the logging interval.
        epochs = np.arange(1, len(acc_arr) + 1) * 300 / 1000
        ax1.plot(epochs, acc_arr, label=title)

    lines, labels = ax1.get_legend_handles_labels()
    plt.legend(lines, labels, loc='lower right')
    plt.title("SimCLR Contrastive Learning")
    plt.savefig(save_path)
    plt.close()
    return plt

def plot_train_val_test(title, csv_path, save_path):
    """Plot accuracy and loss curves for one experiment and save the figure.

    ';'-joined TensorBoard paths and MLflow exports are handed to their
    dedicated plotting functions.  Any other file is read with read_csv and
    drawn as three accuracy curves on the left y-axis plus the training loss
    on a twin right y-axis.

    Args:
        title: Figure title (or ';'-joined legend labels for TensorBoard input).
        csv_path: Path of the metrics CSV (or ';'-joined paths).
        save_path: Output path of the figure.

    Returns:
        The ``matplotlib.pyplot`` module; the figure is already closed.
    """
    # Specialised formats are checked first, TensorBoard before MLflow,
    # because a ';'-joined path list cannot be opened as a single file.
    if is_tensorboard(csv_path):
        return plot_train_val_test_tensorboard(title, csv_path, save_path)
    if is_mlflow(csv_path):
        return plot_train_val_test_mlflow(title, csv_path, save_path)

    train_acc_arr, val_acc_arr, test_acc_arr, train_loss_arr = read_csv(csv_path)
    epochs = np.arange(1, len(train_loss_arr) + 1)
    fig, acc_ax = plt.subplots(figsize=PLOT_SIZE)

    # Left axis: the three accuracy curves.
    acc_ax.set_xlabel('Epochs')
    acc_ax.set_ylabel('Accuracy')
    accuracy_series = (
        (train_acc_arr, 'tab:blue', 'Training Accuracy'),
        (val_acc_arr, 'tab:green', 'Validation Accuracy'),
        (test_acc_arr, 'tab:orange', 'Test Accuracy'),
    )
    for values, series_color, series_label in accuracy_series:
        acc_ax.plot(epochs, values, color=series_color, label=series_label)
    acc_ax.tick_params(axis='y')

    # Right axis: training loss, with axis label and ticks in the curve colour.
    loss_ax = acc_ax.twinx()
    loss_color = 'tab:red'
    loss_ax.set_ylabel('Loss', color=loss_color)
    loss_ax.tick_params(axis='y', labelcolor=loss_color)
    loss_ax.plot(epochs, train_loss_arr, color=loss_color, label='Training Loss')

    # One legend listing the curves of both axes.
    acc_handles, acc_labels = acc_ax.get_legend_handles_labels()
    loss_handles, loss_labels = loss_ax.get_legend_handles_labels()
    plt.legend(acc_handles + loss_handles, acc_labels + loss_labels, loc='lower left')

    plt.title(title)
    plt.savefig(save_path)
    plt.close()
    return plt

def is_mlflow(csv_path):
    """Return True if the file's first line is the MLflow metrics header.

    Newline characters and spaces are removed from the first line before it
    is compared with "run_id,key,value,step,timestamp".
    """
    expected_header = "run_id,key,value,step,timestamp"
    with open(csv_path, 'r') as csv_file:
        header_line = csv_file.readline()
    normalized = header_line.replace("\n", "").replace(" ", "")
    return normalized == expected_header
    
def is_tensorboard(csv_path):
    """Return True if ``csv_path`` contains ';', i.e. is a ';'-joined path list."""
    has_separator = ";" in csv_path
    return has_separator
    
def get_best_val_test_acc(csv_path):
    """Pick the epoch with the best validation accuracy and report its metrics.

    Epochs whose validation accuracy lies within 0.001 of the maximum are
    treated as tied.  Among those, the highest test accuracy wins, and if
    several tied epochs share that test accuracy the latest one is chosen.

    Args:
        csv_path: Path of a plain or MLflow metrics CSV, or a ';'-joined
            TensorBoard path list.

    Returns:
        Tuple ``(train_acc, best_val_acc, best_test_acc, epoch_index)`` where
        ``epoch_index`` is 0-based.  For TensorBoard input the tuple is
        ``(-1, -1, -1, -1)``.
    """
    if is_tensorboard(csv_path):
        return -1, -1, -1, -1

    # The MLflow reader returns five series; only the first four are needed.
    if is_mlflow(csv_path):
        metrics = read_mlflow_csv(csv_path)[:4]
    else:
        metrics = read_csv(csv_path)
    train_acc_arr, val_acc_arr, test_acc_arr, _ = metrics

    # Every epoch whose validation accuracy ties with the maximum.
    best_val_acc = max(val_acc_arr)
    tied_epochs = [
        epoch
        for epoch, val_acc in enumerate(val_acc_arr)
        if abs(val_acc - best_val_acc) < 0.001
    ]

    # Break the tie by test accuracy, preferring the latest matching epoch.
    best_test_acc = max([test_acc_arr[epoch] for epoch in tied_epochs])
    winners = [epoch for epoch in tied_epochs if test_acc_arr[epoch] == best_test_acc]
    best_test_acc_idx = winners[-1]

    # Training accuracy is reported for that same epoch.
    best_train_acc = train_acc_arr[best_test_acc_idx]
    return best_train_acc, best_val_acc, best_test_acc, best_test_acc_idx


In [12]:
# Use a larger font for every text element of the generated figures.
plt.rcParams.update({'font.size': 16})

# Maps each metrics source to its plot title.  The title is also used as the
# output file name.  A ';'-joined key/value pair describes several TensorBoard
# curves that are drawn together on one figure.
titles = {
    "./resnet/csv/resnet18_batch64_noaug_nopretrain.csv": "ResNet18 (no pretraining, batch size 64)",
    "./resnet/csv/resnet18_batch64_noaug_pretrain.csv": "ResNet18 (with pretraining, batch size 64)",
    "./simclr/feature_eval/csv/simclr_resnet18_dim128_batch256_epoch275_frozen.csv": "SimCLR (ResNet18, dim 128, frozen)",
    "./simclr/feature_eval/csv/simclr_resnet18_dim128_batch256_epoch275_notfrozen.csv": "SimCLR (ResNet18, dim 128, not frozen)",
    "./simclr/feature_eval/csv/simclr_resnet18_dim256_batch256_epoch275_frozen.csv": "SimCLR (ResNet18, dim 256, frozen)",
    "./simclr/feature_eval/csv/simclr_resnet18_dim256_batch256_epoch275_notfrozen.csv": "SimCLR (ResNet18, dim 256, not frozen)",
    "./simclr/feature_eval/csv/simclr_resnet50_dim128_batch200_epoch295_frozen.csv": "SimCLR (ResNet50, dim 128, frozen)",
    "./simclr/feature_eval/csv/simclr_resnet50_dim128_batch200_epoch295_notfrozen.csv": "SimCLR (ResNet50, dim 128, not frozen)",
    "./vision_transformer/train_acc.csv": "Vision Transformer (configuration 1)",
    "./vision_transformer/train_acc_1.csv": "Vision Transformer (configuration 2)",
    "./vision_transformer/train_acc_0.csv": "Vision Transformer (configuration 3)",
    "./mixmatch_fixmatch/csv/mixmatch_orig.csv": "MixMatch (default)",
    "./mixmatch_fixmatch/csv/fixmatch_orig.csv": "FixMatch (default)",
    "./mixmatch_fixmatch/csv/fixmatch_bigcrop.csv": "FixMatch (big crop)",
    "./mixmatch_fixmatch/csv/mixmatch_bigcrop.csv": "MixMatch (big crop)",
    "./mixmatch_fixmatch/csv/mixmatch_bigcrop_mu50.csv": "MixMatch (big crop, mu=50)",
    "./simclr/pretrain_csv/simclr_resnet18_outdim128.csv;./simclr/pretrain_csv/simclr_resnet18_outdim256.csv;./simclr/pretrain_csv/simclr_resnet50_outdim128.csv": "SimCLR (ResNet18, dim 128);SimCLR (ResNet18, dim 256);SimCLR (ResNet50, dim 128)",
}

# All figures are written into this directory, created on demand.
plot_dir = "./plots/"
os.makedirs(plot_dir, exist_ok=True)

for csv_path, title in titles.items():
    save_path = f"{plot_dir}{title}.png"
    plot_train_val_test(title, csv_path, save_path=save_path)

    # The title is printed first, without a newline, so the summary that
    # follows lands on the same output line.
    print(f"{title}\t\t", end="")
    best_train_acc, best_val_acc, best_test_acc, best_test_acc_idx = get_best_val_test_acc(csv_path)
    # The stored epoch index is 0-based; it is shown 1-based.
    print(f"Epoch [{best_test_acc_idx + 1}]: train acc[{best_train_acc:.4f}] val acc[{best_val_acc:.4f}] test acc[{best_test_acc:.4f}]")

ResNet18 (no pretraining, batch size 64)		Epoch [38]: train acc[100.0000] val acc[54.7619] test acc[46.9388]
ResNet18 (with pretraining, batch size 64)		Epoch [20]: train acc[100.0000] val acc[47.6190] test acc[48.9796]
SimCLR (ResNet18, dim 128, frozen)		Epoch [61]: train acc[61.1322] val acc[57.1429] test acc[46.9388]
SimCLR (ResNet18, dim 128, not frozen)		Epoch [38]: train acc[77.9777] val acc[71.4286] test acc[61.2245]
SimCLR (ResNet18, dim 256, frozen)		Epoch [99]: train acc[63.1708] val acc[59.5238] test acc[55.1020]
SimCLR (ResNet18, dim 256, not frozen)		Epoch [65]: train acc[80.0610] val acc[69.0476] test acc[59.1837]
SimCLR (ResNet50, dim 128, frozen)		Epoch [112]: train acc[68.3475] val acc[66.6667] test acc[55.1020]
SimCLR (ResNet50, dim 128, not frozen)		Epoch [64]: train acc[86.0190] val acc[71.4286] test acc[55.1020]
Vision Transformer (configuration 1)		Epoch [46]: train acc[35.7746] val acc[34.6939] test acc[35.7143]
Vision Transformer (configuration 2)		Epoch [86]: t