In [1]:
import numpy as np, matplotlib.pyplot as plt, pickle, glob, re, os

In [2]:
def make_history_plots(resfolder, smooth_factor = None):
    """Plot the per-fold training histories stored under ``resfolder`` as PNGs.

    Every sub-folder matching ``Fold*`` must be named ``Fold_<n>`` and contain
    ``history_fold_<n>.pickle``, a pickled mapping with the keys ``'loss'``,
    ``'accuracy'``, ``'val_loss'`` and ``'val_accuracy'`` (presumably a Keras
    ``History.history`` dict - confirm against the training code).

    Six figures are written into ``resfolder``: validation loss/accuracy,
    training loss/accuracy, and combined training+validation loss/accuracy,
    each with one curve (or curve pair) per fold.

    Args:
        resfolder: Directory that holds the ``Fold_<n>`` sub-folders; the PNGs
            are saved here as well.
        smooth_factor: Window length of the moving average applied to the
            validation curves and to both curves of the combined figures.
            ``None`` disables smoothing. The stand-alone training figures are
            always plotted unsmoothed.

    Raises:
        AssertionError: If a matched folder name does not contain exactly one
            ``Fold_<n>`` token.
    """
    def smoothen(data, kernel_size):
        # Moving average; mode='valid' drops the partially covered edges, so
        # the result is kernel_size - 1 points shorter than the input.
        if kernel_size is None:
            return data
        kernel = np.ones(kernel_size) / kernel_size
        return np.convolve(data, kernel, mode='valid')

    linewidth = 2
    loss_ylabel = 'Sparse Categorical Crossentropy Loss'
    # One entry per figure: (file name, title, y label, legend anchor, curves);
    # each curve is (history key, label suffix, apply smoothing?).
    plot_specs = [
        ('Validation_Loss.png', 'Validation Loss', loss_ylabel, (0.3, 0.8),
         [('val_loss', '', True)]),
        ('Validation_Accuracy.png', 'Validation Accuracy', 'Accuracy', (0.3, 0.5),
         [('val_accuracy', '', True)]),
        ('Training_Loss.png', 'Training Loss', loss_ylabel, (0.3, 0.8),
         [('loss', '', False)]),
        ('Training_Accuracy.png', 'Training Accuracy', 'Accuracy', (0.3, 0.5),
         [('accuracy', '', False)]),
        ('Training_and_Validation_Loss.png', 'Training and Validation Loss', loss_ylabel, (0.3, 0.8),
         [('loss', ' Training', True), ('val_loss', ' Validation', True)]),
        ('Training_and_Validation_Accuracy.png', 'Training and Validation Accuracy', 'Accuracy', (0.3, 0.5),
         [('accuracy', ' Training', True), ('val_accuracy', ' Validation', True)]),
    ]

    foldFolders = sorted(glob.glob(os.path.join(resfolder,'Fold*')))

    figures = []
    try:
        for _ in plot_specs:
            figures.append(plt.subplots(figsize=(10,5)))

        for folder in foldFolders:
            lbfolder = os.path.basename(folder)
            # Parse the fold number from the folder name only, so that a
            # 'Fold_<n>' token somewhere in the parent path cannot interfere.
            foldnumber = re.findall(r'Fold_(\d+)', lbfolder)
            assert len(foldnumber) == 1
            foldnumber = foldnumber[0]
            # NOTE: pickle.load can execute arbitrary code contained in the
            # file; only point this function at result folders you trust.
            with open(os.path.join(folder, 'history_fold_'+foldnumber+'.pickle'), 'rb') as f:
                history = pickle.load(f)

            for (_, ax), spec in zip(figures, plot_specs):
                for key, suffix, smoothed in spec[4]:
                    toplot = smoothen(history[key], smooth_factor) if smoothed else history[key]
                    ax.plot(toplot, label=lbfolder + suffix, linewidth=linewidth)

        for (fig, ax), (filename, title, ylabel, anchor, _) in zip(figures, plot_specs):
            ax.set_title(title)
            ax.set_ylabel(ylabel)
            ax.set_xlabel('Training Epoch')
            # Build the legend once, after all folds are drawn; calling
            # fig.legend() per fold stacks one legend on top of another.
            if foldFolders:
                fig.legend(bbox_to_anchor=anchor, fancybox=True, shadow=True)
            fig.savefig(os.path.join(resfolder, filename))
    finally:
        # Close only the figures created here, also when loading/plotting fails.
        for fig, _ in figures:
            plt.close(fig)

In [3]:
def createscript(resfolder):
    """Write a stand-alone ``make_history_plots.py`` into ``resfolder``.

    The generated script is meant to be run from inside a result folder: it
    globs ``Fold*`` in the current working directory, loads
    ``history_fold_<n>.pickle`` from each fold folder and saves six PNGs
    (validation, training and combined loss/accuracy) next to them, using a
    fixed smoothing window of 5.

    Args:
        resfolder: Directory in which the script file is created (an existing
            file of the same name is overwritten).

    Returns:
        The absolute path of the written script.
    """
    scriptpath = os.path.join(resfolder,'make_history_plots.py')
    with open(scriptpath, 'w') as f:
        # Raw string: the template contains the regex escape '\d', which is an
        # invalid escape sequence in a normal string literal.
        f.write(r"""
# Stand-alone plotting script: run it from inside a result folder that
# contains the Fold_<n> sub-folders.
import numpy as np, matplotlib.pyplot as plt, pickle, glob, re

foldFolders = sorted(glob.glob('Fold*'))

fig1, ax1 = plt.subplots(figsize=(10,5))
fig2, ax2 = plt.subplots(figsize=(10,5))
fig3, ax3 = plt.subplots(figsize=(10,5))
fig4, ax4 = plt.subplots(figsize=(10,5))
fig5, ax5 = plt.subplots(figsize=(10,5))
fig6, ax6 = plt.subplots(figsize=(10,5))

def smoothen(data, kernel_size):
    # Moving average over kernel_size points (edges are dropped).
    kernel = np.ones(kernel_size) / kernel_size
    data_convolved = np.convolve(data, kernel, mode='valid')
    return data_convolved

smooth_factor = 5
linewidth = 2

for folder in foldFolders:
    foldnumber = re.findall(r'Fold_(\d+)', folder)
    assert len(foldnumber) == 1
    foldnumber = foldnumber[0]
    with open(folder + '/history_fold_'+foldnumber+'.pickle', 'rb') as f:
        history = pickle.load(f)

    # Validation curves (smoothed)
    ax1.plot(smoothen(history['val_loss'],smooth_factor), label=folder, linewidth=linewidth)
    ax2.plot(smoothen(history['val_accuracy'],smooth_factor), label=folder, linewidth=linewidth)

    # Training curves (raw)
    ax3.plot(history['loss'], label=folder, linewidth=linewidth)
    ax4.plot(history['accuracy'], label=folder, linewidth=linewidth)

    # Combined Training and Validation Loss
    ax5.plot(smoothen(history['loss'],smooth_factor), label=folder + ' Training', linewidth=linewidth)
    ax5.plot(smoothen(history['val_loss'],smooth_factor), label=folder + ' Validation', linewidth=linewidth)

    # Combined Training and Validation Accuracy
    ax6.plot(smoothen(history['accuracy'],smooth_factor), label=folder + ' Training', linewidth=linewidth)
    ax6.plot(smoothen(history['val_accuracy'],smooth_factor), label=folder + ' Validation', linewidth=linewidth)

# Titles, axis labels and legends are applied once, after all folds are drawn.
figure_setup = [
    (fig1, ax1, 'Validation Loss', 'Sparse Categorical Crossentropy Loss', (0.3, 0.8)),
    (fig2, ax2, 'Validation Accuracy', 'Accuracy', (0.3, 0.5)),
    (fig3, ax3, 'Training Loss', 'Sparse Categorical Crossentropy Loss', (0.3, 0.8)),
    (fig4, ax4, 'Training Accuracy', 'Accuracy', (0.3, 0.5)),
    (fig5, ax5, 'Training and Validation Loss', 'Sparse Categorical Crossentropy Loss', (0.3, 0.8)),
    (fig6, ax6, 'Training and Validation Accuracy', 'Accuracy', (0.3, 0.5)),
]
for fig, ax, title, ylabel, anchor in figure_setup:
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Training Epoch')
    if foldFolders:
        fig.legend(bbox_to_anchor=anchor, fancybox=True, shadow=True)

fig1.savefig('Validation_Loss.png')
fig2.savefig('Validation_Accuracy.png')
fig3.savefig('Training_Loss.png')
fig4.savefig('Training_Accuracy.png')
fig5.savefig('Training_and_Validation_Loss.png')
fig6.savefig('Training_and_Validation_Accuracy.png')
plt.close('all')
""")
    return os.path.abspath(scriptpath)

In [4]:

# Write the stand-alone plotting script once; every processed result folder
# gets a symlink to it.
plotscript = createscript('./')

sizefolders = sorted(glob.glob('./output/_*'))
for sizefolder in sizefolders:
    # The window length encoded in the folder name ("_<winlen>windowed")
    # selects the matching result folders further down.
    winlen = re.findall(r'_(\d+)windowed', os.path.basename(sizefolder))
    assert len(winlen) == 1
    winlen = winlen[0]
    problemfolders = sorted(glob.glob(sizefolder + '/*'))
    for problemfolder in problemfolders:
        assert os.path.basename(problemfolder) in ['full','perc']
        resfolders = sorted(glob.glob(problemfolder + '/_'+winlen+'*/'))
        for resfolder in resfolders:
            # A folder that already contains PNGs is treated as done.
            if glob.glob(resfolder + '/*.png'):
                continue
            print('Resfolder: ' + resfolder)
            make_history_plots(resfolder, smooth_factor = 3)
            targetLinkPath = os.path.join(resfolder,'make_history_plots.py')
            # lexists() is also true for a dangling symlink, which exists()
            # reports as missing; without removing it os.symlink() would fail
            # with FileExistsError.
            if os.path.lexists(targetLinkPath):
                os.remove(targetLinkPath)
            os.symlink(plotscript, targetLinkPath)

print('Done')

Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6064_acc_0.7914_CrossValidatedRun_20230221-234525/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6304_acc_0.8056_CrossValidatedRun_20230222-094853/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6322_acc_0.7950_CrossValidatedRun_20230222-084306/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6342_acc_0.7963_CrossValidatedRun_20230221-152643/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6458_acc_0.7917_CrossValidatedRun_20230221-145929/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6461_acc_0.8014_CrossValidatedRun_20230222-020252/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6487_acc_0.8064_CrossValidatedRun_20230222-001000/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_0.6506_acc_0.7972_CrossValidatedRun_20230222-053239/
Resfolder: ./output/_3456windowed/full/_3456windowedfull_c_maf1_

# Also extract each classification report from info.txt as a dict

In [65]:
def extract_from_history(folder, report_name):
    """Parse one classification report out of ``<folder>/info.txt`` into a dict.

    The file is scanned for the first line that contains ``report_name`` as a
    whole token (i.e. not directly followed by another letter, digit or
    underscore). The lines after it, up to the first pair of consecutive blank
    lines or the end of the file, form the report. The first of those lines is
    the column header (split on whitespace); every following non-empty line is
    a row whose cells are separated by runs of at least two spaces. The layout
    looks like the text output of scikit-learn's ``classification_report`` -
    confirm against the code that writes ``info.txt``.

    Args:
        folder: Result folder that contains ``info.txt``.
        report_name: Heading that precedes the report inside ``info.txt``.

    Returns:
        ``{row label: {column name: float}}`` plus ``'accuracy': float`` for the
        accuracy row, or ``None`` (after printing a message) when fewer than
        three report lines were found.

    Raises:
        AssertionError: If ``folder`` or ``info.txt`` is missing, or a row does
            not have exactly one cell more than the header has columns.
    """
    assert os.path.exists(folder), 'Folder does not exist: ' + folder
    infopath = os.path.join(folder,'info.txt')
    assert os.path.exists(infopath), 'Info file does not exist: ' + infopath

    # Whole-token match: a plain substring test would let
    # 'avg_classification_report' also hit the heading
    # 'avg_classification_report_for_quantized_model'.
    name_pattern = re.compile(re.escape(report_name) + r'(?!\w)')

    reportlines = []
    found_report = False
    prev_blank = True
    with open(infopath, 'r') as f:
        for rawline in f:
            line = rawline.strip()
            # Two consecutive blank lines terminate the report.
            if found_report and line == '' and prev_blank:
                break
            if name_pattern.search(line):
                found_report = True
            elif found_report:
                reportlines.append(line)
            prev_blank = line == ''

    # Header plus at least two more lines are needed for a usable report.
    if len(reportlines) < 3:
        print('Could not find %s in %s' % (report_name, infopath))
        return None

    header = reportlines[0].split()

    report_dict = {}
    for text in reportlines[1:]:
        if text == '':
            continue
        # Split on double spaces so that labels such as 'macro avg' stay in
        # one piece; a leftover single space is harmless for float().
        line = [cell for cell in text.split('  ') if cell != '']
        if line[0] == 'accuracy':
            # The accuracy row carries a single score instead of one per column.
            report_dict['accuracy'] = float(line[1])
        else:
            assert len(line) == len(header)+1, 'Line does not have the right number of entries: %i != %i +1'%(len(line),len(header))
            report_dict[line[0]] = {t:float(l) for t,l in zip(header,line[1:])}

    return report_dict
    


# (Re)write the stand-alone plotting script; its path is not used further below.
plotscript = createscript('./')

# Headings of the reports that are looked up in every info.txt.
report_names = (
    'avg_classification_report',
    'avg_classification_report_for_quantized_model',
    'avg_classification_report_for_fullquantized_model',
)

sizefolders = sorted(glob.glob('./output/_*'))
for sizefolder in sizefolders:
    # Window length as encoded in the folder name ("_<winlen>windowed").
    winlen = re.findall(r'_(\d+)windowed', os.path.basename(sizefolder))
    assert len(winlen) == 1
    winlen = winlen[0]

    problemfolders = sorted(glob.glob(sizefolder + '/*'))
    for problemfolder in problemfolders:
        assert os.path.basename(problemfolder) in ['full','perc']

        resfolders = sorted(glob.glob(problemfolder + '/_'+winlen+'*/'))
        for resfolder in resfolders:
            # Folders that already hold extracted*.txt files are skipped;
            # delete those files by hand to force a fresh extraction.
            if glob.glob(resfolder + '/extracted*.txt'):
                continue

            print('Resfolder: ' + resfolder)
            for toextract in report_names:
                report = extract_from_history(resfolder, toextract)
                if not report:
                    # Nothing usable was found for this report name.
                    continue
                with open(os.path.join(resfolder,'extracted_'+toextract+'.txt'), 'w') as f:
                    f.write(str(report))

print('Done')

Done
