### Accuracy Impacts of Augmentation on Testsets

This notebook uses the models the team trained on augmented datasets to determine how evaluating on testsets that have also been augmented affects model accuracy. While the team's focus is on shrinking the distribution gap observed between the CIFAR 10 and CIFAR 10.1 testsets, the results are examined along two dimensions:
  1. What, if any, effect is observed on the distribution gap when comparing model evaluations on an augmented CIFAR 10 testset to those on an augmented CIFAR 10.1 testset?
  2. What, if any, effect is observed on model accuracy when comparing how the model performs at predicting classes for the unaugmented CIFAR X testset versus the augmented CIFAR X testset?

In [111]:
import boto3
from collections import defaultdict
import glob
import os
import pandas as pd
import re

pd.set_option('display.max_rows', 500)

In [74]:
# S3 bucket where the model checkpoints exist
bucket = 'sagemaker-june29'

# Directory path (key prefix) within S3 to the model results
prefix = 'sagemaker/results/original-models/'

# Local destination directory for downloaded checkpoints.
# Keep the trailing slash: later cells build paths by appending to this string.
destdir = '/home/ec2-user/SageMaker/checkpoints/'

# File path to model pytorch files
pytorch_models = '/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/'

# Dictionary to look up matching config files with; names that are not listed
# fall back to "default".
# Keyword arguments given to defaultdict become initial entries, so the former
# `key="skip"` argument stored a stray 'key' -> 'skip' item. It is dropped here.
config_files = defaultdict(
    lambda: "default",
    {
        'wrn_28_10': "wrn.yaml",
        'resnet_basic_32': "resnet.yaml",
        'densenet_BC_100_12': "densenet.yaml",
        'resnext_29_4x64d': "resnext.yaml",
    },
)

In [39]:
def get_chpt_files(bucket, prefix):
    """Retrieve the S3 keys of the checkpoint files stored under a prefix.

    Args:
        bucket: Name of the S3 bucket to search.
        prefix: Key prefix that restricts the listing.

    Returns:
        A tuple ``(basetraining_checkpoints, refined_checkpoints)``: the keys
        ending in "00400.pth" and the keys ending in "00050.pth", each in
        listing order. Both lists are empty when nothing matches.
    """
    s3 = boto3.client('s3')
    basetraining_checkpoints = []
    refined_checkpoints = []

    # A single list_objects_v2 call returns at most 1000 keys, so walk every
    # page instead of reading only the first response.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # 'Contents' is omitted from a response that holds no objects.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.endswith("00400.pth"):
                basetraining_checkpoints.append(key)
            elif key.endswith("00050.pth"):
                refined_checkpoints.append(key)

    return basetraining_checkpoints, refined_checkpoints



def download_chpt(bucket, destdir, file):
    """Download a checkpoint file from S3 and write it to the destination directory.

    The local file is named ``<model>_<filename>``, where ``<filename>`` is the
    last '/'-separated component of the key and ``<model>`` is the component at
    index 3 (with the notebook's three-level prefix, the directory directly
    below the prefix).

    Args:
        bucket: Name of the S3 bucket holding the checkpoint.
        destdir: Local directory to download into; created if it is missing.
        file: Full S3 key of the checkpoint object.

    Raises:
        IndexError: If ``file`` has fewer than four '/'-separated components.
    """
    key_parts = file.split("/")
    filename = key_parts[-1]
    model = key_parts[3]

    # Create the target directory up front so the download does not fail on a
    # fresh machine, and join paths so a missing trailing slash on destdir
    # cannot glue the directory name onto the file name.
    os.makedirs(destdir, exist_ok=True)
    destination = os.path.join(destdir, model + '_' + filename)

    boto3.resource('s3').Bucket(bucket).download_file(file, destination)
        

In [46]:
# Fetch every base-training and refined checkpoint from S3 into destdir
basetraining_checkpoints, refined_checkpoints = get_chpt_files(bucket, prefix)

for cp in [*basetraining_checkpoints, *refined_checkpoints]:
    download_chpt(bucket, destdir, cp)

# Switch into the checkpoint directory and collect the local checkpoint paths
os.chdir(destdir)
listing = glob.glob(destdir + "*pth")

# Testset name pairs (CIFAR10 / CIFAR101), one pair per RA_<a>_<b> suffix
RA_1_20_Datasets = ['CIFAR10_RA_1_20', 'CIFAR101_RA_1_20']
RA_2_20_Datasets = ['CIFAR10_RA_2_20', 'CIFAR101_RA_2_20']
RA_3_20_Datasets = ['CIFAR10_RA_3_20', 'CIFAR101_RA_3_20']
RA_2_5_Datasets = ['CIFAR10_RA_2_5', 'CIFAR101_RA_2_5']

In [114]:
result = None

def evalute_models(datasets, checkpoints):
    """Run ``evaluate.py`` for every (dataset, checkpoint) combination.

    For each dataset name and each checkpoint path, the checkpoint path is
    printed, the working directory is changed to ``pytorch_models``, and the
    architecture is picked by a substring match on the checkpoint's file name.
    The matching branch prints ``Test dataset: <ds>`` and shells out to
    ``python evaluate.py`` with that architecture's config file and overrides.

    Checkpoints whose file name matches none of the four architectures are
    printed but not evaluated.

    Args:
        datasets: Iterable of dataset names passed as ``dataset.name``.
        checkpoints: Iterable of checkpoint paths passed as ``test.checkpoint``.

    Returns:
        None. All results are emitted on stdout by the prints and the
        evaluation subprocess.
    """
    for ds in datasets:
        for chpt in checkpoints:
            
            print(chpt)
            # Config lookup through config_files is disabled; the config file
            # is chosen explicitly by the branches below.

            filesplit = chpt.split("/")
            filename = str(filesplit[-1])
            # NOTE(review): `model` is computed but never used below.
            model = filename.split("_")[0]

            # evaluate.py and its configs/ paths are resolved relative to this
            # directory. The cwd change persists after the function returns.
            os.chdir(pytorch_models)
            # One output folder per checkpoint file name; both datasets for a
            # checkpoint therefore share the same test.output_dir.
            results_folder = '/home/ec2-user/SageMaker/experiments/augmented_testsets/' +  filename

            # In the `!` shell lines, $ds, $chpt and $results_folder are
            # expanded by IPython from the local variables of the same name,
            # so those names must not be changed.
            if 'wrn_28_10' in filename:
                print("Test dataset:", ds)
                !python evaluate.py --config configs/cifar/wrn.yaml \
                   model.wrn.depth 28 \
                   model.wrn.widening_factor 10 \
                   dataset.name $ds \
                   test.checkpoint $chpt \
                   test.output_dir $results_folder

            elif 'resnet_basic_32' in filename: 
                print("Test dataset:", ds)
                !python evaluate.py --config configs/cifar/resnet.yaml \
                   model.resnet.depth 32 \
                   test.batch_size 128 \
                   dataset.name $ds \
                   test.checkpoint $chpt \
                   test.output_dir $results_folder

            elif 'densenet_BC_100_12' in filename:
                print("Test dataset:", ds)
                !python evaluate.py --config configs/cifar/densenet.yaml \
                   model.densenet.depth 100 \
                   model.densenet.growth_rate 12 \
                   test.batch_size 64 \
                   dataset.name $ds \
                   test.checkpoint $chpt \
                   test.output_dir $results_folder

            elif 'resnext_29_4x64d' in filename:
                print("Test dataset:", ds)
                !python evaluate.py --config configs/cifar/resnext.yaml \
                   model.resnext.cardinality 4 \
                   test.batch_size 128 \
                   dataset.name $ds \
                   test.checkpoint $chpt \
                   test.output_dir $results_folder


In [116]:
# `%%capture cap` binds `cap` only after the whole cell has finished, so reading
# `cap.stdout` inside that same cell saw the capture from a previous run (or
# raised NameError on a fresh kernel). Capturing with the context manager makes
# `cap` available before it is written out. The arguments mirror
# `%%capture cap --no-stderr`: stdout and rich display captured, stderr not.
from IPython.utils.capture import capture_output

with capture_output(stdout=True, stderr=False, display=True) as cap:
    # evalute_models(RA_1_20_Datasets, [s for s in listing if 'ra_1_20' in s])
    # evalute_models(RA_2_20_Datasets, [s for s in listing if 'ra_2_20' in s])
    # evalute_models(RA_3_20_Datasets, [s for s in listing if 'ra_3_20' in s])
    evalute_models(RA_2_5_Datasets, [s for s in listing if 'ra_2_5' in s])

# Keep a copy of the raw evaluation log next to the parsed results
with open('output.txt', 'w') as f:
    f.write(cap.stdout)

In [126]:
# (loss, accuracy) pairs from lines of the form "Loss <x> Accuracy <y>\r"
p = re.compile("Loss (.*) Accuracy (.*)\r")
result = p.findall(cap.stdout)

# Checkpoint file names: the text after the checkpoint directory on lines that
# end in a carriage return
q = re.compile("/home/ec2-user/SageMaker/checkpoints/(.*)\r")
result2 = q.findall(cap.stdout)

# Dataset names from the "Test dataset: <name>" lines
r = re.compile("Test dataset: (.*)\n")
result3 = r.findall(cap.stdout)

# The three lists are matched up purely by position, and zip() silently
# truncates to the shortest one. Fail loudly when the counts differ instead of
# pairing metrics with the wrong checkpoint or testset.
assert len(result) == len(result2) == len(result3), (
    "captured output is misaligned: "
    f"{len(result)} loss/accuracy lines, "
    f"{len(result2)} checkpoint lines, "
    f"{len(result3)} test dataset lines"
)

acc = [e+(r2,r3) for e,r2,r3 in zip(result, result2, result3)]
acc

[('1.4287',
  '0.6206',
  'densenet_BC_100_12_ra_2_5_checkpoint_00400.pth',
  'CIFAR10_RA_2_5'),
 ('1.4376',
  '0.6201',
  'densenet_BC_100_12_ra_2_5_c10val_checkpoint_00400.pth',
  'CIFAR10_RA_2_5'),
 ('2.5897',
  '0.5534',
  'densenet_BC_100_12_ra_2_5_c10val_checkpoint_00050.pth',
  'CIFAR10_RA_2_5'),
 ('1.8323',
  '0.5380',
  'densenet_BC_100_12_ra_2_5_checkpoint_00050.pth',
  'CIFAR10_RA_2_5'),
 ('2.6865',
  '0.4725',
  'densenet_BC_100_12_ra_2_5_checkpoint_00400.pth',
  'CIFAR101_RA_2_5'),
 ('2.8749',
  '0.4485',
  'densenet_BC_100_12_ra_2_5_c10val_checkpoint_00400.pth',
  'CIFAR101_RA_2_5'),
 ('3.6019',
  '0.5060',
  'densenet_BC_100_12_ra_2_5_c10val_checkpoint_00050.pth',
  'CIFAR101_RA_2_5'),
 ('2.3368',
  '0.5030',
  'densenet_BC_100_12_ra_2_5_checkpoint_00050.pth',
  'CIFAR101_RA_2_5')]

In [127]:
# Tabulate the parsed results. The regex captures are strings, so cast the
# metric columns to float to make them usable for numeric comparisons.
df = (
    pd.DataFrame(acc, columns =['Loss', 'Accuracy', 'Checkpoint', 'Testset'])
    .astype({'Loss': float, 'Accuracy': float})
)
df.to_csv('/home/ec2-user/SageMaker/experiments/augmented_testsets/augmentedtestsets_results.csv')
df

Unnamed: 0,Loss,Accuracy,Checkpoint,Testset
0,1.4287,0.6206,densenet_BC_100_12_ra_2_5_checkpoint_00400.pth,CIFAR10_RA_2_5
1,1.4376,0.6201,densenet_BC_100_12_ra_2_5_c10val_checkpoint_00...,CIFAR10_RA_2_5
2,2.5897,0.5534,densenet_BC_100_12_ra_2_5_c10val_checkpoint_00...,CIFAR10_RA_2_5
3,1.8323,0.538,densenet_BC_100_12_ra_2_5_checkpoint_00050.pth,CIFAR10_RA_2_5
4,2.6865,0.4725,densenet_BC_100_12_ra_2_5_checkpoint_00400.pth,CIFAR101_RA_2_5
5,2.8749,0.4485,densenet_BC_100_12_ra_2_5_c10val_checkpoint_00...,CIFAR101_RA_2_5
6,3.6019,0.506,densenet_BC_100_12_ra_2_5_c10val_checkpoint_00...,CIFAR101_RA_2_5
7,2.3368,0.503,densenet_BC_100_12_ra_2_5_checkpoint_00050.pth,CIFAR101_RA_2_5
