### Wide Residual Net 29 4x64
- Training Dataset: RandAugment, N=1, M=20
- Sagemaker Notebook must be of type, conda_pytorch_p36

In [None]:
import boto3
import numpy 
import sagemaker
from sagemaker.pytorch import PyTorch
import torch
import os

#### Install Requirements

In [2]:
# Sagemaker Notebook must be of type conda_pytorch_p36

!pip install -r '/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/requirements.txt'

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m


In [3]:
# Need to add this to requirements.txt
!pip install tensorboard

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m


In [None]:
# Train the model per the settings specified in the original paper, but using augmented data
os.chdir('/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/')
!python train.py --config configs/cifar/wrn.yaml \
    model.wrn.depth 28 \
    model.wrn.widening_factor 10 \
    train.batch_size 128 \
    train.base_lr 0.1 \
    dataset.name CIFAR10_RA_1_20 \
    train.output_dir /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00 \
    scheduler.epochs 400

[32m[2020-07-11 15:53:31] __main__ INFO: [0mdevice: cuda
cudnn:
  benchmark: True
  deterministic: False
dataset:
  name: CIFAR10_RA_1_20
  dataset_dir: ''
  image_size: 32
  n_channels: 3
  n_classes: 10
model:
  type: cifar
  name: wrn
  init_mode: kaiming_fan_in
  vgg:
    n_channels: [64, 128, 256, 512, 512]
    n_layers: [2, 2, 3, 3, 3]
    use_bn: True
  resnet:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
  resnet_preact:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
    remove_first_relu: False
    add_last_bn: False
    preact_stage: [True, True, True]
  wrn:
    depth: 28
    initial_channels: 16
    widening_factor: 10
    drop_rate: 0.0
  densenet:
    depth: 100
    n_blocks: [6, 12, 24, 16]
    block_type: bottleneck
    growth_rate: 12
    drop_rate: 0.0
    compression_rate: 0.5
  pyramidnet:
    depth: 272
    n_blocks: [3, 24, 36, 3]
    initial_channels: 16
    block_type: bottle

In [5]:
# Resume training with the un-augmented data

#### Set LEARNING RATE based on ending learning rate 
os.chdir('/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/')
#!python train.py --config configs/cifar/resnet.yaml \
!python train.py --config /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/config.yaml \
    train.checkpoint /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/checkpoint_00400.pth \
    dataset.name CIFAR10 \
    train.base_lr .001 \
    train.output_dir /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00_resume400_50 \
    scheduler.epochs 50


[32m[2020-07-13 23:06:22] __main__ INFO: [0mdevice: cuda
cudnn:
  benchmark: True
  deterministic: False
dataset:
  name: CIFAR10
  dataset_dir: ~/.torch/datasets/CIFAR10
  image_size: 32
  n_channels: 3
  n_classes: 10
model:
  type: cifar
  name: wrn
  init_mode: kaiming_fan_in
  vgg:
    n_channels: [64, 128, 256, 512, 512]
    n_layers: [2, 2, 3, 3, 3]
    use_bn: True
  resnet:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
  resnet_preact:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
    remove_first_relu: False
    add_last_bn: False
    preact_stage: [True, True, True]
  wrn:
    depth: 28
    initial_channels: 16
    widening_factor: 10
    drop_rate: 0.0
  densenet:
    depth: 100
    n_blocks: [6, 12, 24, 16]
    block_type: bottleneck
    growth_rate: 12
    drop_rate: 0.0
    compression_rate: 0.5
  pyramidnet:
    depth: 272
    n_blocks: [3, 24, 36, 3]
    initial_channels: 16
    blo

#### Collect model predictions and performance metrics 

In [8]:
## Evaluate the trained, saved model using the CIFAR 10 test dataset 
# write the results to the test output directory specified
!python evaluate.py --config configs/cifar/wrn.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00_resume400_50/checkpoint_00050.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00_resume400_50/test_results_0050_cifar10

[32m[2020-07-18 18:14:54] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00_resume400_50/checkpoint_00050.pth
Files already downloaded and verified
100%|███████████████████████████████████████████| 79/79 [00:26<00:00,  2.98it/s]
[32m[2020-07-18 18:15:22] __main__ INFO: [0mElapsed 26.53
[32m[2020-07-18 18:15:22] __main__ INFO: [0mLoss 0.2292 Accuracy 0.9463


In [9]:
## Evaluate the trained, saved model using the CIFAR 10.1 test dataset 
# Right the results to the test output directory specified.
!python evaluate.py --config configs/cifar/wrn.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   dataset.name CIFAR101 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00_resume400_50/checkpoint_00050.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00_resume400_50/test_results_0050_cifar101

[32m[2020-07-18 18:15:52] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00_resume400_50/checkpoint_00050.pth
CIFAR 10.1
100%|███████████████████████████████████████████| 16/16 [00:05<00:00,  2.87it/s]
[32m[2020-07-18 18:15:58] __main__ INFO: [0mElapsed 5.58
[32m[2020-07-18 18:15:58] __main__ INFO: [0mLoss 0.5373 Accuracy 0.8905


In [10]:
## Evaluate the trained, saved model using the CIFAR 10 test dataset 
# write the results to the test output directory specified.
!python evaluate.py --config configs/cifar/wrn.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/checkpoint_00400.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/test_results_0400_cifar10

[32m[2020-07-18 18:16:51] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/checkpoint_00400.pth
Files already downloaded and verified
100%|███████████████████████████████████████████| 79/79 [00:26<00:00,  2.98it/s]
[32m[2020-07-18 18:17:18] __main__ INFO: [0mElapsed 26.48
[32m[2020-07-18 18:17:18] __main__ INFO: [0mLoss 0.4205 Accuracy 0.9037


In [11]:
## Evaluate the trained, saved model using the CIFAR 10.1 test dataset 
# write the results to the test output directory specified.
os.chdir('/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/')
!python evaluate.py --config configs/cifar/wrn.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/checkpoint_00400.pth \
   dataset.name CIFAR101 \
   test.output_dir /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/test_results_0400_cifar101

[32m[2020-07-18 18:17:35] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/exp00/checkpoint_00400.pth
CIFAR 10.1
100%|███████████████████████████████████████████| 16/16 [00:05<00:00,  2.84it/s]
[32m[2020-07-18 18:17:41] __main__ INFO: [0mElapsed 5.63
[32m[2020-07-18 18:17:41] __main__ INFO: [0mLoss 0.8154 Accuracy 0.8275


In [12]:
import pandas as pd
a = pd.Series(['wrn_28_10_ra_1_20', 400, 'cifar10', 0.4205, 0.9037]) #Loss 0.4205 Accuracy 0.9037
c = pd.Series(['wrn_28_10_ra_1_20', 400, 'cifar10.1', 0.8154, 0.8275]) #Loss 0.8154 Accuracy 0.8275


e = pd.Series(['wrn_28_10_ra_1_20_refined400', 50, 'cifar10.1', 0.5373, 0.8905]) #Loss 0.5373 Accuracy 0.8905
f = pd.Series(['wrn_28_10_ra_1_20_refined400', 50, 'cifar10', 0.2292,0.9463]) #Loss 0.2292 Accuracy 0.9463
               
df_results = pd.concat([a,c,e,f], axis=1).T
df_results.columns = ['Model', 'Epoch', 'Testset', 'Loss', 'Accuracy']

df_results['Original_Accuracy'] = df_results.apply((lambda row: 95.9 if row[2] == 'cifar10' else 89.7), axis=1)
df_results['Original_CI'] = df_results.apply((lambda row: (95.5, 96.3) if row[2] == 'cifar10' else (88.3, 91.0)), axis=1)

df_results.to_csv('/home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20/results.csv')
df_results

Unnamed: 0,Model,Epoch,Testset,Loss,Accuracy,Original_Accuracy,Original_CI
0,wrn_28_10_ra_1_20,400,cifar10,0.4205,0.9037,95.9,"(95.5, 96.3)"
1,wrn_28_10_ra_1_20,400,cifar10.1,0.8154,0.8275,89.7,"(88.3, 91.0)"
2,wrn_28_10_ra_1_20_refined400,50,cifar10.1,0.5373,0.8905,89.7,"(88.3, 91.0)"
3,wrn_28_10_ra_1_20_refined400,50,cifar10,0.2292,0.9463,95.9,"(95.5, 96.3)"


In [14]:
# Peak inside the output file for predictions
import numpy as np
output = '/home/ec2-user/SageMaker/experiments/wrn_28_10_ra_2_20/exp00/test_results_0400_cifar10/predictions.npz'
npzfile = np.load(output)
print(npzfile.files)
npzfile['preds']

['preds', 'probs', 'labels', 'loss', 'acc']


array([[-1.6346034e+00, -1.6786125e+00, -5.5114478e-01, ...,
        -1.3655751e+00, -9.7406626e-01, -2.0390615e+00],
       [ 1.4603181e-01, -5.1087286e-02, -1.2974702e+00, ...,
        -2.0043511e+00,  1.0846609e+01, -1.3743297e+00],
       [-3.3679867e-01, -6.3276786e-01, -7.7840930e-01, ...,
        -1.8309350e+00,  7.5045228e+00,  6.6094440e-01],
       ...,
       [-1.5556248e+00, -1.7238193e+00, -6.5844554e-01, ...,
        -7.4209386e-01, -1.6280203e+00, -3.8239390e-01],
       [ 9.8031856e-02,  8.8812132e+00, -1.9066744e-03, ...,
        -1.7034054e+00, -7.0803320e-01, -7.0997417e-01],
       [-1.2500942e+00, -1.3773537e+00, -1.1949891e+00, ...,
         8.8070650e+00, -1.4133977e+00, -9.2784518e-01]], dtype=float32)

In [15]:
# Upload the model checkpoints, configs, and results to S3 
bucket='sagemaker-may29'
prefix = 'sagemaker/results/original-models/wrn_28_10_ra_1_20'
path = '/home/ec2-user/SageMaker/experiments/wrn_28_10_ra_1_20'

s3_resource = boto3.resource("s3", region_name="us-east-2")

def uploadDirectory(local_path,bucket_name,s3_prefix):

    my_bucket = s3_resource.Bucket(bucket_name)
    
    for path, subdirs, files in os.walk(local_path):
        path = path.replace("\\","/")
        directory_name = path.replace(local_path,"")
        for file in files:
            #print("Local File:", os.path.join(path, file))
            #print("      Dest:", s3_prefix+directory_name+'/'+file)
            my_bucket.upload_file(os.path.join(path, file), s3_prefix+directory_name+'/'+file)
    
uploadDirectory(path,bucket,prefix)