In [1]:
import boto3
import numpy 
import sagemaker
from sagemaker.pytorch import PyTorch
import torch
import os

### RESNEXT_29_8x64D

#### Without Containers

In [2]:
# Sagemaker Notebook must be of type, conda_pytorch_p36

!pip install -r '/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/requirements.txt'

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m


In [3]:
# Need to add this to requirements.txt
!pip install tensorboard

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -m pip install --upgrade pip' command.[0m


In [None]:
# Train ResNeXt-29 8x64d (cardinality 8, see the override below) per the settings
# specified for that model in the original paper.
# Switch into the training repo first: train.py and the config path are relative,
# and the evaluate.py cells further down rely on this working directory as well.
os.chdir('/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/')
# The key/value pairs after --config presumably override entries of the YAML config
# (confirm against train.py's argument handling).
!python train.py --config configs/cifar/resnext.yaml \
    model.resnext.cardinality 8 \
    train.batch_size 128 \
    train.base_lr 0.1 \
    train.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00 \
    scheduler.epochs 300

# Number of epochs should be 300!

[32m[2020-06-11 02:12:08] __main__ INFO: [0mdevice: cuda
cudnn:
  benchmark: True
  deterministic: False
dataset:
  name: CIFAR10
  dataset_dir: ~/.torch/datasets/CIFAR10
  image_size: 32
  n_channels: 3
  n_classes: 10
model:
  type: cifar
  name: resnext
  init_mode: kaiming_fan_out
  vgg:
    n_channels: [64, 128, 256, 512, 512]
    n_layers: [2, 2, 3, 3, 3]
    use_bn: True
  resnet:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
  resnet_preact:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
    remove_first_relu: False
    add_last_bn: False
    preact_stage: [True, True, True]
  wrn:
    depth: 28
    initial_channels: 16
    widening_factor: 10
    drop_rate: 0.0
  densenet:
    depth: 100
    n_blocks: [6, 12, 24, 16]
    block_type: bottleneck
    growth_rate: 12
    drop_rate: 0.0
    compression_rate: 0.5
  pyramidnet:
    depth: 272
    n_blocks: [3, 24, 36, 3]
    initial_channels: 16
  

In [5]:
## Evaluate the trained, saved model (checkpoint_00300) using the CIFAR 10 test dataset.
# Write the results to the test output directory specified.
# evaluate.py and the config path are relative, so this cell depends on the
# working directory set by os.chdir in the training cell.
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnext.cardinality 8 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00300.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/test_results_0300

[32m[2020-06-14 17:22:43] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00300.pth
Files already downloaded and verified
100%|███████████████████████████████████████████| 79/79 [01:02<00:00,  1.27it/s]
[32m[2020-06-14 17:23:46] __main__ INFO: [0mElapsed 62.31
[32m[2020-06-14 17:23:46] __main__ INFO: [0mLoss 0.1457 Accuracy 0.9568


In [6]:
# Evaluate the intermediate checkpoint_00200 with the config's default dataset;
# results are written to their own directory (test_results_0200).
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnext.cardinality 8 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00200.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/test_results_0200

[32m[2020-06-14 17:24:16] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00200.pth
Files already downloaded and verified
100%|███████████████████████████████████████████| 79/79 [01:02<00:00,  1.26it/s]
[32m[2020-06-14 17:25:20] __main__ INFO: [0mElapsed 62.76
[32m[2020-06-14 17:25:20] __main__ INFO: [0mLoss 0.2246 Accuracy 0.9324


In [7]:
# Evaluate the intermediate checkpoint_00100 with the config's default dataset;
# results are written to their own directory (test_results_0100).
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnext.cardinality 8 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00100.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/test_results_0100

[32m[2020-06-14 17:25:30] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00100.pth
Files already downloaded and verified
100%|███████████████████████████████████████████| 79/79 [01:03<00:00,  1.25it/s]
[32m[2020-06-14 17:26:34] __main__ INFO: [0mElapsed 63.14
[32m[2020-06-14 17:26:34] __main__ INFO: [0mLoss 0.4673 Accuracy 0.8432


In [9]:
# Evaluate the final checkpoint_00300 on the CIFAR 10.1 test set by overriding
# dataset.name; results go to a separate directory (test_results_0300_CIFAR101).
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnext.cardinality 8 \
   test.batch_size 128 \
   dataset.name CIFAR101 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00300.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/test_results_0300_CIFAR101

[32m[2020-06-14 17:34:46] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/checkpoint_00300.pth
CIFAR 10.1
100%|███████████████████████████████████████████| 16/16 [00:12<00:00,  1.26it/s]
[32m[2020-06-14 17:34:59] __main__ INFO: [0mElapsed 12.72
[32m[2020-06-14 17:34:59] __main__ INFO: [0mLoss 0.3615 Accuracy 0.8930


In [10]:
# Persist the evaluation metrics as a CSV file so they can be analyzed later.
import pandas as pd

# Column layout of the results table (also the column order of the CSV).
column_order = ['Model', 'Testset', 'Epoch', 'Loss', 'Accuracy',
                'Original_Accuracy', 'Original_CI']

# One tuple per evaluation run, laid out in column_order. Loss and Accuracy are
# transcribed by hand from the evaluate.py log lines shown in the cells above.
# NOTE(review): Accuracy is a fraction (e.g. 0.9568) while Original_Accuracy and
# Original_CI look like percentages (e.g. 96.2) - confirm the downstream
# analysis accounts for the different scales.
runs = [
    ('resnext_29_8x64d', 'cifar10', 100, 0.4673, 0.8432, 96.2, (95.8, 96.6)),
    ('resnext_29_8x64d', 'cifar10', 200, 0.2246, 0.9324, 96.2, (95.8, 96.6)),
    ('resnext_29_8x64d', 'cifar10', 300, 0.1457, 0.9568, 96.2, (95.8, 96.6)),
    ('resnext_29_8x64d', 'cifar10.1', 300, 0.3615, 0.8930, 90.0, (88.6, 91.2)),
]

# Transpose the per-run tuples into the column -> list-of-values mapping.
results = {column: list(values)
           for column, values in zip(column_order, zip(*runs))}

df = pd.DataFrame(results, columns=column_order)

df.to_csv('/home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/results.csv')
df.head()

Unnamed: 0,Model,Testset,Epoch,Loss,Accuracy,Original_Accuracy,Original_CI
0,resnext_29_8x64d,cifar10,100,0.4673,0.8432,96.2,"(95.8, 96.6)"
1,resnext_29_8x64d,cifar10,200,0.2246,0.9324,96.2,"(95.8, 96.6)"
2,resnext_29_8x64d,cifar10,300,0.1457,0.9568,96.2,"(95.8, 96.6)"
3,resnext_29_8x64d,cifar10.1,300,0.3615,0.893,90.0,"(88.6, 91.2)"


In [11]:
# Peek inside the output file for predictions
# (predictions.npz in the test_results_0300 directory that was passed as
# test.output_dir to the checkpoint_00300 evaluation).
import numpy as np
output = '/home/ec2-user/SageMaker/experiments/resnext_29_8x64d/exp00/test_results_0300/predictions.npz'
npzfile = np.load(output)
# List the names of the arrays stored in the archive, then display 'preds'.
# NOTE(review): 'preds' looks like raw per-class scores rather than class
# indices (unbounded floats in the output) - confirm against evaluate.py.
print(npzfile.files)
npzfile['preds']

['preds', 'probs', 'labels', 'loss', 'acc']


array([[-1.4072659 , -1.7579162 , -0.0632572 , ..., -1.604979  ,
        -2.1179168 , -2.3709297 ],
       [-0.5318509 ,  2.3948684 , -1.824476  , ..., -1.735688  ,
         9.22153   , -1.438901  ],
       [-0.8268721 ,  1.7920238 , -2.0168679 , ..., -1.9178003 ,
        11.20963   , -1.209595  ],
       ...,
       [-2.3609014 , -1.2094655 ,  1.1702778 , ..., -0.8153744 ,
        -2.0398674 , -1.3504086 ],
       [-0.30113205, 10.91866   , -1.1114168 , ..., -2.3269584 ,
        -0.7837641 , -2.0046387 ],
       [-1.7235601 , -1.396374  , -1.2323154 , ..., 13.169274  ,
        -2.241554  , -1.0829642 ]], dtype=float32)

In [12]:
# Upload the model checkpoints, configs, and results to S3.
bucket='sagemaker-may29'  # name of the destination bucket
prefix = 'sagemaker/results/original-models/resnext_29_8x64d'  # key prefix for the uploaded files
path = '/home/ec2-user/SageMaker/experiments/resnext_29_8x64d'  # local experiment directory to upload

# boto3 S3 resource handle; uploadDirectory reads it as a notebook-level global.
s3_resource = boto3.resource("s3", region_name="us-east-2")

def uploadDirectory(local_path, bucket_name, s3_prefix, resource=None):
    """Recursively upload every file below ``local_path`` to an S3 bucket.

    Each file is stored under ``s3_prefix`` followed by the file's path
    relative to ``local_path``, always using "/" as the key separator
    regardless of the local operating system.

    Args:
        local_path: Root directory to walk; a trailing separator is allowed.
        bucket_name: Name of the destination bucket.
        s3_prefix: Key prefix placed in front of every uploaded file. A
            trailing "/" is ignored so keys never contain "//".
        resource: Object exposing ``Bucket(name)`` whose result provides
            ``upload_file(filename, key)``. Defaults to the notebook-level
            ``s3_resource``.

    Returns:
        None. Files are uploaded as a side effect.
    """
    if resource is None:
        # Fall back to the notebook-level boto3 resource, as the original did.
        resource = s3_resource

    my_bucket = resource.Bucket(bucket_name)
    key_prefix = s3_prefix.rstrip("/")

    for dirpath, _subdirs, files in os.walk(local_path):
        # Use relpath rather than str.replace(local_path, ""): replace() removes
        # every occurrence of the root string (e.g. all dots when the root is
        # ".") and drops the separator when the root ends with "/".
        rel_dir = os.path.relpath(dirpath, local_path)
        rel_parts = [] if rel_dir == os.curdir else rel_dir.split(os.sep)
        for file in files:
            # Skip empty components so an empty prefix does not produce a key
            # that starts with "/".
            key = "/".join(part for part in [key_prefix, *rel_parts, file] if part)
            my_bucket.upload_file(os.path.join(dirpath, file), key)
    
# Upload everything under the local experiment directory `path` to `bucket`,
# with object keys starting with `prefix`.
uploadDirectory(path,bucket,prefix)

In [43]:
# Sanity check: display the kernel's current working directory.
os.getcwd()


'/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass'