## ResNext 29 4x64

 - Training Dataset:  CutMix, beta=1, cutmix_prob=1
 - Sagemaker Notebook must be of type, conda_pytorch_p36
 
#### Install Requirements

In [1]:
import boto3
import numpy 
import sagemaker
from sagemaker.pytorch import PyTorch
import torch
import os

In [2]:
!pip install -r '/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/requirements.txt'

# Need to add this to requirements.txt
!pip install tensorboard



#### Train the Model

In [None]:
# Train the model per the settings specified in the original paper
os.chdir('/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/')
!python train.py --config configs/cifar/resnext.yaml \
    model.resnext.cardinality 4 \
    train.batch_size 128 \
    train.base_lr 0.1 \
    dataset.name CIFAR10_CM_1 \
    train.base_lr 0.1 \
    train.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00 \
    scheduler.epochs 400

[32m[2020-07-10 05:54:02] __main__ INFO: [0mdevice: cuda
cudnn:
  benchmark: True
  deterministic: False
dataset:
  name: CIFAR10_CM_1
  dataset_dir: ''
  image_size: 32
  n_channels: 3
  n_classes: 10
model:
  type: cifar
  name: resnet
  init_mode: kaiming_fan_out
  vgg:
    n_channels: [64, 128, 256, 512, 512]
    n_layers: [2, 2, 3, 3, 3]
    use_bn: True
  resnet:
    depth: 32
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
  resnet_preact:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
    remove_first_relu: False
    add_last_bn: False
    preact_stage: [True, True, True]
  wrn:
    depth: 28
    initial_channels: 16
    widening_factor: 10
    drop_rate: 0.0
  densenet:
    depth: 100
    n_blocks: [6, 12, 24, 16]
    block_type: bottleneck
    growth_rate: 12
    drop_rate: 0.0
    compression_rate: 0.5
  pyramidnet:
    depth: 272
    n_blocks: [3, 24, 36, 3]
    initial_channels: 16
    block_type: bottle

#### Refine the Model

In [54]:
# Resume training with the un-augmented data
os.chdir('/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/')
!python train.py --config /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00/config.yaml \
    train.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00/checkpoint_00400.pth \
    dataset.name CIFAR10 \
    model.wrn.depth 28 \
    model.wrn.widening_factor 10 \
    train.batch_size 128 \
    train.base_lr 0.000032 \
    train.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00_resume400_50 \
    scheduler.epochs 50

#### Set LEARNING RATE based on ending LR

[32m[2020-07-10 13:39:42] __main__ INFO: [0mdevice: cuda
cudnn:
  benchmark: True
  deterministic: False
dataset:
  name: CIFAR10
  dataset_dir: ~/.torch/datasets/CIFAR10
  image_size: 32
  n_channels: 3
  n_classes: 10
model:
  type: cifar
  name: resnet
  init_mode: kaiming_fan_out
  vgg:
    n_channels: [64, 128, 256, 512, 512]
    n_layers: [2, 2, 3, 3, 3]
    use_bn: True
  resnet:
    depth: 32
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
  resnet_preact:
    depth: 110
    n_blocks: [2, 2, 2, 2]
    block_type: basic
    initial_channels: 16
    remove_first_relu: False
    add_last_bn: False
    preact_stage: [True, True, True]
  wrn:
    depth: 28
    initial_channels: 16
    widening_factor: 10
    drop_rate: 0.0
  densenet:
    depth: 100
    n_blocks: [6, 12, 24, 16]
    block_type: bottleneck
    growth_rate: 12
    drop_rate: 0.0
    compression_rate: 0.5
  pyramidnet:
    depth: 272
    n_blocks: [3, 24, 36, 3]
    initial_channels: 16
    

In [55]:
## Evaluate the trained, saved model using the CIFAR 10 test dataset 
# Right the results to the test output directory specified.
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00_resume400_50/checkpoint_00050.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00_resume400_50/test_results_0050_cifar10

[32m[2020-07-10 14:10:31] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnet_basic_32_cm_1_1/exp00_resume400_50/checkpoint_00050.pth
Files already downloaded and verified
100%|███████████████████████████████████████████| 79/79 [00:02<00:00, 26.66it/s]
[32m[2020-07-10 14:10:35] __main__ INFO: [0mElapsed 2.97
[32m[2020-07-10 14:10:35] __main__ INFO: [0mLoss 0.4049 Accuracy 0.9010


In [62]:
## Evaluate the trained, saved model using the CIFAR 10.1 test dataset 
# Right the results to the test output directory specified.
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   dataset.name CIFAR101 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00_resume400_50/checkpoint_00050.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00_resume400_50/test_results_0050_cifar101

[32m[2020-07-10 16:59:44] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnet_basic_32_cm_1_1/exp00_resume400_50/checkpoint_00050.pth
CIFAR 10.1
100%|███████████████████████████████████████████| 16/16 [00:00<00:00, 18.16it/s]
[32m[2020-07-10 16:59:45] __main__ INFO: [0mElapsed 0.88
[32m[2020-07-10 16:59:45] __main__ INFO: [0mLoss 0.8287 Accuracy 0.8260


In [57]:
## Evaluate the trained, saved model using the CIFAR 10 test dataset 
# Right the results to the test output directory specified.
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00/checkpoint_00400.pth \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00/test_results_0400_cifar10

[32m[2020-07-10 14:11:23] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnet_basic_32_cm_1_1/exp00/checkpoint_00400.pth
Files already downloaded and verified
100%|███████████████████████████████████████████| 79/79 [00:02<00:00, 27.59it/s]
[32m[2020-07-10 14:11:27] __main__ INFO: [0mElapsed 2.87
[32m[2020-07-10 14:11:27] __main__ INFO: [0mLoss 0.5840 Accuracy 0.8608


In [63]:
## Evaluate the trained, saved model using the CIFAR 10.1 test dataset 
# Right the results to the test output directory specified.
os.chdir('/home/ec2-user/SageMaker/w210-capstone/models/pytorch_imageclass/')
!python evaluate.py --config configs/cifar/resnext.yaml \
   model.resnet.depth 32 \
   test.batch_size 128 \
   test.checkpoint /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00/checkpoint_00400.pth \
   dataset.name CIFAR101 \
   test.output_dir /home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1/exp00/test_results_0400_cifar101

[32m[2020-07-10 17:00:22] fvcore.common.checkpoint INFO: [0mLoading checkpoint from /home/ec2-user/SageMaker/experiments/resnet_basic_32_cm_1_1/exp00/checkpoint_00400.pth
CIFAR 10.1
100%|███████████████████████████████████████████| 16/16 [00:00<00:00, 18.09it/s]
[32m[2020-07-10 17:00:24] __main__ INFO: [0mElapsed 0.89
[32m[2020-07-10 17:00:24] __main__ INFO: [0mLoss 0.8922 Accuracy 0.7515


#### Record the Results

In [3]:
# Write the results to a CSV file so that we can analyze later.
import pandas as pd

results = {'Model': ['resnext_29_4x64d', 'resnext_29_4x64d', 'resnext_29_4x64d', 'resnext_29_4x64d'],
           'Testset': ['cifar10', 'cifar10', 'cifar10', 'cifar10.1'],
           'Epoch': [100, 200, 300, 300],
           'Loss': [0.6746, 0.2311, 0.1517, 0.3742],
           'Accuracy': [0.8019, 0.9321, 0.9535, 0.8905],
           'Original_Accuracy': [96.4, 96.4, 96.4, 89.6],
           'Original_CI': [(96.0, 96.7), (96.0, 96.7), (96.0, 96.7), (88.2, 90.9)]
           }

df = pd.DataFrame(results, columns = ['Model', 'Testset', 'Epoch', 'Loss', 'Accuracy', 
                                      'Original_Accuracy', 'Original_CI'])



#df.to_csv('/home/ec2-user/SageMaker/experiments/wrn_28_10/exp00/results.csv')
df.head()

Unnamed: 0,Model,Testset,Epoch,Loss,Accuracy,Original_Accuracy,Original_CI
0,resnext_29_4x64d,cifar10,100,0.6746,0.8019,96.4,"(96.0, 96.7)"
1,resnext_29_4x64d,cifar10,200,0.2311,0.9321,96.4,"(96.0, 96.7)"
2,resnext_29_4x64d,cifar10,300,0.1517,0.9535,96.4,"(96.0, 96.7)"
3,resnext_29_4x64d,cifar10.1,300,0.3742,0.8905,89.6,"(88.2, 90.9)"


In [64]:
import pandas as pd
model = 'resnext_29_4x64d_cm_1_1'

a = pd.Series([model, 400, 'cifar10', ])
c = pd.Series([model, 400, 'cifar10.1', ])

e = pd.Series([model, 50, 'cifar10.1', ])
f = pd.Series([model, 50, 'cifar10', ])
               
df_results = pd.concat([a,c,e,f], axis=1).T
df_results.columns = ['Model', 'Epoch', 'Testset', 'Loss', 'Accuracy']

df_results['Original_Accuracy'] = df_results.apply((lambda row: 96.4 if row[2] == 'cifar10' else 89.6), axis=1)
df_results['Original_CI'] = df_results.apply((lambda row: (96.0, 96.7) if row[2] == 'cifar10' else (88.2, 90.9)), axis=1)

df_results.to_csv('/home/ec2-user/SageMaker/experiments/' + model + '/results.csv')
df_results

Unnamed: 0,Model,Epoch,Testset,Loss,Accuracy,Original_Accuracy,Original_CI
0,resnet_basic_32_cm_1_1,400,cifar10,0.584,0.8608,92.5,"(92.0, 93.0)"
1,resnet_basic_32_cm_1_1,400,cifar10.1,0.8922,0.7515,84.9,"(83.2, 86.4)"
2,resnet_basic_32_cm_1_1,50,cifar10.1,0.8287,0.826,84.9,"(83.2, 86.4)"
3,resnet_basic_32_cm_1_1,50,cifar10,0.4049,0.901,92.5,"(92.0, 93.0)"


In [118]:
# Upload the model checkpoints, configs, and results to S3 
bucket='sagemaker-may29'
prefix = 'sagemaker/results/original-models/resnext_29_4x64d_cm_1_1'
path = '/home/ec2-user/SageMaker/experiments/resnext_29_4x64d_cm_1_1'

s3_resource = boto3.resource("s3", region_name="us-east-2")

def uploadDirectory(local_path,bucket_name,s3_prefix):

    my_bucket = s3_resource.Bucket(bucket_name)
    
    for path, subdirs, files in os.walk(local_path):
        path = path.replace("\\","/")
        directory_name = path.replace(local_path,"")
        for file in files:
            #print("Local File:", os.path.join(path, file))
            #print("      Dest:", s3_prefix+directory_name+'/'+file)
            my_bucket.upload_file(os.path.join(path, file), s3_prefix+directory_name+'/'+file)
    
uploadDirectory(path,bucket,prefix)