In [1]:
# Download data from S3 bucket
import boto3

s3 = boto3.client('s3')

# Single source of truth for the bucket/object names used by both the
# listing and the download below.
bucket_name = 'breakhis-dataset'
object_key = 'breakhis_full.zip'
local_filename = 'breakhis-dataset.zip'

# Print every key in the bucket. A single list_objects_v2 call returns at
# most 1000 keys, so walk the result pages with a paginator instead.
paginator = s3.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=bucket_name):
    for obj in page.get('Contents', []):
        print(obj['Key'])

# Fetch the archive into the current working directory.
s3.download_file(bucket_name, object_key, local_filename)
print("Download complete")

breakhis_full.zip
Download complete!


In [2]:
# Unzip dataset
import zipfile
import os

# Unpack the downloaded archive (local_filename comes from the download cell)
# into a folder named "breakhis" under the current working directory.
with zipfile.ZipFile(local_filename, mode='r') as archive:
    archive.extractall(path='breakhis')

print("Unzip complete")

Unzip complete!


In [3]:
# Verify directories
# Show where the kernel is currently running ...
current_dir = os.getcwd()
print(current_dir)

# ... and what sits in the SageMaker user's home directory.
home_entries = os.listdir('/home/sagemaker-user')
print(home_entries)

/home/sagemaker-user
['.bashrc', 'user-default-efs', '.sagemaker_sql_editor_api_cache', '.local', '.ipython', '.npm', '.jupyter', 'Untitled.ipynb', '.ipynb_checkpoints', '.cache', '.config', '.virtual_documents', 'breast-cancer-tissue-labeling.ipynb', 'breakhis-dataset.zip', 'breakhis']


In [4]:
# Sanity-check the extracted dataset root before building deeper paths on it.
path = '/home/sagemaker-user/breakhis/BreakHis_dataset'

# Query the filesystem once per property and reuse the answers below.
path_exists = os.path.exists(path)
path_is_dir = os.path.isdir(path)

print("Exists?", path_exists)
print("Is directory?", path_is_dir)

# Only list the path when it is really a directory: os.listdir raises
# NotADirectoryError for a path that exists but is a regular file.
if path_is_dir:
    contents = os.listdir(path)
elif path_exists:
    contents = "Not a directory"
else:
    contents = "Path not found"
print("Contents:", contents)

Exists? True
Is directory? True
Contents: ['Folds.csv', 'BreaKHis_v1']


In [5]:
# Folder that holds the per-class ("benign" / "malignant") slide directories.
breast_path = '/home/sagemaker-user/breakhis/BreakHis_dataset/BreaKHis_v1/BreaKHis_v1/histology_slides/breast'

print("Contents of breast:")
breast_entries = os.listdir(breast_path)
print(breast_entries)

Contents of breast:
['malignant', 'README.txt', 'benign', 'count_files.sh']


In [9]:
# Switch into the project checkout and confirm its config file is present.
project_dir = '/home/sagemaker-user/breast_cancer_detection'
os.chdir(project_dir)

has_config = 'config.json' in os.listdir()
print(has_config)

True


In [10]:
# Update config file
!cat config.json
print(os.getcwd())
print(os.listdir())
import json

config_path = 'config.json'

# Load config
with open(config_path, 'r') as f:
    config = json.load(f)

# Update data directory to output path
config['data_loader']['args']['data_dir'] = '/home/sagemaker-user/breakhis/BreakHis_dataset/BreaKHis_v1/BreaKHis_v1/histology_slides/breast'

with open(config_path, 'w') as f:
    json.dump(config, f, indent=4)

print("Config file updated")

{
    "name": "BCDensenet",
    "n_gpu": 1,

    "arch": {
        "type": "densenet121",
        "args": {}
    },
    "data_loader": {
        "type": "BCDataLoader",
        "args":{
            "data_dir": "data/BreaKHis_v1/BreaKHis_v1/histology_slides/breast",
            "batch_size": 16,
            "shuffle": true,
            "validation_split": 0.1,
            "num_workers": 2
        }
    },
    "optimizer": {
        "type": "Adam",
        "args":{
            "lr": 0.001,
            "weight_decay": 0,
            "amsgrad": true
        }
    },
    "loss": "cross_entropy",
    "metrics": [
        "accuracy", "top_k_acc"
    ],
    "lr_scheduler": {
        "type": "StepLR",
        "args": {
            "step_size": 20,
            "gamma": 0.1
        }
    },
    "trainer": {
        "epochs": 15,

        "save_dir": "saved/",
        "save_period": 1,
        "verbosity": 2,
        
        "monitor": "min val_loss",
        "early_stop": 10,

        "tensorboa

In [11]:
# Train classifier
# The leading `!` hands the command to a shell, so `train.py` and
# `config.json` are both resolved relative to the kernel's current working
# directory at the time this cell runs.
# NOTE(review): presumably this must run after the cell that changes into the
# project directory and the cell that rewrites config.json - confirm.
!python train.py --config config.json

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /home/sagemaker-user/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|███████████████████████████████████████| 30.8M/30.8M [00:00<00:00, 255MB/s]
DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv

In [17]:
# Verify output directory & save trained model
# Report the working directory as it was before changing into the project.
previous_dir = os.getcwd()
print(previous_dir)

os.chdir('/home/sagemaker-user/breast_cancer_detection')

# Each entry under saved/models/BCDensenet is listed as-is.
run_dirs = os.listdir('saved/models/BCDensenet')
print(run_dirs)

/home/sagemaker-user/breast_cancer_detection
['0224_034642', '0617_002401']


In [2]:
# Imported here as well: this cell was executed in a restarted kernel, where
# `os` from the earlier cells is no longer in scope.
import os
import shutil

# Work from the project directory so the relative paths below resolve.
os.chdir('/home/sagemaker-user/breast_cancer_detection')

# Zip the contents of the chosen run folder into my_model.zip in the current
# directory. Left as the last expression so the archive path is displayed.
shutil.make_archive('my_model', 'zip', 'saved/models/BCDensenet/0617_002401')

'/home/sagemaker-user/breast_cancer_detection/my_model.zip'