In [49]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import os
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline


In [50]:
# Local directory that holds the training source code
# (the estimator further down uses source_dir='source').
path = "source"
# Create the directory in a single call that reuses `path` instead of
# repeating the literal; it is a no-op when the directory already exists
# and avoids the race between a separate existence check and mkdir.
os.makedirs(path, exist_ok=True)

# Only needed by the (currently disabled) scaling steps below.
scaler = MinMaxScaler()

boston = load_boston()

my_data = boston.data
#my_data=scaler.fit_transform(my_data)

my_target = boston.target
#my_target=np.reshape(my_target, (len(my_target), 1))
#my_target=scaler.fit_transform(my_target)

# Hold out 23% of the rows for testing; random_state pins the split so
# re-running the cell produces the same partition.
X_train, X_test, Y_train, Y_test = train_test_split(
    my_data, my_target, test_size=0.23, random_state=1
)

In [51]:
import boto3
import sagemaker 
from sagemaker import get_execution_role

# Session object through which this notebook talks to SageMaker and S3
sagemaker_session = sagemaker.Session()

# Default S3 bucket reported by the session
bucket = sagemaker_session.default_bucket()

# Execution role for the current environment
role = get_execution_role()

In [52]:
import os

def make_csv(x, y, filename, data_dir):
    '''Merges features and labels and converts them into one csv file with labels in the first column.
       Rows are paired by position, so x and y must contain the same number of rows.
       The file is written without a header row and without an index column.
       :param x: Data features
       :param y: Data labels
       :param filename: Name of csv file, ex. 'train.csv'
       :param data_dir: The directory where files will be saved
       :raises ValueError: If x and y do not have the same number of rows
       '''
    # Drop any index carried by pandas inputs: concat(axis=1) aligns on the
    # index, so labels and features with different indices would otherwise be
    # padded with NaN rows instead of being paired row by row.
    labels = pd.DataFrame(y).reset_index(drop=True)
    features = pd.DataFrame(x).reset_index(drop=True)

    if len(labels) != len(features):
        raise ValueError(
            'x and y must have the same number of rows, got '
            + str(len(features)) + ' and ' + str(len(labels))
        )

    # make data dir, if it does not exist (no separate existence check needed)
    os.makedirs(data_dir, exist_ok=True)

    # first column is the labels and rest is features
    csv_path = os.path.join(data_dir, filename)
    pd.concat([labels, features], axis=1)\
             .to_csv(csv_path, header=False, index=False)

    # nothing is returned, but a print statement indicates that the function has run
    print('Path created: ' + csv_path)



In [53]:
import argparse
import sys
import os
import json
import pandas as pd



# local folder that receives the training csv
path = 'Boston-Housing'
name = 'train.csv'

# S3 key prefix under which the folder is uploaded
prefix = 'Boston-Data'

# write the training split to 'train.csv' inside the local folder
make_csv(x=X_train, y=Y_train, filename=name, data_dir=path)

# push the local folder to S3 and show where it landed
input_data = sagemaker_session.upload_data(path=path, bucket=bucket, key_prefix=prefix)
print(input_data)

Path created: Boston-Housing/train.csv
s3://sagemaker-us-east-2-496568984551/Boston-Data


In [54]:
# list the key of every object currently stored in the bucket
bucket_objects = boto3.resource('s3').Bucket(bucket).objects.all()
for obj in bucket_objects:
    print(obj.key)

print()

Boston-Data/sagemaker-pytorch-2019-09-17-00-55-59-239/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-02-14-057/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-08-29-295/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-16-09-658/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-26-19-733/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-32-14-837/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-36-25-066/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-50-01-829/output/model.tar.gz
Boston-Data/sagemaker-pytorch-2019-09-17-01-54-56-221/output/model.tar.gz
Boston-Data/train.csv
moon-data/sagemaker-pytorch-2019-09-16-23-44-37-261/output/model.tar.gz
moon-data/train.csv
sagemaker-pytorch-2019-09-16-22-16-31-906/source/sourcedir.tar.gz
sagemaker-pytorch-2019-09-16-22-22-44-474/source/sourcedir.tar.gz
sagemaker-pytorch-2019-09-16-22-30-02-188/source/sourcedir.tar.gz
sagemaker-pytorch-2019

In [55]:
# Print source/model.py with syntax highlighting (shell command run from the notebook).
!pygmentize source/model.py

# -*- coding: utf-8 -*-
"""
Created on Sun Sep 15 18:53:19 2019

@author: tony
"""
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(torch.nn.Module):
    def __init__(self, n_features, size_hidden, n_output):
        super(SimpleNet, self).__init__()
        self.hidden = torch.nn.Linear(n_features, size_hidden)
        self.predict = torch.nn.Linear(size_hidden, n_output)
        
    def forward(self, x):
        x= F.relu(self.hidd

In [56]:
# import a PyTorch wrapper
from sagemaker.pytorch import PyTorch

# S3 location for training output: the bucket plus the prefix specified above
output_path = 's3://{}/{}'.format(bucket, prefix)

# PyTorch estimator: entry point train.py from the local "source" folder,
# trained on a single ml.c4.xlarge instance
estimator = PyTorch(
    entry_point='train.py',
    source_dir='source',  # this should be just "source" for your code
    role=role,
    sagemaker_session=sagemaker_session,
    framework_version='1.0',
    train_instance_count=1,
    train_instance_type='ml.c4.xlarge',
    output_path=output_path,
    hyperparameters=dict(
        input_dim=13,    # num of features
        hidden_dim=100,
        output_dim=1,
        epochs=80,       # could change to higher
    ),
)
    

In [57]:
%%time 
# train the estimator on S3 training data
estimator.fit({'train': input_data})

2019-09-17 02:00:24 Starting - Starting the training job...
2019-09-17 02:00:26 Starting - Launching requested ML instances...
2019-09-17 02:01:22 Starting - Preparing the instances for training......
2019-09-17 02:02:14 Downloading - Downloading input data...
2019-09-17 02:02:48 Training - Training image download completed. Training in progress.[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-09-17 02:02:49,828 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-09-17 02:02:49,831 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[31m2019-09-17 02:02:49,843 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-09-17 02:02:52,870 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31m2019-09-17 02:02:53,208 sagemaker-containers INFO     Mod


2019-09-17 02:03:05 Uploading - Uploading generated training model
2019-09-17 02:03:05 Completed - Training job completed
Training seconds: 51
Billable seconds: 51
CPU times: user 413 ms, sys: 27 ms, total: 440 ms
Wall time: 3min 11s
