In [None]:
# SageMaker SDK / AWS imports shared by the cells below.
import sagemaker
import boto3
from sagemaker import image_uris
from sagemaker.session import Session
# TrainingInput wraps an S3 path + content type for estimator.fit (see the
# training cell). The class name is `TrainingInput`; a misspelled name here
# raises ImportError and leaves the training cell with a NameError.
from sagemaker.inputs import TrainingInput

In [None]:
import pandas as pd

# Read the housing table into a DataFrame. The relative path means the CSV is
# looked up in the kernel's current working directory.
dataset = pd.read_csv(filepath_or_buffer='housing.csv')

In [None]:
# Put the 'medv' column first and keep every other column in its original
# order. Re-running this cell is harmless: 'medv' simply stays in front.
medv_column = dataset['medv']
other_columns = dataset.drop(columns=['medv'])
dataset = pd.concat([medv_column, other_columns], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation. A fixed random_state pins the
# shuffle so the split (and therefore the uploaded files and the trained
# model) is the same after every Restart & Run All.
training_dataset, validation_dataset = train_test_split(
    dataset,
    test_size=0.1,
    random_state=42,
)

# Sanity check: row counts of the two partitions (columns should be equal).
print(training_dataset.shape)
print(validation_dataset.shape)

In [None]:
# Write both partitions to local CSV files without the index column and
# without a header row.
# NOTE(review): presumably this matches the CSV layout the built-in XGBoost
# container expects (target first, no header) — confirm against AWS docs.
for frame, filename in (
    (training_dataset, 'training_dataset.csv'),
    (validation_dataset, 'validation_dataset.csv'),
):
    frame.to_csv(filename, index=False, header=False)

In [None]:
import sagemaker

# Record the SDK version in the output for reproducibility.
print(sagemaker.__version__)

# One session for the whole notebook; `bucket` is reused later to build the
# training job's output location.
sess = sagemaker.Session()
bucket = sess.default_bucket()

# Every S3 key created by this notebook lives under this prefix.
prefix = 'boston-housing'

# Upload each local CSV under its own key prefix. The returned paths are
# handed to TrainingInput in the training cell.
training_data_path = sess.upload_data(
    path='training_dataset.csv',
    key_prefix=prefix + '/input/training',
)
validation_data_path = sess.upload_data(
    path='validation_dataset.csv',
    key_prefix=prefix + '/input/validation',
)

print(training_data_path)
print(validation_data_path)

In [None]:
from sagemaker.xgboost.estimator import XGBoost
from sagemaker.image_uris import retrieve

# Look up the XGBoost 1.3-1 container image for the region the session is
# bound to, and the IAM role the training job will run under.
container = retrieve(
    framework="xgboost",
    region=sess.boto_region_name,
    version="1.3-1",
)
role = sagemaker.get_execution_role()

# Algorithm settings, passed to the container as strings.
hyperparameters = {
    "max_depth": "5",
    "eta": "0.2",
    "gamma": "4",
    "min_child_weight": "6",
    "subsample": "0.7",
    "objective": "reg:squarederror",
    "num_round": "50"
}

# Model artifacts are written next to the inputs, under the same prefix.
output_location = f's3://{bucket}/{prefix}/output'

# Generic Estimator driven by the container image retrieved above.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.2xlarge',
    volume_size=5,  # 5 GB
    hyperparameters=hyperparameters,
    output_path=output_location,
)


In [None]:
# Wrap the two uploaded S3 paths as CSV inputs for the training job.
content_type = "csv"
train_input = TrainingInput(s3_data=training_data_path, content_type=content_type)
validation_input = TrainingInput(s3_data=validation_data_path, content_type=content_type)

# Map channel name -> input, then start training on both channels.
data_channels = {
    'train': train_input,
    'validation': validation_input,
}
estimator.fit(inputs=data_channels)

In [None]:
from time import strftime, gmtime

# UTC timestamp suffix keeps endpoint names unique between runs. Year and
# month are included so a run on the same day-of-month in a later month
# cannot produce the same name.
timestamp = strftime('%Y-%m-%d-%H-%M-%S', gmtime())

# Name the endpoint after what is actually deployed (an XGBoost model trained
# on the housing data) so it can be recognised in the SageMaker console.
# Only letters, digits and hyphens are used; total length is 42 characters.
endpoint_name = 'xgboost-boston-housing-' + timestamp
print(endpoint_name)

In [None]:
# Deploy the fitted estimator to an endpoint with the name generated above,
# backed by a single ml.t2.medium instance. The returned predictor is used
# for the inference call further down.
predictor = estimator.deploy(
    endpoint_name=endpoint_name,
    instance_type='ml.t2.medium',
    initial_instance_count=1,
)

In [None]:
# One comma-separated row of feature values (no target) used as the
# inference payload.
# NOTE(review): 12 values — presumably one per non-'medv' column of
# housing.csv, in file order; confirm against the dataset.
test_sample = ','.join((
    '0.00632', '18.00', '2.310', '0',
    '0.5380', '6.5750', '65.20', '4.0900',
    '1', '296.0', '15.30', '4.98',
))

In [None]:
# Configure the predictor to send the request as CSV and to parse the
# endpoint's reply as CSV.
predictor.serializer = sagemaker.serializers.CSVSerializer()
predictor.deserializer = sagemaker.deserializers.CSVDeserializer()

# Score the single sample row and show the parsed reply.
# NOTE(review): no predictor.delete_endpoint() is visible in this part of the
# notebook — confirm the endpoint is cleaned up elsewhere.
response = predictor.predict(data=test_sample)
print(response)