In [None]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Prepare leakage-free train/test sets from the raw transactions file.
#
# Order of operations matters here:
#   1. split into train/test,
#   2. fit the scaler on the training rows only,
#   3. oversample the training rows only.
# Scaling or oversampling before the split lets information from the test rows
# (including exact duplicates of minority rows) leak into training, which
# inflates every metric computed on the test set.
data = pd.read_csv('data/creditcards.csv')

# Shuffle rows reproducibly.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Separate features and target.
X = data.drop('Class', axis=1)
y = data['Class']

# Hold out 20% of the ORIGINAL rows; stratify so both sides keep the original
# class ratio. The test set is deliberately left imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale 'Amount' to [0, 1] using statistics learned from the training rows only.
# Test values are transformed with the same fitted scaler, so they may fall
# slightly outside [0, 1]; that is expected.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = X_train.assign(Amount=scaler.fit_transform(X_train[['Amount']]).ravel())
X_test = X_test.assign(Amount=scaler.transform(X_test[['Amount']]).ravel())

# Balance the classes by random oversampling -- training rows only.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Assemble the final frames with the label as the FIRST column: the frames are
# written without a header, and SageMaker's built-in XGBoost reads headerless
# CSV with the target in column 0.
# The DataFrame/Series wrappers keep this working if the resampler hands back
# plain arrays instead of pandas objects.
train_data = pd.concat(
    [
        pd.Series(y_resampled, name='Class').reset_index(drop=True),
        pd.DataFrame(X_resampled, columns=X.columns).reset_index(drop=True),
    ],
    axis=1,
)
test_data = pd.concat(
    [
        pd.Series(y_test, name='Class').reset_index(drop=True),
        pd.DataFrame(X_test, columns=X.columns).reset_index(drop=True),
    ],
    axis=1,
)

# Local copies of both sets (no header, no index).
train_data.to_csv('train_data.csv', index=False, header=False)
test_data.to_csv('test_data.csv', index=False, header=False)

In [None]:
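# Manual prerequisite: create the S3 bucket before running the cells below.
#
# Bucket name: "sagemaker-build-and-deploy-sagemaker"
#
# The following cell moves the train/test data into this bucket.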

In [None]:
import boto3

bucket_name = "sagemaker-build-and-deploy-sagemaker"


def upload_frame_as_csv(frame, bucket, key, label='Class', local_path='data.csv'):
    """Write ``frame`` to a headerless CSV and upload it to ``s3://bucket/key``.

    The ``label`` column is moved to the first position before writing (a no-op
    when it is already first), because the file is written without a header and
    SageMaker's built-in XGBoost takes the target from column 0.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to upload; must contain a ``label`` column.
    bucket : str
        Name of an existing S3 bucket.
    key : str
        Object key inside the bucket.
    label : str
        Name of the target column.
    local_path : str
        Scratch file written locally before the upload; overwritten on each call.

    Returns
    -------
    str
        The ``s3://`` URI of the uploaded object.

    Raises
    ------
    KeyError
        If ``frame`` has no ``label`` column.
    """
    feature_columns = [column for column in frame.columns if column != label]
    ordered = frame[[label] + feature_columns]
    ordered.to_csv(local_path, header=False, index=False)
    boto3.Session().resource('s3').Bucket(bucket).Object(key).upload_file(local_path)
    return 's3://{}/{}'.format(bucket, key)


train_url = upload_frame_as_csv(train_data, bucket_name, 'data/train/data')
test_url = upload_frame_as_csv(test_data, bucket_name, 'data/test/data')

In [None]:
# create model
import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri

# Where SageMaker writes the trained model artifact.
key = 'model/xgb_model'
s3_output_location = url = 's3://{}/{}'.format(bucket_name, key)

# Configure a training job that runs the built-in XGBoost container.
xgb_model = sagemaker.estimator.Estimator(
    get_image_uri(boto3.Session().region_name, 'xgboost'),
    # Qualified through the already-imported `sagemaker` package; the bare name
    # `get_execution_role` is never imported in this notebook and would raise
    # NameError on a fresh kernel.
    sagemaker.get_execution_role(),
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    train_volume_size=5,
    output_path=s3_output_location,
    sagemaker_session=sagemaker.Session(),
)

# NOTE(review): the objective is multi-class softmax with num_class=3. If the
# 'Class' label has only two values, 'binary:logistic' (without num_class) is
# the natural objective -- confirm the label cardinality before changing it,
# since that also changes the endpoint's output from class ids to probabilities.
xgb_model.set_hyperparameters(
    max_depth=5,
    eta=.2,
    gamma=4,
    min_child_weight=6,
    silent=0,
    objective='multi:softmax',
    num_class=3,
    num_round=10,
)

In [None]:
# Train the model on the CSV data previously uploaded to S3.
#
# The S3 prefixes get their own names rather than reusing `train_data` /
# `test_data`: those names hold the DataFrames built earlier, and rebinding them
# to strings would break any re-run of the upload cell.
train_s3_uri = 's3://{}/{}'.format(bucket_name, 'data/train')
test_s3_uri = 's3://{}/{}'.format(bucket_name, 'data/test')

# `s3_input` is the input-channel class of the v1 SageMaker SDK used throughout
# this notebook; `sagemaker.session.s3.input` is not an SDK callable.
train_channel = sagemaker.session.s3_input(train_s3_uri, content_type='text/csv')
test_channel = sagemaker.session.s3_input(test_s3_uri, content_type='text/csv')

# NOTE(review): the built-in XGBoost algorithm documents 'train' and
# 'validation' channels -- confirm that a channel named 'test' is consumed as
# intended.
data_channels = {'train': train_channel, 'test': test_channel}

xgb_model.fit(inputs=data_channels)

In [None]:
# Deploy the trained model behind a single-instance real-time endpoint.
# 'ml.m4.xlarge' matches the training instance type; SageMaker's ml.m4 family
# starts at xlarge, so 'ml.m4.large' is rejected when the endpoint configuration
# is created.
# NOTE(review): the endpoint bills until it is deleted -- remember to tear it down.
xgb_predictor = xgb_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)