*Note*: None of this code has been run in this Jupyter notebook; it was simply copied and pasted from my AWS SageMaker instance.

# Predicting Household Voltage Output

[This tutorial](https://aws.amazon.com/blogs/machine-learning/build-multiclass-classifiers-with-amazon-sagemaker-linear-learner/) by AWS is by far the most helpful for my purposes.

## Connect to S3 bucket and load/prepare data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import boto3
from sagemaker import get_execution_role

This block of code imports the data directly from the 'sagemaker-uci-iot-data' S3 bucket.

In [None]:
# IAM role of this notebook; reused later when the estimator is created.
role = get_execution_role()
bucket = 'sagemaker-uci-iot-data'
data_key = 'household_power_consumption.csv'
data_location = 's3://{}/{}'.format(bucket, data_key)

# Parse the timestamp column and make it the index so later cells can slice by date.
power = pd.read_csv(data_location, parse_dates=['Date_Time']).set_index('Date_Time')
power.head()

In [None]:
# Keep only the rows in which every column parses as a number, then store the
# result as float32. Values are coerced once and a new frame is built, instead of
# assigning column-by-column into a filtered slice (SettingWithCopyWarning).
power_numeric = power.apply(pd.to_numeric, errors='coerce')
power = power_numeric.dropna().astype('float32')

print(power.dtypes)
print(len(power))

In [None]:
# Restrict the frame to 2007-01-18 through 2007-01-26 and to the target column
# (Voltage) plus the four columns used as features.
model_columns = ['Global_active_power', 'Voltage', 'Laundry', 'Heat_AC', 'Kitchen']
study_window = slice('2007-01-18', '2007-01-26')
data = power.loc[study_window, model_columns]
data.head()

In [None]:
def timeseries_train_test_split(X, y, test_size):
    """
    Perform train-test split with respect to time series structure.

    Rows are split by position without shuffling: the leading
    ``1 - test_size`` fraction becomes the training set and the remaining
    rows become the test set. Assumes the rows are already in chronological
    order; no sorting is done here.

    Parameters
    ----------
    X : object supporting ``len()`` and ``.iloc`` (e.g. pandas.DataFrame)
        Feature rows.
    y : object supporting ``len()`` and ``.iloc`` (e.g. pandas.Series)
        Targets aligned row-for-row with ``X``.
    test_size : float
        Fraction of rows, between 0 and 1 inclusive, held out for testing.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``test_size`` is outside ``[0, 1]`` or if ``X`` and ``y`` have
        different lengths.
    """
    # A fraction outside [0, 1] would produce a negative or over-long slice
    # index and silently return a nonsensical split.
    if not 0 <= test_size <= 1:
        raise ValueError(
            'test_size must be between 0 and 1, got {!r}'.format(test_size))
    if len(X) != len(y):
        raise ValueError(
            'X and y must have the same length, got {} and {}'.format(len(X), len(y)))

    # get the index after which test set starts
    test_index = int(len(X)*(1-test_size))

    X_train = X.iloc[:test_index]
    y_train = y.iloc[:test_index]
    X_test = X.iloc[test_index:]
    y_test = y.iloc[test_index:]

    return X_train, X_test, y_train, y_test

In [None]:
from sklearn.preprocessing import StandardScaler
# Single shared scaler instance: it is fitted on the training features and then
# reused to transform the test features in the split-and-scale cell.
scaler = StandardScaler()

In [None]:
# Drop incomplete rows once, then separate the Voltage target from the features.
complete_rows = data.dropna()
y = complete_rows['Voltage']
X = complete_rows.drop(['Voltage'], axis='columns')

# Hold out the final 30% of rows (by position) as the test set.
X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)

# Fit the scaler on the training features only; both splits are then transformed
# with those same statistics and stored as float32.
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train).astype('float32')
X_test_scaled = scaler.transform(X_test).astype('float32')

## Making Predictions

In [None]:
from sagemaker import LinearLearner
from sagemaker.amazon.amazon_estimator import RecordSet

In [None]:
# S3 destinations for the staged training records and the trained model artifacts.
bucket = 'sagemaker-uci-iot-data'
# NOTE(review): this rebinds the name that held the CSV's S3 URL in the loading
# cell, and no later cell visible here reads it — confirm whether it is needed.
data_location = "training_data"

data_output_location, model_output_location = (
    template.format(bucket)
    for template in (
        's3://{}/household_consumption_data/data',
        's3://{}/household_consumption_predictions/output',
    )
)

print('training data will be uploaded to: {}'.format(data_output_location))
print('training artifacts will be uploaded to: {}'.format(model_output_location))




In [None]:
# Linear Learner estimator in regression mode (the target, Voltage, is numeric).
# Staged training records go to data_output_location; model artifacts go to
# model_output_location.
linear = LinearLearner(
    role=role,
    train_instance_count=1,
    train_instance_type='ml.c4.8xlarge',
    output_path=model_output_location,
    data_location=data_output_location,
    predictor_type='regressor',
)

In [None]:
# wrap data in RecordSet objects
# y_train / y_test are pandas Series carrying the Date_Time index, while
# record_set documents `labels` as a 1-D numpy array. Convert explicitly so label
# lookup is purely positional and matches the float32 feature matrices.
train_labels = np.asarray(y_train, dtype='float32')
test_labels = np.asarray(y_test, dtype='float32')

train_records = linear.record_set(X_train_scaled, train_labels, channel='train')
test_records = linear.record_set(X_test_scaled, test_labels, channel='test')

In [None]:
# start a training job
# Both record sets are passed together: the one on the 'train' channel and the
# one on the 'test' channel (presumably used for held-out evaluation — confirm
# against the Linear Learner documentation).
linear.fit([train_records, test_records])

In [None]:
# deploy a model hosting endpoint
# Requests one ml.m4.xlarge instance for hosting and keeps the returned object so
# the endpoint can be deleted again afterwards.
linear_predictor = linear.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

In [None]:
# Delete the hosting endpoint created by linear.deploy() once it is no longer needed.
# NOTE(review): presumably the endpoint keeps incurring charges until this runs — confirm.
linear_predictor.delete_endpoint()