In [None]:
import os
import datetime
import itertools

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.ticker as plticker

import boto3

import sagemaker
from sagemaker.amazon.common import RecordSerializer
from sagemaker import RandomCutForest
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

import matplotlib.pyplot as plt

In [None]:
client = boto3.client('timestream-query')
%matplotlib inline

In [None]:
# Query parameters, spliced verbatim into the Timestream SQL text below.
# `interval` is the width passed to BIN(time, ...) for each averaged point.
interval = "1m"
# `period` is the look-back window passed to ago(...).
period = '2d'

In [None]:
# Average the office temperature per `interval` bin over the last `period`,
# ordered oldest bin first.
query_string = (
    'SELECT BIN(time, ' + interval + ') AS BINNED_TIMESTAMP, '
    'ROUND(AVG(measure_value::double), 2) AS TEMPERATURE, ROOM as ROOM '
    'FROM "TemperatureHistory"."TemperatureHistory" '
    'WHERE measure_name = \'TEMPERATURE\' AND ROOM = \'office\' '
    'AND time > ago(' + period + ') '
    'GROUP BY measure_name, BIN(time, ' + interval + '), ROOM '
    'ORDER BY BINNED_TIMESTAMP ASC'
)

# A single client.query() call only returns the first page of results; any
# further rows sit behind NextToken. Walk every page and merge the rows into
# one response-shaped dict so downstream cells can keep reading
# response['Rows'].
response = None
for page in client.get_paginator('query').paginate(QueryString=query_string):
    if response is None:
        # Keep the first page's metadata (e.g. ColumnInfo) alongside the rows.
        response = page
    else:
        response['Rows'].extend(page['Rows'])

# The merged result is complete, so a leftover continuation token would be misleading.
response.pop('NextToken', None)

In [None]:
# Parallel lists: one x-axis label and one raw temperature string per result row.
time = []
temperature = []

for row in response['Rows']:
    # Column 0 is the binned timestamp, column 1 the averaged temperature
    # (the order of the SELECT list in the query).
    raw_date = row["Data"][0]["ScalarValue"]
    raw_temperature = row["Data"][1]["ScalarValue"]

    # %f consumes at most six fractional digits; the literal "000" matches the
    # three that follow (presumably nanosecond-precision timestamps - confirm).
    parsed_date = datetime.datetime.strptime(raw_date, "%Y-%m-%d %H:%M:%S.%f000")

    temperature.append(raw_temperature)
    # Label format is "day/month hour:minute". Minutes are zero-padded so that
    # 08:05 is rendered as "8:05" rather than the ambiguous "8:5".
    time.append(f"{parsed_date.day}/{parsed_date.month} {parsed_date.hour}:{parsed_date.minute:02d}")

In [None]:
# Quick sanity check on the fetched readings: arithmetic mean and row count.
reading_count = len(temperature)
average_temperature = sum(float(reading) for reading in temperature) / reading_count

print("Average temperature over this period: ", average_temperature, "oC", sep="")
print("Seen data points: ", reading_count, sep="")

In [None]:
# Numeric copy of the raw temperature strings, used for plotting and training.
float_temperatures = list(map(float, temperature))

In [None]:
def plot_matplotlib(time, temp):
    """Draw the temperature readings as an orange line on a 10x6 figure.

    Args:
        time: x-axis values, one per reading.
        temp: y-axis temperature values, aligned with ``time``.
    """
    figure, axes = plt.subplots(figsize=(10, 6))

    # Major ticks every 720 units on x and every 0.2 units on y.
    axes.xaxis.set_major_locator(plticker.MultipleLocator(720))
    axes.yaxis.set_major_locator(plticker.MultipleLocator(0.2))

    figure.autofmt_xdate()

    axes.plot(time, temp, color='tab:orange', label='Temperature')

    axes.set(
        xlabel='Date',
        ylabel='Temperature (oC)',
        title='Temperature over time',
    )
    axes.legend(loc='upper left')


plot_matplotlib(time=time, temp=float_temperatures)

In [None]:
# Pair every label with its reading. The pairs are materialized into a list
# because a bare zip() is a one-shot iterator: once a later cell consumes it,
# re-running that cell would silently see no data.
temperature_time_series = list(zip(time, float_temperatures))

In [None]:
# No column names are supplied, so the columns get the default integer labels:
# 0 holds the time label and 1 the temperature (later cells index by 0 and 1).
temperature_dataframe = pd.DataFrame(temperature_time_series)

In [None]:
# Quick pandas line plot of the frame as a visual check.
temperature_dataframe.plot.line(title="Temperature over time")

In [None]:
# SageMaker session handle.
# NOTE(review): no other visible cell reads `session`; the teardown cell
# constructs its own sagemaker.Session() instead.
session = sagemaker.Session()

In [None]:
# Configure the Random Cut Forest estimator: one ml.m4.xlarge training
# instance, 50 trees with 512 samples per tree, with data and model artifacts
# under the bucket paths below.
execution_role = sagemaker.get_execution_role()

rcf = RandomCutForest(
    role=execution_role,
    instance_count=1,
    instance_type="ml.m4.xlarge",
    data_location="s3://xyz.jacobclark.sagemaker/sagemaker/randomcutforest",
    output_path="s3://xyz.jacobclark.sagemaker/sagemaker/randomcutforest/out",
    num_samples_per_tree=512,
    num_trees=50,
)

# Column 1 holds the temperatures; reshape it into an (n, 1) feature matrix
# with one single-feature row per reading.
temperature_column = temperature_dataframe[1]
temperature_dataframe_numpy = temperature_column.to_numpy().reshape(-1, 1)

# Wrap the matrix as a record set and train on it.
record_set = rcf.record_set(temperature_dataframe_numpy)
rcf.fit(record_set)

In [None]:
# Host the trained model behind a single-instance real-time endpoint.
endpoint_settings = {
    "initial_instance_count": 1,
    "instance_type": "ml.m4.xlarge",
}
rcf_inference = rcf.deploy(**endpoint_settings)

In [None]:
# Send requests as CSV and parse responses as JSON.
rcf_inference.serializer, rcf_inference.deserializer = CSVSerializer(), JSONDeserializer()

In [None]:
# Score the same (n, 1) temperature matrix the model was trained on.
# The previous argument name, `temperature_data_numpy`, is never defined in
# this notebook and raised NameError on a fresh kernel; the training cell
# defines `temperature_dataframe_numpy`.
results = rcf_inference.predict(temperature_dataframe_numpy)

In [None]:
# Pull the numeric anomaly score out of each entry of the endpoint response.
scores = []
for scored_point in results["scores"]:
    scores.append(scored_point["score"])

In [None]:
# Attach each reading's anomaly score as a new "score" column, aligned on the
# frame's existing index.
temperature_dataframe["score"] = pd.Series(
    scores,
    index=temperature_dataframe.index,
)

score_column = temperature_dataframe["score"]
score_mean = score_column.mean()
score_std = score_column.std()

# A reading is flagged when its score is more than this many standard
# deviations above the mean score.
# (Original author's note: with a value of 3, temperatures between
# ~17.8 and ~24.8 are considered "normal".)
std_deviations_to_cutoff_by = 3
score_cutoff = score_mean + std_deviations_to_cutoff_by * score_std

is_anomalous = score_column > score_cutoff
anomalies_dataframe = temperature_dataframe.loc[is_anomalous]

In [None]:
# Two y-axes over a shared x-axis: temperature on the left (ax1), anomaly
# score on the right (ax2).
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

# Temperature readings against their time labels.
ax1.plot(temperature_dataframe[0], temperature_dataframe[1], alpha=0.8)

# Anomaly score for every reading, plotted against the frame's index.
ax2.plot(temperature_dataframe['score'], color='C1')

# Black dots mark the readings whose score exceeds the cutoff.
ax2.plot(anomalies_dataframe.index, anomalies_dataframe.score, 'ko')

# Colour each y-axis label and its ticks to match the series drawn on it.
for axis, axis_label, axis_colour in (
    (ax1, 'Temperature Data', 'C0'),
    (ax2, 'Anomaly Score', 'C1'),
):
    axis.set_ylabel(axis_label, color=axis_colour)
    axis.tick_params('y', colors=axis_colour)

# Major x ticks every 720 units.
ax1.xaxis.set_major_locator(plticker.MultipleLocator(720))

fig.autofmt_xdate()
fig.suptitle('Temperature data over time')
plt.show()

In [None]:
# Tear down the hosted endpoint so it stops accruing charges.
# `Predictor.endpoint` is the deprecated SDK v1 attribute; the rest of this
# notebook uses the v2 API, where the attribute is `endpoint_name`.
sagemaker.Session().delete_endpoint(rcf_inference.endpoint_name)