# CS6301 IOT Project 2

## Fault Diagnostic Tool (Read Side)

This side of the application ingests data from the data generator and ensures it's being imported into the database correctly.

### Before Running this Notebook
1. Ensure the database is running on your localhost. Using the docker-compose file in the root of the project directory, start InfluxDB with the following command from the root directory:
`dc up timeseriesdb`

2. Turn on the data generator with the following command from the "iot" directory.
`java -jar datagen-2.2-SNAPSHOT.jar -offline`

## Connect to Database

In [1]:
!pip install influxdb



In [2]:
from influxdb import InfluxDBClient
import json

In [3]:
# Connect to the InfluxDB server listening on localhost:8086.
# NOTE(review): the username/password are hard-coded here — presumably the
# defaults of the local docker-compose container; do not reuse for a shared server.
client = InfluxDBClient(host='localhost', port=8086, username='admin', password='password')

In [4]:
client.get_list_database()

[{'name': 'timeseriesdb'}, {'name': '_internal'}]

## Ingest Data (Example: Offline Data)
In this section we will ingest the data from a text file for exploration purposes. Normally this data would be ingested directly from the Java data generator.

In [5]:
# WARNING: destructive — drops the 'timeseriesdb' database before it is
# recreated below, so the example starts from an empty database.
client.drop_database('timeseriesdb')

In [6]:
# Create the 'timeseriesdb' database and make it the client's current database.
client.create_database('timeseriesdb')
client.switch_database('timeseriesdb')

In [15]:
# Read every record of the small offline sample into memory (one JSON
# document per line). The context manager closes the file even if the
# read raises, which the manual open()/close() pair did not guarantee.
with open('data/offline-train-SMALL.txt', 'r') as f:
    lines = f.readlines()

In [16]:
len(lines)

26452

In [17]:
measurement = 'gear_metrics'

In [18]:
data = []

In [19]:
x = json.loads(lines[0])
x

{'metric': 'offline',
 'timestamp': 0,
 'label': 0,
 'sr': 97656.0,
 'rate': 25.0,
 'gs': 0.8407559,
 'load': '270.0'}

In [20]:
# Turn every JSON record into one InfluxDB line-protocol string:
# `metric` becomes a tag, every other value becomes a field.
# Form: 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=0.8407559,load=270.0,timestamp=0'
line_format = "{},metric={} label={},sr={},rate={},gs={},load={},timestamp={}"
# Record keys in the order the template consumes them.
value_keys = ('metric', 'label', 'sr', 'rate', 'gs', 'load', 'timestamp')

data = []
for line in lines:
    json_line = json.loads(line)
    values = [json_line[key] for key in value_keys]
    data.append(line_format.format(measurement, *values))
    
    

In [21]:
data[:10]

['gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=0.8407559,load=270.0,timestamp=0',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=0.5152432,load=270.0,timestamp=1',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=-0.03834483,load=270.0,timestamp=2',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=1.184862,load=270.0,timestamp=3',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=0.8497145,load=270.0,timestamp=4',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=-0.3333637,load=270.0,timestamp=5',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=0.0906198,load=270.0,timestamp=6',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=-0.3867708,load=270.0,timestamp=7',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=-0.4381106,load=270.0,timestamp=8',
 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=-0.7925295,load=270.0,timestamp=9']

In [None]:
'labeled_data, metric=classification timestamp=2020-03-26T21:14:48.527Z,label=0'

In [41]:
# Send the line-protocol strings in `data` to 'timeseriesdb' in batches of 100.
# NOTE: the recorded output below is a NameError because `data` was not defined
# in the kernel when this cell last ran — run the cell that builds `data` first.
client.write_points(data, database='timeseriesdb', time_precision='ms', batch_size=100, protocol='line')

NameError: name 'data' is not defined

In [45]:
# Fetch the stored gear metrics from the 'gear_metrics' measurement.
# NOTE(review): this query selects the field `ts`, while the offline ingestion
# cell writes the field as `timestamp` and the Classify query selects
# `timestamp` — confirm which field name the data generator actually writes.
results = client.query('SELECT label, sr, rate, gs, load, ts FROM timeseriesdb.autogen.gear_metrics')

In [46]:
len(results.raw['series'][0]['values'])

291742

In [47]:
results.raw['series'][0]['values'][-10:]

[['2020-03-25T06:56:53.605Z', 0, 97656, 25, -0.3707241, '270.0', 268],
 ['2020-03-25T06:56:53.655Z', 0, 97656, 25, -0.2623347, '270.0', 269],
 ['2020-03-25T06:56:53.705Z', 0, 97656, 25, -0.2415872, '270.0', 270],
 ['2020-03-25T06:56:53.754Z', 0, 97656, 25, 0.1525302, '270.0', 271],
 ['2020-03-25T06:56:53.803Z', 0, 97656, 25, -0.1951451, '270.0', 272],
 ['2020-03-25T06:56:53.852Z', 0, 97656, 25, -0.04591288, '270.0', 273],
 ['2020-03-25T06:56:53.9Z', 0, 97656, 25, 0.7581841, '270.0', 274],
 ['2020-03-25T06:56:53.966Z', 0, 97656, 25, 1.201549, '270.0', 275],
 ['2020-03-25T06:56:54.019Z', 0, 97656, 25, 0.6854323, '270.0', 276],
 ['2020-03-25T06:56:54.071Z', 0, 97656, 25, -0.462476, '270.0', 277]]

In [None]:
# Print the timestamp and gs value of each point.
# NOTE(review): `points` is not defined in any cell shown in this notebook —
# presumably it was meant to be `results.get_points()`; confirm before running.
for point in points:
    print("Time: {}, gs: {}".format(point['timestamp'], point['gs']))

## Offline Training
In this section we visualize the data ingested from the Java data generator. 

In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Only show TensorFlow errors. `tf.logging` was removed in TensorFlow 2.x;
# the `tf.compat.v1` alias is available in both late 1.x and 2.x releases.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [None]:
def create_csv(src_path='data/offline-train-BIG.txt', dst_path='data/data.csv'):
    """Convert the line-delimited JSON training dump into a CSV file.

    Each non-blank line of ``src_path`` must be one JSON object holding the
    keys ``metric``, ``timestamp``, ``label``, ``sr``, ``rate``, ``gs`` and
    ``load``. One CSV row is written per record under the header
    ``Metric,Timestamp,Label,SR,Rate,GR,Load`` (the ``gs`` value is stored in
    the ``GR`` column).

    Args:
        src_path: Path of the line-delimited JSON input file.
        dst_path: Path of the CSV file to create (overwritten if it exists).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the expected keys.
    """
    columns = ('metric', 'timestamp', 'label', 'sr', 'rate', 'gs', 'load')

    # Both files are managed by the `with` block so neither handle leaks.
    with open(src_path, 'r') as src, open(dst_path, 'w') as fcsv:
        fcsv.write("Metric,Timestamp,Label,SR,Rate,GR,Load\n")
        for raw in src:
            if not raw.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # parse_float/parse_int=str keep numeric tokens exactly as they
            # appear in the input, so the CSV text is not re-formatted.
            record = json.loads(raw, parse_float=str, parse_int=str)
            fcsv.write(",".join(str(record[key]) for key in columns) + "\n")

In [None]:
# Convert the JSON to CSV
create_csv()

In [None]:
# We will train the model offline in a py file

## Classify

In [7]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np

In [10]:
# Re-query the gear metrics for the classification step; unlike the earlier
# exploration query, this one does not select `rate`.
results = client.query('SELECT label, sr, gs, load, timestamp FROM timeseriesdb.autogen.gear_metrics')

In [11]:
len(results.raw['series'][0]['values'])

275

In [8]:
data_to_classify = results.raw['series'][0]['values'][0:10]

In [9]:
# Load the samples to classify and preview the first rows.
gear_data = pd.read_csv("data/data_predict.csv")
print(gear_data.head())

# Columns that are min-max scaled below ('Rate' is left unscaled).
cols_to_norm = ['SR', 'GR', 'Load']


def _min_max_scale(column):
    """Rescale a numeric column to [0, 1]; a constant column maps to 0.0.

    The plain (x - min) / (max - min) formula divides by zero when every
    value in the column is equal, which fills the column with NaN.
    """
    low = column.min()
    span = column.max() - low
    if span == 0:
        return (column - low).astype(float)
    return (column - low) / span


# NOTE(review): scaling uses this file's own min/max — confirm this matches
# the scaling applied when the model was trained.
gear_data[cols_to_norm] = gear_data[cols_to_norm].apply(_min_max_scale)

# One numeric feature column per input column of the CSV.
sr = tf.feature_column.numeric_column('SR')
rate = tf.feature_column.numeric_column('Rate')
gr = tf.feature_column.numeric_column('GR')
load = tf.feature_column.numeric_column('Load')

print(type(gear_data))

    Metric  Timestamp  Label       SR  Rate        GR   Load
0  offline          0      0  97656.0  25.0  0.840756  270.0
1  offline          1      0  97656.0  25.0  0.515243  270.0
2  offline          2      0  97656.0  25.0 -0.038345  270.0
3  offline          3      0  97656.0  25.0  1.184862  270.0
4  offline          4      0  97656.0  25.0  0.849715  270.0
<class 'pandas.core.frame.DataFrame'>


In [50]:
# Fetch the label and timestamp fields from the 'labeled_data' measurement.
classifications = client.query('SELECT label, timestamp FROM timeseriesdb.autogen.labeled_data')

In [51]:
len(classifications.raw['series'][0]['values'])

1013

In [52]:
classifications.raw['series'][0]['values']

[['2020-03-26T21:21:36.969Z', 0, 23021],
 ['2020-03-26T21:25:10.586Z', 2, 30696],
 ['2020-03-26T21:25:54.946Z', 0, 33234],
 ['2020-03-26T21:28:32.79Z', 0, 35038],
 ['2020-03-26T21:28:32.813Z', 0, 35138],
 ['2020-03-26T21:28:32.825Z', 0, 35238],
 ['2020-03-26T21:28:32.838Z', 0, 35338],
 ['2020-03-26T21:28:32.853Z', 0, 35438],
 ['2020-03-26T21:28:32.87Z', 0, 35538],
 ['2020-03-26T21:28:32.883Z', 0, 35638],
 ['2020-03-26T21:28:32.895Z', 0, 35738],
 ['2020-03-26T21:28:32.907Z', 0, 35838],
 ['2020-03-26T21:28:32.919Z', 0, 35938],
 ['2020-03-26T21:29:02.507Z', 0, 35388],
 ['2020-03-26T21:29:02.522Z', 0, 35389],
 ['2020-03-26T21:29:02.535Z', 0, 35390],
 ['2020-03-26T21:29:02.544Z', 0, 35391],
 ['2020-03-26T21:29:02.553Z', 0, 35392],
 ['2020-03-26T21:29:02.563Z', 0, 35393],
 ['2020-03-26T21:29:02.574Z', 0, 35394],
 ['2020-03-26T21:29:02.586Z', 0, 35395],
 ['2020-03-26T21:29:02.597Z', 0, 35396],
 ['2020-03-26T21:29:02.61Z', 0, 35397],
 ['2020-03-26T21:29:02.621Z', 0, 35398],
 ['2020-03-26T21:29