# CS6301 IOT Project 2

## Fault Diagnostic Tool (Read Side)

This side of the application ingests data from the data generator and ensures it's being imported into the database correctly.

### Before Running this Notebook
1. Ensure the database is running on your localhost. Using docker-compose file in the root of the project directory we can start the InfluxDB with the following command from the root directory:
`dc up timeseriesdb`

2. Turn on the data generator with the following command from the "iot" directory.
`java -jar datagen-2.2-SNAPSHOT.jar -offline`

## Connect to Database

In [1]:
!pip install influxdb



In [2]:
from influxdb import InfluxDBClient
import json

In [3]:
client = InfluxDBClient(host='localhost', port=8086, username='admin', password='password')

In [5]:
client.get_list_database()

[{'name': '_internal'}, {'name': 'timeseriesdb'}]

## Ingest Data (Example: Offline Data)
In this section we will ingest the data from a text file for exploration purposes. Normally this data would be ingesteddirctly from the Java data generator.

In [None]:
# client.drop_database('timeseriesdb')

In [None]:
client.create_database('timeseriesdb')
client.switch_database('timeseriesdb')

In [None]:
f = open('data/offline-train.txt', 'r')
lines = f.readlines()
f.close()

In [None]:
len(lines)

In [None]:
measurement = 'gear_metrics'

In [None]:
data = []

In [None]:
x = json.loads(lines[0])
x

In [None]:
data = []
for line in lines:
    json_line = json.loads(line)
    # Form: 'gear_metrics,metric=offline label=0,sr=97656.0,rate=25.0,gs=0.8407559,load=270.0,timestamp=0'
    data.append("{},metric={} label={},sr={},rate={},gs={},load={},timestamp={}".format(measurement, json_line['metric'], json_line['label'], json_line['sr'], json_line['rate'], json_line['gs'], json_line['load'], json_line['timestamp']))
    
    

In [None]:
data[:10]

In [None]:
client.write_points(data, database='timeseriesdb', time_precision='ms', batch_size=100, protocol='line')

In [None]:
results = client.query('SELECT label, sr, rate, gs, load, timestamp FROM timeseriesdb.autogen.gear_metrics')

In [None]:
len(results.raw['series'][0]['values'])

In [None]:
results.raw['series'][0]['values'][0:10]

In [None]:
for point in points:
    print("Time: {}, gs: {}".format(point['timestamp'], point['gs']))

## Offline Training
In this section we visualize the data ingested from the Java data generator. 

In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

tf.logging.set_verbosity(tf.logging.ERROR)

In [None]:
def create_csv():
	file = open('data/offline-train-BIG.txt','r')

	with open('data/data.csv','w') as fcsv:
		fcsv.write("Metric,Timestamp,Label,SR,Rate,GR,Load\n")
		for f in file:
			l= f.split(',')

			m = l[0].split(":")[1].split('"')[1]	
			#metric.append(m)

			t =l[1].split(":")[1]
			#timestamp.append(t)

			la =l[2].split(":")[1]
			#label.append(la)

			s = l[3].split(":")[1]
			#sr.append(s)

			r = l[4].split(":")[1]
			#rate.append(r)

			g = l[5].split(":")[1]
			#gr.append(g)

			lo = l[6].split(":")[1].split('"')[1]	
			#load.append(lo)
			fcsv.write(m+","+t+","+la+","+s+","+r+","+g+","+lo+"\n")

In [None]:
# Convert the JSON to CSV
create_csv()

In [None]:
# We will train the model offline in a py file

## Classify

In [11]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np

In [6]:
results = client.query('SELECT label, sr, gs, load, timestamp FROM timeseriesdb.autogen.gear_metrics')

In [7]:
len(results.raw['series'][0]['values'])

35208

In [9]:
data_to_classify = results.raw['series'][0]['values'][0:10]

In [14]:
gear_data=pd.read_csv("data/data_predict.csv")
print(gear_data.head())

cols_to_norm = ['SR', 'GR','Load']

gear_data[cols_to_norm] = gear_data[cols_to_norm].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

sr = tf.feature_column.numeric_column('SR')
rate = tf.feature_column.numeric_column('Rate')
gr = tf.feature_column.numeric_column('GR')
load = tf.feature_column.numeric_column('Load')

print(type(gear_data))

    Metric  Timestamp  Label       SR  Rate        GR   Load
0  offline          0      0  97656.0  25.0  0.840756  270.0
1  offline          1      0  97656.0  25.0  0.515243  270.0
2  offline          2      0  97656.0  25.0 -0.038345  270.0
3  offline          3      0  97656.0  25.0  1.184862  270.0
4  offline          4      0  97656.0  25.0  0.849715  270.0
<class 'pandas.core.frame.DataFrame'>


In [22]:
model = tf.keras.models.load_model('dnn/models/trained_model.h5')

TypeError: Unexpected keyword argument passed to optimizer: name

In [20]:
predictions = model.predict(x_data.values)

NameError: name 'model' is not defined

In [17]:
feat_cols = [sr,gr,load]

x_data = gear_data.drop(['Label','Metric','Timestamp','Rate'],axis=1)

labels = gear_data['Label']

x_train, x_test, y_train, y_test = train_test_split(x_data,labels,test_size=0.10, random_state=101)


x_data=tf.keras.utils.normalize(x_data,axis=1)
x_train=tf.keras.utils.normalize(x_train,axis=1)
x_test=tf.keras.utils.normalize(x_test,axis=1)


new_model = tf.keras.models.load_model('trained_model.h5')
new_predictions = new_model.predict(x_data.values)

count0, count1, count2 = 0, 0, 0
for i in new_predictions:
	cl = np.argmax(i)
	if cl==0:
		count0+=1
	elif cl==1:
		count1+=1
	elif cl==2:
		count2+=1
val_loss,val_acc=new_model.evaluate(x_test,y_test)
print(x_train.values.shape)
new_model.fit(x_train.values,y_train.values,epochs=1)
new_model.save('trained_model.h5')
print(val_acc)
print(count0,count1,count2)
print("This batch is classified as class: "+str(np.argmax(np.array([count0,count1,count2]))))

OSError: Unable to open file (unable to open file: name = 'trained_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)