In [1]:
## import dependencies 
import tensorflow as tf
import numpy as np 
import pandas as pd 
from datetime import datetime, date, time
import os 
import glob
import csv
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
## importing dependencies 
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS 

## define influxdb parameters
# The API token is a secret and must never be stored in the notebook itself.
# It is read from the INFLUXDB_TOKEN environment variable; any token that was
# previously committed with this notebook should be revoked and reissued.
token = os.environ.get("INFLUXDB_TOKEN")
if not token:
    raise RuntimeError(
        "InfluxDB token not found: set the INFLUXDB_TOKEN environment variable "
        "before running this notebook."
    )
org = "ThirdYearProject"
# NOTE(review): `bucket` is not referenced by the queries visible in this
# notebook (they name the bucket "MicrosoftData" directly) - confirm it is needed.
bucket = "de75cd94d8127ada"

## connect to influxdb
client = InfluxDBClient(url="http://localhost:8086", token=token, org=org)

In [3]:
## query data

# Query handle obtained from the InfluxDB client; dataReady() uses it to run
# the Flux queries defined below.
query_api = client.query_api()

def _train_query(channel):
    """Return the Flux query text for one channel of segment "31".

    The query reads the "Network Prediction" measurement from the
    "MicrosoftData" bucket over the fixed 2015-01-01 .. 2016-05-01 range,
    pivots fields into columns and keeps only `_time` and `Q-Factor`.
    The pieces are joined without separators so the resulting text is
    exactly the single-line query used previously.
    """
    stages = (
        'from(bucket:"MicrosoftData")',
        '|> range(start:  2015-01-01T00:00:00Z, stop: 2016-05-01T11:59:59Z)',
        '|> filter(fn: (r) => r._measurement == "Network Prediction"',
        'and r.Channel == "' + channel + '" and r.Segment == "31")',
        '|> pivot(rowKey:["_time"], columnKey:["_field"], valueColumn:"_value")',
        '|> keep(columns: ["_time", "Q-Factor"])',
    )
    return "".join(stages)


# One query per optical channel; only the channel tag differs.
query1_train = _train_query("channel_1144")
query2_train = _train_query("channel_1147")

def dataReady(query):
    """Run a Flux query and return the result as a time-indexed DataFrame.

    Parameters
    ----------
    query : str
        Flux query text passed to ``query_api.query_data_frame``.

    Returns
    -------
    pandas.DataFrame
        The query result indexed by its ``_time`` column (converted with
        ``pd.to_datetime``), with the ``result`` and ``table`` bookkeeping
        columns removed when they are present.

    Raises
    ------
    ValueError
        If the query returns nothing usable (no frames, or no ``_time`` column).
    """
    data = query_api.query_data_frame(query)

    # query_data_frame returns a list of frames when the result spans several
    # tables; merge them so the rest of the function sees a single frame.
    if isinstance(data, list):
        if not data:
            raise ValueError("Query returned no data: " + query)
        data = pd.concat(data, ignore_index=True)

    # An empty result has no `_time` column; fail with a readable message
    # instead of an opaque KeyError from set_index.
    if "_time" not in data.columns:
        raise ValueError("Query returned no '_time' column (empty result?): " + query)

    # Build new frames rather than mutating in place, so re-running is safe.
    data = data.set_index("_time")
    data.index = pd.to_datetime(data.index)
    return data.drop(columns=["result", "table"], errors="ignore")

# Load the Q-Factor series for the two channels (channel_1144 and channel_1147,
# per the query definitions) as time-indexed frames.
data1  = dataReady(query1_train)
data2  = dataReady(query2_train)

In [4]:
# Summary statistics of the first series, shown with one row per column.
data1.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Q-Factor,35074.0,13.319445,0.503416,10.65,13.12,13.54,13.72,14.25


In [5]:
# First rows of the first series, to check the time index and sample spacing.
data1.head()

Unnamed: 0_level_0,Q-Factor
_time,Unnamed: 1_level_1
2015-02-02 12:00:00+00:00,12.56
2015-02-02 12:15:00+00:00,12.54
2015-02-02 12:30:00+00:00,12.55
2015-02-02 12:45:00+00:00,12.56
2015-02-02 13:00:00+00:00,12.6


In [6]:
# Summary statistics of the second series, shown with one row per column.
data2.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Q-Factor,35074.0,13.265218,0.495046,10.29,13.13,13.48,13.67,14.1


In [7]:
# First rows of the second series, to check the time index and sample spacing.
data2.head()

Unnamed: 0_level_0,Q-Factor
_time,Unnamed: 1_level_1
2015-02-02 12:00:00+00:00,12.53
2015-02-02 12:15:00+00:00,12.54
2015-02-02 12:30:00+00:00,12.52
2015-02-02 12:45:00+00:00,12.49
2015-02-02 13:00:00+00:00,12.48


In [8]:
#data_train = pd.concat([data1, data2, data3, data4], axis=1)
#data_train.columns = ["Data1", "Data2", "Data3", "Data4"]
#df = pd.concat([data1, data2], axis=1)
#df.columns = ["Data1", "Data2"]
#df.head()

In [9]:
#for col in data_train:
    #print(data_train[col].isnull().sum())

In [10]:
# Build a batched (features, label) dataset from a time series.
def ts_data_generator(data, window_size, batch_size, shuffle_buffer):
    """Turn a series into shuffled, batched sliding-window training pairs.

    Each example is a run of ``window_size + 1`` consecutive elements of
    ``data`` (windows advance by one element; incomplete trailing windows are
    dropped). The first ``window_size`` elements are the input features and
    the final element is the label.

    Parameters
    ----------
    data : array-like or tensor
        Series sliced along its first axis.
    window_size : int
        Number of past elements used as input features.
    batch_size : int
        Number of examples per batch.
    shuffle_buffer : int
        Buffer size handed to ``Dataset.shuffle``.

    Returns
    -------
    tf.data.Dataset
        Batches of ``(features, label)`` pairs, prefetched one batch ahead.
    """
    span = window_size + 1

    def _features_and_label(sequence):
        # everything but the last element is input; the last one is the target
        return sequence[:-1], sequence[-1]

    windows = tf.data.Dataset.from_tensor_slices(data).window(
        span, shift=1, drop_remainder=True
    )
    # each window is itself a small dataset; batching it yields one tensor
    sequences = windows.flat_map(lambda w: w.batch(span))
    examples = sequences.shuffle(shuffle_buffer).map(_features_and_label)
    return examples.batch(batch_size).prefetch(1)

## required parameters
# Number of past samples fed to the model per example; at the 15-minute spacing
# visible in data1.head() this corresponds to 90 hours of history.
WINDOW_SIZE = 360
# Number of windowed examples per training/prediction batch.
BATCH_SIZE = 64
# Shuffle buffer size for ts_data_generator; close to the ~35k rows reported by
# describe(), so shuffling is presumably near-uniform - TODO confirm intent.
SHUFFLE_BUFFER = 35000

In [11]:
## converting data to numpy for better/faster processing
# The *_np arrays hold the frame values; the time_index_* arrays hold the
# matching timestamps so slices of values and times stay aligned.
# NOTE(review): each frame has a single Q-Factor column, so data*_np is
# presumably 2-D with shape (n, 1) rather than flat - confirm downstream shapes.
time_index_data1 = np.array(data1.index)
data1_np = np.array(data1)
time_index_data2 = np.array(data2.index)
data2_np = np.array(data2)

In [12]:
# Chronological split of each series into train / validation / test segments
# (first 60% / next 10% / final 30%). No shuffling: later samples are never
# used to train a model that is evaluated on earlier ones.
def _chronological_split(values, times, train_frac=0.6, val_end_frac=0.7):
    """Split one series and its timestamps into train/val/test slices.

    The cut points are computed from the length of ``values`` itself, so a
    series is never sliced with indices derived from a different series.

    Returns a 6-tuple:
    (train_values, train_times, val_values, val_times, test_values, test_times).
    """
    total = len(values)
    train_end = int(total * train_frac)
    val_end = int(total * val_end_frac)
    return (
        values[:train_end], times[:train_end],
        values[train_end:val_end], times[train_end:val_end],
        values[val_end:], times[val_end:],
    )


# Length of series 1; kept as a module-level name because later cells use it
# to locate the start of the test segment.
n=len(data1_np)

(train_data1, train_time1,
 val_data1, val_time1,
 test_data1, test_time1) = _chronological_split(data1_np, time_index_data1)

(train_data2, train_time2,
 val_data2, val_time2,
 test_data2, test_time2) = _chronological_split(data2_np, time_index_data2)

In [16]:
# building model
# Stack: causal 1-D convolution -> two LSTM layers -> three Dense layers.
# input_shape=[None, 1] accepts sequences of any length with one feature.
# Both LSTM layers return full sequences, so the model emits one value per
# input timestep; later cells read only the last timestep of each window.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=5, strides=1, padding="causal", activation="relu",
        input_shape=[None,1]),
    tf.keras.layers.LSTM(64, return_sequences=True),

    tf.keras.layers.LSTM(64, return_sequences=True), 

    tf.keras.layers.Dense(30, activation="relu"),  

    tf.keras.layers.Dense(10, activation="relu"), 

    tf.keras.layers.Dense(1)])

# Learning-rate sweep: the callback sets the rate to 1e-8 * 10**epoch, i.e. it
# starts at 1e-8 and is multiplied by 10 every epoch.
# NOTE(review): this schedule is attached to every fit() call in trainModel, so
# the model used for forecasting is trained under the sweep rather than at one
# chosen learning rate - confirm this is intended.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8*10**(epoch))
# SGD with momentum; the initial learning_rate matches the schedule's first value.
optimiser = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
# Huber loss for training, mean absolute error reported as an extra metric.
model.compile(loss=tf.keras.losses.Huber(), optimizer=optimiser, metrics=["mae"])

## training model
def trainModel(model, train_data, val_data, epochs=10):
    """Fit ``model`` on windowed data and plot its loss curves.

    Parameters
    ----------
    model : tf.keras.Model
        Compiled model to train (trained in place and also returned).
    train_data, val_data : array-like
        Training and validation series; a trailing axis is added before
        windowing with ``ts_data_generator``.
    epochs : int, optional
        Number of epochs to train for (default 10, the previous fixed value).

    Returns
    -------
    tf.keras.Model
        The same ``model`` object after training.

    Notes
    -----
    Uses the module-level ``lr_schedule`` callback and the ``WINDOW_SIZE``,
    ``BATCH_SIZE`` and ``SHUFFLE_BUFFER`` constants.
    """
    # generating input and output features for the training and validation sets
    tensor_train_data = tf.expand_dims(train_data, axis=-1)
    tensor_val_data = tf.expand_dims(val_data, axis=-1)

    tensor_train_dataset = ts_data_generator(tensor_train_data, WINDOW_SIZE, BATCH_SIZE, SHUFFLE_BUFFER)
    tensor_val_dataset = ts_data_generator(tensor_val_data, WINDOW_SIZE, BATCH_SIZE, SHUFFLE_BUFFER)

    history = model.fit(tensor_train_dataset, epochs=epochs, validation_data=tensor_val_dataset,
                        callbacks=[lr_schedule], verbose=0)

    train_loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Learning rate actually used in each epoch, taken from the schedule itself
    # so there is exactly one value per recorded loss. (A fixed 100-point grid
    # does not match the number of epochs and makes the plot call fail.)
    lrs = np.array([lr_schedule.schedule(epoch) for epoch in history.epoch], dtype=float)

    # Two separate axes: the learning-rate sweep needs a log x-axis, the
    # per-epoch curves need a linear one.
    fig, (ax_lr, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

    # determining optimum learning rate for model
    ax_lr.semilogx(lrs, train_loss)
    ax_lr.set_title('loss vs learning rate')
    ax_lr.set_xlabel('learning rate')
    ax_lr.set_ylabel('loss')

    ## losses of the model
    ax_loss.plot(history.epoch, train_loss)
    ax_loss.plot(history.epoch, val_loss)
    ax_loss.set_title('model loss')
    ax_loss.set_ylabel('loss')
    ax_loss.set_xlabel('epoch')
    # the second curve is the validation loss, not a test loss
    ax_loss.legend(['train', 'validation'], loc='upper left')

    fig.tight_layout()
    plt.show()

    return model
    
# Train the same model sequentially: first on series 1, then continue from the
# resulting weights on series 2.
# NOTE(review): each call runs a fresh fit() with the learning-rate sweep
# callback, so the sweep presumably restarts for the second series - confirm.
model = trainModel(model, train_data1, val_data1)
model = trainModel(model, train_data2, val_data2)

KeyboardInterrupt: 

In [None]:
## making a prediction
def model_forecast(model, data, window_size, batch_size):
    """Run ``model`` over every sliding window of ``data``.

    Windows contain ``window_size`` consecutive elements and advance by one
    element; incomplete trailing windows are dropped. Windows are evaluated
    in their original order (no shuffling).

    Parameters
    ----------
    model : tf.keras.Model
        Trained model used for prediction.
    data : array-like or tensor
        Series sliced along its first axis.
    window_size : int
        Number of elements per window.
    batch_size : int
        Number of windows per prediction batch.

    Returns
    -------
    The output of ``model.predict`` for all windows; per the original note its
    dimensions are (no. windows, window size, no. features).
    """
    def _as_tensor(window):
        # each window arrives as a small dataset; batch it into one tensor
        return window.batch(window_size)

    windows = tf.data.Dataset.from_tensor_slices(data).window(
        window_size, shift=1, drop_remainder=True
    )
    batches = windows.flat_map(_as_tensor).batch(batch_size).prefetch(1)
    return model.predict(batches)

# Forecast over the whole of series 1 (train + validation + test); the test
# portion is sliced out of the result in a later cell.
# A trailing axis is added to the input before windowing.
# NOTE(review): data1_np is presumably already (n, 1), so the extra axis gives
# (n, 1, 1) - confirm this is the input shape the model is meant to receive.
forecast = model_forecast(model, data1_np[...,np.newaxis], WINDOW_SIZE, BATCH_SIZE)

In [None]:
# Sanity check; expected to be (no. windows, window size, no. features) - TODO confirm
print(forecast.shape)

In [None]:
# Select the forecasts that line up with the test segment.
# Forecast window i covers samples i .. i+WINDOW_SIZE-1, so its last-timestep
# output is the prediction for sample i+WINDOW_SIZE. The test segment starts at
# sample int(n*0.7), hence the first window needed is int(n*0.7)-WINDOW_SIZE.
# The final window is dropped (:-1) because it would predict a sample past the
# end of the series. This yields one prediction per test sample.
predicted_values = forecast[int(n*0.7)-WINDOW_SIZE:-1,-1,0]#batch size (: all) time slice (-1 last one) features (only one, so first one)

# should equal the number of rows in test_data1
print(predicted_values.shape)

In [None]:
#print(predicted_values.shape)
#predicted_values = np.squeeze(predicted_values)
#print(predicted_values.shape)

In [None]:
#print(predicted_values.dtype)

In [None]:
# Mean absolute error of the forecast on the held-out test segment.
# Both arrays are flattened first: the targets come from a one-column frame
# (2-D) while the predictions are 1-D, and subtracting them unflattened
# broadcasts to an (m, m) matrix, giving m averages instead of one MAE.
y_true = np.ravel(test_data1)
y_pred = np.ravel(predicted_values)
if y_true.shape != y_pred.shape:
    raise ValueError(
        "test targets and predictions differ in length: "
        + str(y_true.shape) + " vs " + str(y_pred.shape)
    )
error = float(np.mean(np.abs(y_true - y_pred)))
print(error)

In [None]:
# comparison graph: measured test values against the model's predictions,
# drawn on one explicitly created axes
fig, ax = plt.subplots()
ax.plot(test_data1)
ax.plot(predicted_values)
ax.set_title('Prediction vs Test')
ax.set_ylabel('Q-Factor')
ax.set_xlabel('Time')
ax.legend(['test', 'predictions'], loc='upper left')
plt.show()