In [7]:
# ############################=> BEGIN IMPORT SECTION <=#############################
import os
import pandas as pd
import numpy as np
import random
from sklearn import preprocessing
from collections import deque
import time
import math
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, BatchNormalization
from keras.callbacks import TensorBoard, ModelCheckpoint


# ############################=> END IMPORT SECTION <=#############################


# ############################=> BEGIN VARIABLE SECTION <=#############################

# creating variable, to keep prediction parameters
# using sixty seconds data to predict
SEQ_LEN = 60
# three minutes period for prediction
FUTURE_PERIOD_PREDICT = 3
# the type of data we trying to predict
STREET_TO_PREDICT = "georgia"
EPOCHS = 20
BATCH_SIZE = 100
# Name of the model
NAME = f"{STREET_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

# ############################=> END VARIABLE SECTION <=#############################

# ############################=> BEGIN FUNCTION SECTION <=#############################
def classify(current_co2_amount, future_co2_amount):
    if float(future_co2_amount) > float(current_co2_amount):
        return 1
    else:
        return 0


def preprocess_df(df):
    # drop the future columns
    df = df.drop(f"future_pollution_{STREET_TO_PREDICT}", 1)

    for col in df.columns:
        if col != "target":
            # normalize the data, because each columns has different value for different type of data
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(pd.notnull(df[col].values))
    df.dropna(inplace=True)
    sequential_data = []
    # it holds data for 60 seconds and popout the old item after this time
    prev_days = deque(maxlen=SEQ_LEN)

    for i in df.values:
        # not include the last item which is target
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]])
    random.shuffle(sequential_data)

    # balance the data into higherCO2 and  lowerCO2

    higherPollution = []
    lowerPollution = []

    for seq, target in sequential_data:
        if target == 0:
            lowerPollution.append([seq, target])
        elif target == 1:
            higherPollution.append([seq, target])

    random.shuffle(higherPollution)
    random.shuffle(lowerPollution)

    lower = min(len(higherPollution), len(lowerPollution))

    higherPollution = higherPollution[:lower]
    lowerPollution = lowerPollution[:lower]

    sequential_data = higherPollution + lowerPollution
    random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)

    return np.array(X), y

# ############################=> END FUNCTION SECTION <=#############################

data_frame = pd.DataFrame()
# reading the data
fileNames = ["pollution", "traffic"]
for fileName in fileNames:
    dataset_name = f"{fileName}.csv"
    df = pd.read_csv(dataset_name, sep=";")
    # converting the Time columns values to time object
    df.Time = pd.to_datetime(df.Time)
    # setting time as index
    df.set_index("Time", inplace=True)

    # combing the data into the data_frame
    if len(data_frame) == 0:
        data_frame = df
    else:
        data_frame = data_frame.join(df)



print(data_frame.head())

print(data_frame.columns)


# combing the average of the two Cam
data_frame[f"totalPopulation_{STREET_TO_PREDICT}"] = data_frame[[f"mlk-{STREET_TO_PREDICT}-cam-3", f"mlk-{STREET_TO_PREDICT}-cam-1"]].sum(axis=1)

data_frame = data_frame[[f"totalPopulation_{STREET_TO_PREDICT}", f"mlk-{STREET_TO_PREDICT}"]]

# if there are gaps in data, use previously known values, take look at this
data_frame.fillna(method="ffill", inplace=True)
data_frame.dropna(inplace=True)

data_frame[f"future_pollution_{STREET_TO_PREDICT}"] = data_frame[f"mlk-{STREET_TO_PREDICT}"].shift(-FUTURE_PERIOD_PREDICT)


print(data_frame.head(100))
print(data_frame.columns)
# renaming f"mlk-{STREET_TO_PREDICT}" to  current pollution
data_frame.rename(columns={f"mlk-{STREET_TO_PREDICT}": f"current_pollution_{STREET_TO_PREDICT}"}, inplace=True)
print(data_frame[[f"current_pollution_{STREET_TO_PREDICT}"]].head(20))
print(data_frame[[f"future_pollution_{STREET_TO_PREDICT}"]].head())

print(data_frame.columns)
# creating the target data, by making a list of labels
data_frame["target"] = list(map(classify, data_frame[f"current_pollution_{STREET_TO_PREDICT}"], data_frame[f"future_pollution_{STREET_TO_PREDICT}"]))

print(data_frame.head(100))

# creating training data
times = sorted(data_frame.index.values)

# getting the last 35% of time data
last_35pct = times[-int(0.35 * len(times))]
print(last_35pct)

# creating validation data that are greater than last 5%
validation_data_df = data_frame[(data_frame.index >= last_35pct)]
# creating training data
training_data_df = data_frame[(data_frame.index < last_35pct)]

#preprocess_df(training_data_df)
train_x, train_y = preprocess_df(training_data_df)
test_x, test_y = preprocess_df(validation_data_df)

print(validation_data_df.head())

print(f"train dta : {len(train_x)}  validation : {len(test_x)}")
print(f"low pollution: {train_y.count(0)}, high: {train_y.count(1)}")
print(f"VALIDATION low pollution : { test_y.count(0)}, high : {test_y.count(1)}")



model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Dense layer
model.add(LSTM(128, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())


model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

opt = keras.optimizers.Adam(lr=0.001, decay=1e-6)
# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
 )

#plot_model(model, to_file="model.png")

# TensorBoard callback, to see the train data graph  command : board --logdir=logs
tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

# unique file name that will include the epoch and the validation acc for that epoch
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# saves only the best ones
checkpoint = ModelCheckpoint("models/{}.model".format(filepath,
                            monitor='val_acc', verbose=1, save_best_only=True, mode='max'))


# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(test_x, test_y),
    callbacks=[tensorboard, checkpoint],
)

# Score model
score = model.evaluate(test_x, test_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save model
model.save("{}".format(NAME))



                     mlk-peeples  mlk-magnolia  mlk-lindsay  mlk-houston  \
Time                                                                       
2019-03-25 20:50:00     8.572857      7.981429     9.332500     8.295625   
2019-03-25 21:00:00     7.873750      9.218125    10.310000     9.184286   
2019-03-25 21:10:00     8.982143      9.258333    11.130625     7.869375   
2019-03-25 21:20:00     8.585000     10.703125    10.182143     8.082143   
2019-03-25 21:30:00    10.153571     10.172857    11.306250     9.338750   

                     mlk-georgia  mlk-douglas  mlk-central  mlk-georgia-cam-3  \
Time                                                                            
2019-03-25 20:50:00          NaN     7.454375     7.227143                  0   
2019-03-25 21:00:00          NaN     8.806429     9.318750                  0   
2019-03-25 21:10:00     6.954375     7.821875    10.832143                  0   
2019-03-25 21:20:00     7.317500     8.599286     9.556875    



Train on 398 samples, validate on 210 samples


InvalidArgumentError: No OpKernel was registered to support Op 'CudnnRNN' used by node cu_dnnlstm_1/CudnnRNN (defined at /Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/layers/cudnn_recurrent.py:517) with these attrs: [dropout=0, seed=87654321, T=DT_FLOAT, input_mode="linear_input", direction="unidirectional", rnn_mode="lstm", is_training=true, seed2=0]
Registered devices: [CPU]
Registered kernels:
  <no registered kernels>

	 [[node cu_dnnlstm_1/CudnnRNN (defined at /Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/layers/cudnn_recurrent.py:517) ]]

Caused by op 'cu_dnnlstm_1/CudnnRNN', defined at:
  File "/Users/alnouralharin/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/asyncio/base_events.py", line 528, in run_forever
    self._run_once()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/asyncio/base_events.py", line 1764, in _run_once
    handle._run()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-67f48ab8c7f7>", line 171, in <module>
    model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/engine/sequential.py", line 165, in add
    layer(x)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/layers/recurrent.py", line 532, in __call__
    return super(RNN, self).__call__(inputs, **kwargs)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/engine/base_layer.py", line 457, in __call__
    output = self.call(inputs, **kwargs)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/layers/cudnn_recurrent.py", line 90, in call
    output, states = self._process_batch(inputs, initial_state)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/layers/cudnn_recurrent.py", line 517, in _process_batch
    is_training=True)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1636, in __call__
    input_data, input_h, input_c, params, is_training=is_training)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1527, in __call__
    seed=self._seed)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1014, in _cudnn_rnn
    outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(**args)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py", line 142, in cudnn_rnn
    seed2=seed2, is_training=is_training, name=name)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/Users/alnouralharin/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'CudnnRNN' used by node cu_dnnlstm_1/CudnnRNN (defined at /Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/layers/cudnn_recurrent.py:517) with these attrs: [dropout=0, seed=87654321, T=DT_FLOAT, input_mode="linear_input", direction="unidirectional", rnn_mode="lstm", is_training=true, seed2=0]
Registered devices: [CPU]
Registered kernels:
  <no registered kernels>

	 [[node cu_dnnlstm_1/CudnnRNN (defined at /Users/alnouralharin/anaconda3/lib/python3.7/site-packages/keras/layers/cudnn_recurrent.py:517) ]]
