# Piper - Home Automation Sequence Generator

## RNN & Text Generation Algorithms
https://www.tensorflow.org/tutorials/text/text_generation

W pierwszej kolejności zajmiemy się analizą algorytmu generacji tekstu przedstawionego w powyższym tutorialu oraz implementacją prostego generatora tekstu opartego o dzieło Szekspira, jak również poznaniem tego, jak operuje się na narzędziach, jakie dostarcza nam TensorFlow.

In [None]:
import tensorflow as tf

import numpy as np
import os
import time

path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [None]:
# Take a look at the first 250 characters in text
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [None]:
# The unique characters in the file
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

65 unique characters


In [None]:
# Creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

print('{')
for char,_ in zip(char2idx, range(20)):
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '$' :   3,
  '&' :   4,
  "'" :   5,
  ',' :   6,
  '-' :   7,
  '.' :   8,
  '3' :   9,
  ':' :  10,
  ';' :  11,
  '?' :  12,
  'A' :  13,
  'B' :  14,
  'C' :  15,
  'D' :  16,
  'E' :  17,
  'F' :  18,
  'G' :  19,
  ...
}


In [None]:
# Show how the first 13 characters from the text are mapped to integers
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

'First Citizen' ---- characters mapped to int ---- > [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [None]:
# The maximum length sentence we want for a single input in characters
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

F
i
r
s
t


In [None]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [None]:
def split_input_target(chunk):
    """Split one chunk into an (input, target) pair shifted by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return chunk[:-1], chunk[1:]

dataset = sequences.map(split_input_target)

In [None]:
for input_example, target_example in  dataset.take(1):
  print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
  print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data: 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [None]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step    0
  input: 18 ('F')
  expected output: 47 ('i')
Step    1
  input: 47 ('i')
  expected output: 56 ('r')
Step    2
  input: 56 ('r')
  expected output: 57 ('s')
Step    3
  input: 57 ('s')
  expected output: 58 ('t')
Step    4
  input: 58 ('t')
  expected output: 1 (' ')


In [None]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [None]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the Embedding -> GRU -> Dense sequential model.

  Args:
    vocab_size: size of the embedding input vocabulary and of the Dense output.
    embedding_dim: width of the embedding vectors.
    rnn_units: number of units in the GRU layer.
    batch_size: fixed batch dimension baked into the input shape (the GRU is
      stateful, so the batch size must be fixed when the model is built).

  Returns:
    An uncompiled tf.keras.Sequential model. The Dense layer has no
    activation, so it emits raw scores per vocabulary entry.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
  recurrent = tf.keras.layers.GRU(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, projection])

In [None]:
model = build_model(
  vocab_size = len(vocab),
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

In [None]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [None]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()
sampled_indices

array([64, 42, 36, 10, 55, 58, 10, 16,  5, 37,  9, 40, 51, 31, 23, 12,  1,
       22, 31,  3, 57,  4, 37, 31,  1, 56, 44,  6, 43, 41, 14, 42, 62, 61,
       20, 34, 22, 42, 38, 25, 29, 59, 16, 51, 62, 39, 23, 32,  6, 12, 18,
        3, 34, 13, 23, 57, 12, 18, 16, 53, 56, 55, 21, 24, 29, 30,  2,  9,
       41, 34, 12, 47, 27, 45, 37, 58, 47, 41, 59, 63, 49, 51, 23,  8, 40,
       62, 45, 35, 21, 44, 16, 30, 52, 27, 40, 31, 52, 60, 37, 35])

In [None]:
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

Input: 
 'is\nrequiring with a plausible obedience; agree with\nhis demands to the point; only refer yourself to'

Next Char Predictions: 
 "zdX:qt:D'Y3bmSK? JS$s&YS rf,ecBdxwHVJdZMQuDmxaKT,?F$VAKs?FDorqILQR!3cV?iOgYticuykmK.bxgWIfDRnObSnvYW"


In [None]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True tells Keras that `logits` are unnormalized scores rather
  than probabilities.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy(
      labels, logits, from_logits=True)
  return crossentropy

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.1754284


In [None]:
model.compile(optimizer='adam', loss=loss)

In [None]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [None]:
EPOCHS=10
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# NOTE(review): this cell repeats the `loss` definition from the earlier loss
# cell verbatim, and the cells that follow repeat compile / checkpoint / fit as
# well, so running the notebook top to bottom trains the model a second time.
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits."""
  # from_logits=True: `logits` are unnormalized scores, not probabilities.
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.1754284


In [None]:
model.compile(optimizer='adam', loss=loss)

In [None]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [None]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_10'

In [None]:
# Rebuild the network with batch_size=1 for generation and restore the most
# recently saved weights. latest_checkpoint() returns None when the directory
# holds no checkpoint, so fail with a clear message instead of handing None to
# load_weights().
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))

model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(latest_checkpoint)

model.build(tf.TensorShape([1, None]))

In [None]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_1 (Dense)              (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [None]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
  """Generate text character by character with the trained model.

  Args:
    model: stateful model built with batch size 1; its states are reset
      before generation starts.
    start_string: non-empty seed text. Every character must be a key of
      `char2idx` (an unknown character raises KeyError).
    num_generate: number of characters to generate after the seed.
    temperature: positive scaling factor applied to the logits. Low values
      give more predictable text, high values more surprising text.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
  """
  if not start_string:
    raise ValueError('start_string must contain at least one character')
  if temperature <= 0:
    raise ValueError('temperature must be positive, got {}'.format(temperature))

  # Converting our start string to numbers (vectorizing)
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # List collecting the generated characters
  text_generated = []

  # Here batch size == 1
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # Only the logits of the last time step decide the next character, so
    # sample from that single row instead of from every position.
    last_step_logits = predictions[-1:] / temperature
    predicted_id = tf.random.categorical(last_step_logits, num_samples=1)[0, 0].numpy()

    # We pass the predicted character as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [None]:
print(generate_text(model, start_string=u"ROMEO: "))

ROMEO: this new cardinall
Is through the like deeds, without that there are gentlemening,
We may capt upon you.

First Senator:
Sir, she well the nobility:
Then lies that ve up Hereford hurts in 'em!

JULIET: our general
is the parish'd prisoner;
Not what is Ely up-rence? prithee, pray him to his lady.

HORTENSIO:
Had I been the daxENEN:
and most deserve cogestrie!

ISABELLA:
Yes; but, I advangan: sure, thank young ignorance,
And now your stand, whom I, 'greed!

TYRRKEd her no more are heavy piffulling
pride must saying with the fire,
And slain by thinks that one sold mel next day, if not, I play
So many sin that we did show
Dread weep'st to see in all:
His name is Lawillist: s how I may banishment to you;
Look, what love?

Servant:
Ay, sir; I humbly I do deliver,
The crown of justice against himself:
With all of ours, the end of enemies,
Make warmer'd all the very difference of my heart.
Thine eye and fear: our sadders that's before,
You hugh'd a mother!

COgapter:
It was my fanny:
As

### Wnioski


*   Generacja tekstu opisana w przykładzie powyżej jest na zadowalającym poziomie. Pomimo tego, że jego treść jest bez sensu, prawdopodobieństwo napisania czegoś podobnego przez człowieka jest całkiem spore; same słowa są generowane poprawnie, a więc przy innej strukturze niż litery algorytm powinien sprawdzić się równie przyzwoicie, co jest idealne dla naszych celów.
*   Sama generacja tekstu nie jest skomplikowana jeśli chodzi o sam algorytm
*   Algorytm jest bardzo zasobożerny obliczeniowo; znacznym przyspieszeniem okazuje się przełączenie sesji na kartę graficzną (Zakładka Runtime -> Change runtime type -> GPU)





## Generacja sekwencji DataFrame'ów Pandasowych na podstawie algorytmu generacji tekstu RNN
W tej części zajmiemy się zdefiniowaniem algorytmu na nasze potrzeby, czyli generacją sekwencji automatyzacji domu, poprzez odtworzenie algorytmu generacji tekstu, lecz dla innej struktury danych, jaką będzie DataFrame z biblioteki Pandas. Zrobimy to w krokach takich jak w algorytmie poprzedzającym:
1.   Ładowanie danych z Dysku Google
2.   Przesunięcie w DataFramie Pandasowym (przygotowujemy dwa data frame'y na wzór przesuniętej tablicy, np. X=[1,2,3,4] Y=[2,3,4,1])
3.   Trenowanie modelu
4.   Wyznaczenie funkcji straty
5.   Stworzenie funkcji generującej


### Import przygotowanych danych z naszego dysku google
Wykorzystywane dane są wygenerowane z naszego innego generatora: https://github.com/jakubriegel/piper-data-generator. 
Tak spreparowane dane zapisujemy do pliku CSV na naszym dysku Google, po czym kopiujemy link do ich udostępniania.

In [None]:
# Code to read csv file into Colaboratory:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import pandas as pd
import tensorflow as tf

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

from urllib.parse import parse_qs, urlparse

link = 'https://drive.google.com/open?id=1pLSHaV5pRmNWm-FN5Upea4Ihi8cMIOsU'
# Read the file id from the `id` query parameter. This avoids shadowing the
# builtin `id` and still works when the link carries additional parameters.
file_id = parse_qs(urlparse(link).query)['id'][0]
print (file_id) # Verify that you have everything after '='

downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('sensors_data.csv')

header_list = ["timestamp", "sensor", "action"]
sensors_df = pd.read_csv('sensors_data.csv', names=header_list)

# Dataset is now stored in a Pandas Dataframe
sensors_df.head()

1pLSHaV5pRmNWm-FN5Upea4Ihi8cMIOsU


Unnamed: 0,timestamp,sensor,action
0,1589144709,outdoor_gate_1_switch,gate_open
1,1589144709,kitchen_light_1_switch,light_on
2,1589144709,bedroom_2_light_1_switch,light_on
3,1589145425,outdoor_gate_1_switch,gate_close
4,1589145585,corridor_light_1_sensor,light_on


### Konwersja danych

In [None]:
sensors_df['sensors_with_action'] = sensors_df['sensor'] + '_' + sensors_df['action']
sensors_df.head()

Unnamed: 0,timestamp,sensor,action,sensors_with_action
0,1589144709,outdoor_gate_1_switch,gate_open,outdoor_gate_1_switch_gate_open
1,1589144709,kitchen_light_1_switch,light_on,kitchen_light_1_switch_light_on
2,1589144709,bedroom_2_light_1_switch,light_on,bedroom_2_light_1_switch_light_on
3,1589145425,outdoor_gate_1_switch,gate_close,outdoor_gate_1_switch_gate_close
4,1589145585,corridor_light_1_sensor,light_on,corridor_light_1_sensor_light_on


In [None]:
sensors_df['sensors_with_action_code'] = pd.Categorical(sensors_df['sensors_with_action'])
categories = pd.Categorical(sensors_df['sensors_with_action_code'])
CATEGORIES_AMOUNT = len(categories.categories.values)
print('There is', CATEGORIES_AMOUNT, 'unique categories')

There is 59 unique categories


In [None]:
# Call the method: without the parentheses the cell only displays the bound
# method object instead of the per-category summary.
categories.describe()

<bound method Categorical.describe of [outdoor_gate_1_switch_gate_open, kitchen_light_1_switch_light_on, bedroom_2_light_1_switch_light_on, outdoor_gate_1_switch_gate_close, corridor_light_1_sensor_light_on, ..., general_ac_temp_up, living_room_light_1_switch_light_on, living_room_light_1_switch_light_off, living_room_light_1_switch_light_on, living_room_light_1_switch_light_off]
Length: 10000
Categories (59, object): [bathroom_light_1_switch_light_off, bathroom_light_1_switch_light_on,
                          bathroom_light_2_switch_light_off, bathroom_light_2_switch_light_on, ...,
                          outdoor_light_1_sensor_light_off, outdoor_light_1_sensor_light_on,
                          outdoor_light_2_sensor_light_off, outdoor_light_2_sensor_light_on]>

In [None]:
# Map integer code -> category label. Built from the `categories` Categorical
# rather than from the DataFrame column, because that column is later
# overwritten with plain integer codes and then no longer has a `.cat` accessor.
categories_dict = dict(enumerate(categories.categories))
categories_dict

{0: 'bathroom_light_1_switch_light_off',
 1: 'bathroom_light_1_switch_light_on',
 2: 'bathroom_light_2_switch_light_off',
 3: 'bathroom_light_2_switch_light_on',
 4: 'bedroom_1_blind_1_switch_blind_down',
 5: 'bedroom_1_blind_1_switch_blind_up',
 6: 'bedroom_1_light_1_switch_light_off',
 7: 'bedroom_1_light_1_switch_light_on',
 8: 'bedroom_1_light_2_switch_light_off',
 9: 'bedroom_1_light_2_switch_light_on',
 10: 'bedroom_1_light_3_switch_light_off',
 11: 'bedroom_1_light_3_switch_light_on',
 12: 'bedroom_2_blind_1_switch_blind_down',
 13: 'bedroom_2_blind_1_switch_blind_up',
 14: 'bedroom_2_light_1_switch_light_off',
 15: 'bedroom_2_light_1_switch_light_on',
 16: 'bedroom_2_light_2_switch_light_off',
 17: 'bedroom_2_light_2_switch_light_on',
 18: 'bedroom_3_blind_1_switch_blind_down',
 19: 'bedroom_3_blind_1_switch_blind_up',
 20: 'bedroom_3_light_1_switch_light_off',
 21: 'bedroom_3_light_1_switch_light_on',
 22: 'bedroom_3_light_2_switch_light_off',
 23: 'bedroom_3_light_2_switch_li

In [None]:
def getCategory(category_id):
  """Return the label stored in `categories_dict` under `category_id`.

  Raises KeyError when the id is not a key of the dictionary.
  """
  label = categories_dict[category_id]
  return label

getCategory(42)

'living_room_blind_3_switch_blind_up'

In [None]:
# Replace the categorical column with its integer codes. Taking the codes from
# the `categories` Categorical keeps this cell re-runnable: after the first run
# the column holds integers and has no `.cat` accessor any more.
sensors_df['sensors_with_action_code'] = categories.codes
sensors_df.head()

Unnamed: 0,timestamp,sensor,action,sensors_with_action,sensors_with_action_code
0,1589144709,outdoor_gate_1_switch,gate_open,outdoor_gate_1_switch_gate_open,52
1,1589144709,kitchen_light_1_switch,light_on,kitchen_light_1_switch_light_on,32
2,1589144709,bedroom_2_light_1_switch,light_on,bedroom_2_light_1_switch_light_on,15
3,1589145425,outdoor_gate_1_switch,gate_close,outdoor_gate_1_switch_gate_close,51
4,1589145585,corridor_light_1_sensor,light_on,corridor_light_1_sensor_light_on,25


Jako wynik końcowy konwersji mamy akcje w postaci 'sensor_akcja', co daje nam tylko jedną daną; pozostałe kolumny nie będą nam potrzebne. Jako że zależy nam tylko na kolejności, timestamp również nie będzie nam potrzebny. Przetwórzmy nasze dane do postaci X, Y tak, aby na podstawie X przewidywać naszego Y.

### Przygotowanie danych do uczenia - przykład Pandas

In [None]:
# Targets are the events shifted by one row: drop the first row and keep all
# the others. The open-ended slice works for any number of rows.
Y_data = sensors_df.iloc[1:]
Y_data.head()

Unnamed: 0,timestamp,sensor,action,sensors_with_action,sensors_with_action_code
1,1589144709,kitchen_light_1_switch,light_on,kitchen_light_1_switch_light_on,32
2,1589144709,bedroom_2_light_1_switch,light_on,bedroom_2_light_1_switch_light_on,15
3,1589145425,outdoor_gate_1_switch,gate_close,outdoor_gate_1_switch_gate_close,51
4,1589145585,corridor_light_1_sensor,light_on,corridor_light_1_sensor_light_on,25
5,1589145774,bedroom_1_light_1_switch,light_on,bedroom_1_light_1_switch_light_on,7


In [None]:
sensors_df.tail()

Unnamed: 0,timestamp,sensor,action,sensors_with_action,sensors_with_action_code
9995,1594049507,general_ac,temp_up,general_ac_temp_up,26
9996,1594050680,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9997,1594050913,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43
9998,1594052292,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9999,1594053306,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43


In [None]:
Y_data.tail()

Unnamed: 0,timestamp,sensor,action,sensors_with_action,sensors_with_action_code
9995,1594049507,general_ac,temp_up,general_ac_temp_up,26
9996,1594050680,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9997,1594050913,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43
9998,1594052292,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9999,1594053306,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43


In [None]:
Y_data = pd.concat([Y_data, sensors_df.iloc[0:1]], ignore_index=True)
Y_data.tail()

Unnamed: 0,timestamp,sensor,action,sensors_with_action,sensors_with_action_code
9995,1594050680,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9996,1594050913,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43
9997,1594052292,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9998,1594053306,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43
9999,1589144709,outdoor_gate_1_switch,gate_open,outdoor_gate_1_switch_gate_open,52


In [None]:
Y_data = Y_data.reset_index(drop=True)
Y_data.tail()

Unnamed: 0,timestamp,sensor,action,sensors_with_action,sensors_with_action_code
9995,1594050680,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9996,1594050913,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43
9997,1594052292,living_room_light_1_switch,light_on,living_room_light_1_switch_light_on,44
9998,1594053306,living_room_light_1_switch,light_off,living_room_light_1_switch_light_off,43
9999,1589144709,outdoor_gate_1_switch,gate_open,outdoor_gate_1_switch_gate_open,52


Teraz mamy dokładnie dwa DataFrame'y przesunięte o jeden wiersz danych dokładnie tak samo jak w algorytmie generacji tekstu. Złączmy to w jeden dataSet, który potem przekonwertujemy do postaci dataSet'u wykorzystywanego w tensorFlow.

In [None]:
new_dataSet = pd.DataFrame()
new_dataSet['X'] = sensors_df['sensors_with_action_code']
new_dataSet['Y'] = Y_data['sensors_with_action_code']
new_dataSet.head()

Unnamed: 0,X,Y
0,52,32
1,32,15
2,15,51
3,51,25
4,25,7


### Tworzenie próbek treningowych


In [None]:
# The maximum length sentence we want for a single input in characters
seq_length = 100
examples_per_epoch = len(new_dataSet)//(seq_length+1)
print(examples_per_epoch)

99


In [None]:
sequence_dataset = tf.data.Dataset.from_tensor_slices(sensors_df['sensors_with_action_code'].values)
sequence_dataset

<TensorSliceDataset shapes: (), types: tf.int8>

In [None]:
sequences = sequence_dataset.batch(seq_length+1, drop_remainder=True)
sequences

for item in sequences.take(1):
  print(item)

tf.Tensor(
[52 32 15 51 25  7  9 34 31 21 14 20  8 21  6  7 23 24 33 15  6 14 20 15
 14  7 22  9 11  5 10  8 44 46  4 43 44 48 38 43 47  6 37 44 43 45  7 44
  6 43 44 43  1  0 32 21 25  7  9  6 24 31  7 20  6 25 24 26 32 26 34  8
  1 33 34 31  0 26  1 44 33  0  1 46  0 43 44 48 38 43 37 44 47 21 45 46
 20 48 45 21 43], shape=(101,), dtype=int8)


In [None]:
def split_input_target(chunk):
    """Turn one chunk of event codes into an (input, target) pair.

    The target is the input shifted one step ahead: the input drops the last
    element of the chunk and the target drops the first.
    """
    inputs, targets = chunk[:-1], chunk[1:]
    return inputs, targets

dataset = sequences.map(split_input_target)

In [None]:
for input_example, target_example in  dataset.take(1):
  print ('Input data: ', input_example.numpy())
  print ('Target data:', target_example.numpy())

Input data:  [52 32 15 51 25  7  9 34 31 21 14 20  8 21  6  7 23 24 33 15  6 14 20 15
 14  7 22  9 11  5 10  8 44 46  4 43 44 48 38 43 47  6 37 44 43 45  7 44
  6 43 44 43  1  0 32 21 25  7  9  6 24 31  7 20  6 25 24 26 32 26 34  8
  1 33 34 31  0 26  1 44 33  0  1 46  0 43 44 48 38 43 37 44 47 21 45 46
 20 48 45 21]
Target data: [32 15 51 25  7  9 34 31 21 14 20  8 21  6  7 23 24 33 15  6 14 20 15 14
  7 22  9 11  5 10  8 44 46  4 43 44 48 38 43 47  6 37 44 43 45  7 44  6
 43 44 43  1  0 32 21 25  7  9  6 24 31  7 20  6 25 24 26 32 26 34  8  1
 33 34 31  0 26  1 44 33  0  1 46  0 43 44 48 38 43 37 44 47 21 45 46 20
 48 45 21 43]


In [None]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, getCategory(input_idx.numpy())))
    print("  expected output: {} ({:s})".format(target_idx, getCategory(target_idx.numpy())))

Step    0
  input: 52 (outdoor_gate_1_switch_gate_open)
  expected output: 32 (kitchen_light_1_switch_light_on)
Step    1
  input: 32 (kitchen_light_1_switch_light_on)
  expected output: 15 (bedroom_2_light_1_switch_light_on)
Step    2
  input: 15 (bedroom_2_light_1_switch_light_on)
  expected output: 51 (outdoor_gate_1_switch_gate_close)
Step    3
  input: 51 (outdoor_gate_1_switch_gate_close)
  expected output: 25 (corridor_light_1_sensor_light_on)
Step    4
  input: 25 (corridor_light_1_sensor_light_on)
  expected output: 7 (bedroom_1_light_1_switch_light_on)


### Tworzenie zestawów treningowych (tzw. batches)

In [None]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int8, tf.int8)>

### Budowanie modelu

In [None]:
# Length of the vocabulary (amount of categories)
vocab_size = CATEGORIES_AMOUNT

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Create the Embedding -> GRU -> Dense model used for event sequences.

  Args:
    vocab_size: number of distinct event codes (embedding input size and
      Dense output size).
    embedding_dim: width of the embedding vectors.
    rnn_units: number of units in the GRU layer.
    batch_size: fixed batch dimension of the input; the GRU is stateful, so
      the batch size is part of the model's input shape.

  Returns:
    An uncompiled tf.keras.Sequential model whose final Dense layer has no
    activation and therefore outputs raw scores.
  """
  layers = [
    tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None]),
    tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size),
  ]
  return tf.keras.Sequential(layers)

In [None]:
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

### Wypróbowanie modelu

In [None]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 59) # (batch_size, sequence_length, vocab_size)


In [None]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (64, None, 256)           15104     
_________________________________________________________________
gru_2 (GRU)                  (64, None, 1024)          3938304   
_________________________________________________________________
dense_2 (Dense)              (64, None, 59)            60475     
Total params: 4,013,883
Trainable params: 4,013,883
Non-trainable params: 0
_________________________________________________________________


In [None]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

In [None]:
sampled_indices

array([43, 47, 12, 28, 22, 38, 37, 58, 50, 14, 27,  4, 16, 31,  6, 11, 11,
       40, 54, 18,  4,  3, 50,  1, 57, 55, 20,  7, 37, 47, 23, 31, 54, 53,
       29, 56, 10, 53, 43, 20, 19,  0, 41, 31, 47, 33,  5, 51,  6, 58,  5,
       26, 45,  3, 21, 33, 48, 33, 42, 11, 16, 48, 16, 24, 50,  9,  5, 22,
       54,  6, 55,  8,  1,  0, 30,  2, 54,  7, 31, 44, 10,  3,  0, 42, 22,
       43, 29, 42,  4, 36, 28, 25, 43, 28, 39, 14, 17, 39, 53, 42])

In [None]:
print("Input: \n")
for i in input_example_batch[0][:10]:
  print(getCategory(i.numpy()))

print("\nPredicted: \n")
for i in sampled_indices[:10]:
  print(getCategory(i))

Input: 

living_room_light_3_switch_light_off
living_room_light_2_switch_light_off
living_room_light_2_switch_light_on
living_room_light_1_switch_light_off
living_room_light_2_switch_light_off
outdoor_gate_1_switch_gate_open
bedroom_3_light_1_switch_light_on
outdoor_gate_1_switch_gate_close
bedroom_1_light_1_switch_light_on
bedroom_1_light_2_switch_light_on

Predicted: 

living_room_light_1_switch_light_off
living_room_light_3_switch_light_off
bedroom_2_blind_1_switch_blind_down
kitchen_blind_1_switch_blind_up
bedroom_3_light_2_switch_light_off
living_room_blind_1_switch_blind_up
living_room_blind_1_switch_blind_down
outdoor_light_2_sensor_light_on
living_room_tv_on
bedroom_2_light_1_switch_light_off


### Dodanie do modelu funkcji straty, optymizera oraz tzw. checkpointów

In [None]:
def loss(labels, logits):
  """Cross-entropy of integer `labels` against unnormalized `logits`."""
  # from_logits=True: the model's Dense output is passed in without softmax.
  return tf.keras.losses.sparse_categorical_crossentropy(
      labels,
      logits,
      from_logits=True,
  )

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 59)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.0775733


In [None]:
model.compile(optimizer='adam', loss=loss)

In [None]:
import os
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

### Trening modelu

In [None]:
EPOCHS = 10
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Zmiana minimalnego rozmiaru próbki predykcyjnej i przebudowanie modelu

In [None]:
# Look up the newest checkpoint once and reuse the result. latest_checkpoint()
# returns None when nothing has been saved, so stop with a clear message
# instead of passing None to load_weights().
# NOTE(review): the Shakespeare section of this notebook writes its checkpoints
# to the same './training_checkpoints' directory, so make sure the newest
# checkpoint really belongs to this model.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))
# Rebuild with batch_size=1 so single sequences can be fed at generation time.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1, None]))
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (1, None, 256)            15104     
_________________________________________________________________
gru_3 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_3 (Dense)              (1, None, 59)             60475     
Total params: 4,013,883
Trainable params: 4,013,883
Non-trainable params: 0
_________________________________________________________________


### Stworzenie funkcji generującej sekwencje

In [None]:
def generate_sequences(model, start_sequence_event_id, num_generate=10, temperature=1.0):
  """Generate a sequence of event labels starting from one seed event.

  Args:
    model: stateful model built with batch size 1; its states are reset
      before generation starts.
    start_sequence_event_id: integer code of the first event. It must be a
      valid key for `getCategory`.
    num_generate: number of events to generate after the seed event.
    temperature: positive scaling factor applied to the logits. Low values
      give more predictable events, high values more surprising events.

  Returns:
    A list of labels: the seed event's label followed by `num_generate`
    generated labels.

  Raises:
    ValueError: if `temperature` is not positive.
    KeyError: if `start_sequence_event_id` is not a known category code.
  """
  if temperature <= 0:
    raise ValueError('temperature must be positive, got {}'.format(temperature))

  # Resolve the seed label first so an unknown id fails before the model runs.
  generated_sequences = [getCategory(start_sequence_event_id)]

  # Wrap the seed id into a (1, 1) batch for the model
  input_eval = tf.expand_dims([start_sequence_event_id], 0)

  # Here batch size == 1
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # sample the next event id from the temperature-scaled logits
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # We pass the predicted event as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)

    generated_sequences.append(getCategory(predicted_id))

  return generated_sequences

In [None]:
print(generate_sequences(model, start_sequence_event_id=26))

['general_ac_temp_up', 'bedroom_2_light_2_switch_light_on', 'living_room_light_2_switch_light_on', 'bedroom_1_light_3_switch_light_off', 'kitchen_blind_1_switch_blind_up', 'kitchen_blind_2_switch_blind_up', 'bathroom_light_1_switch_light_on', 'living_room_light_3_switch_light_on', 'bedroom_2_light_2_switch_light_on', 'bedroom_2_blind_1_switch_blind_up', 'living_room_light_2_switch_light_off']


In [None]:
print(generate_sequences(model, start_sequence_event_id=3))

['bathroom_light_2_switch_light_on', 'bedroom_1_light_1_switch_light_off', 'living_room_light_1_switch_light_on', 'bedroom_1_light_3_switch_light_on', 'living_room_blind_1_switch_blind_up', 'bedroom_3_blind_1_switch_blind_up', 'bedroom_2_light_1_switch_light_off', 'bedroom_3_light_1_switch_light_on', 'bedroom_2_light_1_switch_light_on', 'kitchen_light_1_switch_light_on', 'bedroom_1_light_1_switch_light_off']


In [None]:
print(generate_sequences(model, start_sequence_event_id=50))

['living_room_tv_on', 'living_room_blind_3_switch_blind_up', 'bedroom_2_light_2_switch_light_off', 'bedroom_1_blind_1_switch_blind_up', 'bedroom_1_light_1_switch_light_off', 'corridor_light_1_sensor_light_on', 'outdoor_gate_2_switch_gate_open', 'general_ac_temp_up', 'general_ac_temp_up', 'bedroom_2_light_1_switch_light_off', 'outdoor_gate_1_switch_gate_open']
