<a href="https://colab.research.google.com/github/aslesani/pgmpy_fork/blob/master/src/default_test/imdb_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''Trains an LSTM model on the IMDB sentiment classification task.
The dataset is actually too small for LSTM to be of any advantage
compared to simpler, much faster methods such as TF-IDF + LogReg.
# Notes
- RNNs are tricky. Choice of batch size is important,
choice of loss and optimizer is critical, etc.
Some configurations won't converge.
- LSTM loss decrease patterns during training can be quite different
from what you see with CNNs/MLPs/etc.
'''
from __future__ import print_function

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM, SimpleRNN
from keras.datasets import imdb
import numpy as np

In [2]:
import matplotlib.pyplot as plt

def plot_train_val_graph(history):
  loss = history.history['loss']
  val_loss = history.history['val_loss']
  epochs = range(1, len(loss) + 1)
  print('epochs:' , epochs)
  plt.figure()
  plt.plot(epochs, loss, 'bo', label='Training loss')
  plt.plot(epochs, val_loss, 'b', label='Validation loss')
  plt.title('Training and validation loss')
  plt.legend()
  plt.show()


In [3]:
def get_max_len_of_sequences(list_of_sequences):
  lengths = [len(list_of_sequences[i]) for i in range(len(list_of_sequences))]
  return max(lengths) , min(lengths) , lengths

In [4]:
def get_set_of_sensor_events(list_of_sequences):
 
  set_of_sensor_events = set()
  
  for i in range(len(list_of_sequences)):
      set_of_sensor_events = set_of_sensor_events.union(set(list_of_sequences[i]))
  
  return set_of_sensor_events, len(set_of_sensor_events)
  

In [None]:
def read_data_from_CSV_file(dest_file , data_type ,  has_header = False , return_as_pandas_data_frame = False , remove_date_and_time = False , return_header_separately = False , convert_int_columns_to_int = False):
    '''
    this function is a replacement for read_data_from_PCA_output_file and read_data_from_PCA_digitized_file
    with more capabalities.
    
    Parameters:
    ==========
    dest_file: 
    data_type: type of data that should be read  
    has_header = if the file has header, it is set to True. The header is the first line that starts whit '#' character 
    return_as_pandas_data_frame = if True, the return_value is pandas Dataframe, else numpy ndaaray
    
    convert_int_columns_to_int: if the user want to keep date and time columns, then she should 
                                specify data_type as object and then set convert_int_columns_to_int to True
    
    Returns:
    ========
    return_value: type of it is pandas Dataframe or numpy ndaaray
    
    '''
    header = ""
    with open(dest_file,'r') as dest_f:
        data_iter = csv.reader(dest_f, 
                               delimiter = ',')#quotechar = '"')
    
        if has_header:
            header = next(data_iter)
            header[0] = header[0].split('# ')[1] # remove # from first element
        
        
        data = [data for data in data_iter]
    
    if remove_date_and_time:
        data = np.delete(np.delete(data, -1, 1), -1 , 1)

    return_value= np.asarray(data, dtype = data_type)
    
    if convert_int_columns_to_int:
        rows , cols_to_convert = np.shape(return_value)
        
        if remove_date_and_time == False:
            cols_to_convert -=2
        
        for r in range(rows):
            for c in range(cols_to_convert):
                return_value[r,c] = int(return_value[r,c])
        
    
    if return_as_pandas_data_frame:
        return_value = pd.DataFrame(return_value , columns = header)
        
    if return_header_separately:
        return header , return_value
    
    else:   
        return return_value



In [None]:
!git clone https://github.com/aslesani/pgmpy_fork.git


In [None]:
ls

In [None]:
!git clone https://github.com/aslesani/created_dataset.git

In [None]:
!rm -r pgmpy_fork  

In [None]:
cd pgmpy_fork/src/default_test

In [None]:
cd ..

In [None]:
ls

In [3]:
def convert_binary_classes_to_zero_and_one(data):
  
  values = sorted(list(set(data)))
  for i in range(len(data)):
    data[i] = values.index(data[i])

    
  return data

In [7]:
def test_convert_binary_classes_to_zero_and_one():
  data = [2,1,1,1,2]
  data = convert_binary_classes_to_zero_and_one(data)
  print(data)

In [None]:
from keras import backend as K

def mcor(y_true, y_pred):
    #matthews_correlation
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    y_pred_neg = 1 - y_pred_pos
 
 
    y_pos = K.round(K.clip(y_true, 0, 1))
    y_neg = 1 - y_pos
 
 
    tp = K.sum(y_pos * y_pred_pos)
    tn = K.sum(y_neg * y_pred_neg)
 
 
    fp = K.sum(y_neg * y_pred_pos)
    fn = K.sum(y_pos * y_pred_neg)
 
 
    numerator = (tp * tn - fp * fn)
    denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
 
 
    return numerator / (denominator + K.epsilon())


def precision(y_true, y_pred):
    """Precision metric.

    Only computes a batch-wise average of precision.

    Computes the precision, a metric for multi-label classification of
    how many selected items are relevant.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def recall(y_true, y_pred):
    """Recall metric.

    Only computes a batch-wise average of recall.

    Computes the recall, a metric for multi-label classification of
    how many relevant items are selected.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall


def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


In [None]:
!pip install tabulate 

In [4]:
from tabulate import tabulate

#print(tabulate([['Alice', 24], ['Bob', 19]], headers=['algorithm', 'acc']))

def print_list_of_lists(data , headers):
    print(tabulate(data, headers=headers))



In [8]:
def test_print_list_of_lists():
    data = [['Alice', 24], ['Bob', 19]]
    headers=['algorithm', 'acc']
    print_list_of_lists(data , headers)


In [9]:
test_print_list_of_lists()

algorithm      acc
-----------  -----
Alice           24
Bob             19


In [5]:
def imdb_lstm_data_preparation(max_features = 20000, maxlen = 80):
  #max_features = 20000#number_of_events
  # cut texts after this number of words (among top max_features most common words)
  #maxlen = 10#max_seq_len

  print('Loading data...')
  (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
  print(len(x_train), 'train sequences')
  print(len(x_test), 'test sequences')

  #print('before apply pad_sequence, x_train[0]:' , x_train[0])

  print('Pad sequences (samples x time)')
  x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
  x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
  print('x_train shape:', x_train.shape)
  print('x_test shape:', x_test.shape)
  
  return x_train, x_test, y_train, y_test, max_features, maxlen

In [6]:
from read_write import read_sequence_of_bags_CSV_file_with_activity, read_sequence_based_CSV_file_without_activity, read_data_from_CSV_file
from keras.preprocessing.text import Tokenizer

def my_data_preparation(address_to_read):
  
  list_of_data , list_of_persons = read_sequence_based_CSV_file_without_activity(file_address = address_to_read, has_header = True , separate_data_based_on_persons = False, separate_words= False)
  #sensor_events , number_of_events = get_set_of_sensor_events(sequences)
  
  list_of_persons = convert_binary_classes_to_zero_and_one(list_of_persons)
  
  tokenizer = Tokenizer(num_words = 122, filters='!"#$%&()*+,-./:;<=>?@[\]^`{|}~')
  #list_of_data = [r'salam man', r"'M38_off' , 'M38_on'"]
  tokenizer.fit_on_texts(list_of_data)
  sequences = tokenizer.texts_to_sequences(list_of_data)
  
  max_features = 121#number_of_events
  # cut texts after this number of words (among top max_features most common words)
  maxlen = 10#80#max_seq_len

  #print('before apply pad_sequence, x_train[0]:' , x_train[0])

  #80% of data for train and 20% for test
  train_numbers = int(0.8 * len(sequences))
  x_train, y_train = sequences[0: train_numbers] , list_of_persons[0:train_numbers]
  x_test, y_test = sequences[train_numbers+1:] , list_of_persons[train_numbers+1:]

  print('Pad sequences (samples x time)')
  x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
  x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
  print('x_train shape:', x_train.shape)
  print('x_test shape:', x_test.shape)

  return x_train, x_test, y_train, y_test, max_features, maxlen

In [None]:
#! cd pgmpy_fork/src/default_test
#!ls


In [None]:
#!git clone https://github.com/pgmpy/pgmpy 
cd ..
#!ls
#!cd pgmpy/
#pip install -r requirements.txt
#!python setup.py install

In [None]:
mydata = read_data_from_CSV_file(dest_file = address_to_save , data_type = int ,  has_header = False , return_as_pandas_data_frame = False , remove_date_and_time = True , return_header_separately = False , convert_int_columns_to_int = True)
    

In [None]:
max_seq_len, min_seq_len , lens = get_max_len_of_sequences(sequences)
print(max_seq_len, min_seq_len)

In [None]:
print(set(y_train))

In [None]:
x_train = x_train[0:2500]
y_train = y_train[0:2500]
x_test = x_test[2501:3200]
y_test = y_test[2501:3200]



In [None]:
y_train = y_train.tolist()
y_test = y_test.tolist()

In [None]:
print((y_train[0:10]))
print((my_x_train[0:10]))


In [7]:
def create_model_and_apply_on_data(x_train, y_train,x_test, y_test, max_features,embedding_vector_dim = 64, batch_size = 32, epochs = 5, 
                                   loss = 'binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'], plot_train_val_graph = False):
  
  #batch_size = 32

  print('Build model...')
  model = Sequential()
  model.add(Embedding(max_features+1, embedding_vector_dim))
  #model.add(SimpleRNN(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
  #model.add(SimpleRNN(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
  model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
  model.add(Dense(1, activation='sigmoid'))

  model.summary()

  # try using different optimizers and different optimizer configs
  model.compile(loss= loss,
                optimizer=optimizer,
                metrics= metrics)#, mcor,recall, f1])

  print('Train...')
  history = model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test))
  score, acc = model.evaluate(x_test, y_test,
                              batch_size=batch_size)
  print('Test score:', score)# i think score is loss value
  print('Test accuracy:', acc)
 
  if plot_train_val_graph:
      plot_train_val_graph(history)
      
  return score, acc, history, len(x_train), len(x_test)

In [None]:
def make_lstm_without_embedding(x_train, y_train,x_test, y_test, max_features,embedding_vector_dim = 64, batch_size = 32, epochs = 5, 
                                   loss = 'binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'], plot_train_val_graph = False):
  
  #batch_size = 32

  print('Build model...')
  model = Sequential()
  #model.add(Embedding(max_features+1, embedding_vector_dim))
  #model.add(SimpleRNN(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
  #model.add(SimpleRNN(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
  model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
  model.add(Dense(1, activation='sigmoid'))

  model.summary()

  # try using different optimizers and different optimizer configs
  model.compile(loss= loss,
                optimizer=optimizer,
                metrics= metrics)#, mcor,recall, f1])

  print('Train...')
  history = model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(x_test, y_test))
  score, acc = model.evaluate(x_test, y_test,
                              batch_size=batch_size)
  print('Test score:', score)# i think score is loss value
  print('Test accuracy:', acc)
 
  if plot_train_val_graph:
      plot_train_val_graph(history)
      
  return score, acc, history, len(x_train), len(x_test)

In [8]:
def select_hyperparameters(address_to_read):
    #address_to_read= r"E:/pgmpy/Seq of sensor events_based on activities/based_on_activities.csv"
    #address_to_read = r"E:\pgmpy\Seq of sensor events_no overlap_based on different deltas\delta_{}min.csv"
    #address_to_read = r"E:\pgmpy\Seq of sensor events_based_on_activity_and_no_overlap_delta\delta_{}min.csv"
    results = []
    for delta in list(range(1,16)) + [30,45,60,75,90,100, 120,150, 180,200,240,300,400,500,600,700,800,900,1000]: #:
        x_train, x_test, y_train, y_test, max_features, maxlen = my_data_preparation(address_to_read.format(delta))#imdb_lstm_data_preparation(maxlen=10)
        test_score, test_acc, history, num_of_train_samples, num_of_test_sample = create_model_and_apply_on_data(x_train, y_train,x_test, y_test, max_features)
        results.append([delta, num_of_train_samples, num_of_test_sample, np.mean(history.history['loss']), np.mean(history.history['acc']),history.history['acc'][-1] ,test_score, test_acc])#, history.history
        #print(history.history)
    print_list_of_lists(results, ['delta(min)' ,'#train_samples', '#test_samples', 'train loss ', 'train acc(mean)', 'train_acc(last)', 'val loss ', 'val acc', ])#'history'

In [25]:
#address_to_read= r"E:/pgmpy/Seq of sensor events_based on activities/based_on_activities.csv"
#address_to_read = r"E:\pgmpy\Seq of sensor events_no overlap_based on different deltas\delta_{}min.csv"
address_to_read = r"E:\pgmpy\Seq of sensor events_based_on_activity_and_no_overlap_delta\delta_{}min.csv"
print(address_to_read)    
select_hyperparameters(address_to_read)

E:\pgmpy\Seq of sensor events_based_on_activity_and_no_overlap_delta\delta_{}min.csv
Pad sequences (samples x time)
x_train shape: (13556, 10)
x_test shape: (3389, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_13 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 13556 samples, validate on 3389 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.25673832061
Test accuracy: 0.905871938625
Pad sequences (samples x time)
x_train shape: (9797, 10

Epoch 5/5
Test score: 0.238481441214
Test accuracy: 0.917675544794
Pad sequences (samples x time)
x_train shape: (6129, 10)
x_test shape: (1532, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_18 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_18 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 6129 samples, validate on 1532 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.256576359374
Test accuracy: 0.909268929504
Pad sequences (samples x time)
x_train shape: (5779, 10)
x_test shape: (14

Epoch 4/5
Epoch 5/5
Test score: 0.316821835611
Test accuracy: 0.912559618442
Pad sequences (samples x time)
x_train shape: (4856, 10)
x_test shape: (1214, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_23 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_23 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_23 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 4856 samples, validate on 1214 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.334077790816
Test accuracy: 0.890444810642
Pad sequences (samples x time)
x_train shape: (4719, 10)
x_test 

Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.384836446523
Test accuracy: 0.878181818182
Pad sequences (samples x time)
x_train shape: (3572, 10)
x_test shape: (892, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_28 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_28 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 3572 samples, validate on 892 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.541170395142
Test accuracy: 0.853139013186
Pad sequences (samples x time)
x_train shape: (3239, 10)

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.814344453548
Test accuracy: 0.803867402656
Pad sequences (samples x time)
x_train shape: (2864, 10)
x_test shape: (716, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_33 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_33 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_33 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 2864 samples, validate on 716 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.877320788926
Test accuracy: 0.798882681897
Pad sequences (samples x time)
x_train shape: 

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.969608951867
Test accuracy: 0.769230769231
Pad sequences (samples x time)
x_train shape: (2676, 10)
x_test shape: (669, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_38 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_38 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_38 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 2676 samples, validate on 669 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.955952515828
Test accuracy: 0.733931240034
Pad sequences (samples x time)
x_train shape: 

  % delta_t_median)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 1.0783800885
Test accuracy: 0.680981595458
Pad sequences (samples x time)
x_train shape: (2584, 10)
x_test shape: (646, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_42 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_42 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_42 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 2584 samples, validate on 646 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.950739392002
Test accuracy: 0.758513931704
Pad sequences (samples x time)
x_train shape: (2

Train on 2575 samples, validate on 643 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.950448040829
Test accuracy: 0.754276827464
  delta(min)    #train_samples    #test_samples    train loss     train acc(mean)    train_acc(last epoch)    validation loss     validation acc
------------  ----------------  ---------------  -------------  -----------------  -----------------------  ------------------  ----------------
           1             13556             3389       0.184128           0.921909                 0.927338            0.256738          0.905872
           2              9797             2449       0.177827           0.925467                 0.933755            0.252728          0.912617
           3              8157             2039       0.174996           0.927449                 0.936864            0.244468          0.923492
           4              7224             1805       0.1788             0.927602                 0.937984            0.2

In [27]:
#address_to_read= r"E:/pgmpy/Seq of sensor events_based on activities/based_on_activities.csv"
address_to_read = r"E:\pgmpy\Seq of sensor events_no overlap_based on different deltas\delta_{}min.csv"
#address_to_read = r"E:\pgmpy\Seq of sensor events_based_on_activity_and_no_overlap_delta\delta_{}min.csv"
print(address_to_read)    
select_hyperparameters(address_to_read)

E:\pgmpy\Seq of sensor events_no overlap_based on different deltas\delta_{}min.csv
Pad sequences (samples x time)
x_train shape: (11619, 10)
x_test shape: (2904, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_47 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_47 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_47 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 11619 samples, validate on 2904 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.243222487136
Test accuracy: 0.887741046832
Pad sequences (samples x time)
x_train shape: (7738, 10)

Epoch 5/5
Test score: 0.163896668969
Test accuracy: 0.92238267148
Pad sequences (samples x time)
x_train shape: (3945, 10)
x_test shape: (986, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_52 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_52 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_52 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 3945 samples, validate on 986 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.167655717571
Test accuracy: 0.929006085193
Pad sequences (samples x time)
x_train shape: (3573, 10)
x_test shape: (893, 

  % delta_t_median)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.13204027338
Test accuracy: 0.946210268949
Pad sequences (samples x time)
x_train shape: (2997, 10)
x_test shape: (749, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_55 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_55 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_55 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 2997 samples, validate on 749 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.148540109503
Test accuracy: 0.94259012016
Pad sequences (samples x time)
x_train shape: (2

Train...
Train on 2349 samples, validate on 587 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.168072723381
Test accuracy: 0.943781942078
Pad sequences (samples x time)
x_train shape: (2235, 10)
x_test shape: (558, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_60 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_60 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_60 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 2235 samples, validate on 558 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.193844403848
Test accur

Train on 741 samples, validate on 185 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.227224701482
Test accuracy: 0.859459458815
Pad sequences (samples x time)
x_train shape: (624, 10)
x_test shape: (156, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_65 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_65 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_65 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 624 samples, validate on 156 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.170254581632
Test accuracy: 0.94230

  % delta_t_median)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.325258209821
Test accuracy: 0.798387102542
Pad sequences (samples x time)
x_train shape: (435, 10)
x_test shape: (108, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_68 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_68 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_68 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 435 samples, validate on 108 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.330978439914
Test accuracy: 0.768518518519
Pad sequences (samples x time)
x_train shape: (3

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.453346092077
Test accuracy: 0.730158718805
Pad sequences (samples x time)
x_train shape: (216, 10)
x_test shape: (54, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_73 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_73 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_73 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 216 samples, validate on 54 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.452451977465
Test accuracy: 0.759259250429
Pad sequences (samples x time)
x_train shape: (176

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.449709683657
Test accuracy: 0.857142865658
Pad sequences (samples x time)
x_train shape: (98, 10)
x_test shape: (24, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_78 (Embedding)     (None, None, 64)          7808      
_________________________________________________________________
lstm_78 (LSTM)               (None, 64)                33024     
_________________________________________________________________
dense_78 (Dense)             (None, 1)                 65        
Total params: 40,897
Trainable params: 40,897
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 98 samples, validate on 24 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.556285381317
Test accuracy: 0.708333313465
Pad sequences (samples x time)
x_train shape: (87, 1

In [None]:
create_model_and_apply_on_data(x_train, y_train,x_test, y_test, max_features)

In [11]:
address_to_read = r"E:\pgmpy\Seq of Bag of sensor events_based_on_activity_and_no_overlap_delta\delta_{}min.csv"
address_to_read = address_to_read.format(15)
list_of_data , list_of_persons , list_of_activities = read_sequence_of_bags_CSV_file_with_activity(file_address = address_to_read , has_header = True, separate_data_based_on_persons = False)
create_model_and_apply_on_data(x_train=  list_of_data, y_train = list_of_persons,x_test = list_of_data, y_test = list_of_persons, max_features = 100)

In [17]:
print((list_of_data[1]))

[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0 25 19 11 12  1  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  1  1  1  0  0  0  0  0  0  0  0  0  0  0  1  0
   1  1 10 10  2  2  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]]


In [16]:
address_to_read= r"E:/pgmpy/Seq of sensor events_based on activities/based_on_activities.csv"

x_train, x_test, y_train, y_test, max_features, maxlen = imdb_lstm_data_preparation(maxlen=10)
#my_x_train, my_x_test, my_y_train, my_y_test, my_max_features, my_maxlen = my_data_preparation(address_to_read)
#x_train, x_test, y_train, y_test, max_features, maxlen = my_data_preparation(address_to_read)#imdb_lstm_data_preparation(maxlen=10)
create_model_and_apply_on_data(x_train, y_train,x_test, y_test, max_features,embedding_vector_dim = maxlen)

Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 10)
x_test shape: (25000, 10)
Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 10)          200010    
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                19200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 219,275
Trainable params: 219,275
Non-trainable params: 0
_________________________________________________________________
Train...
Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.528882891769
Test accuracy: 0.72952


(0.52888289176940917,
 0.72951999999999995,
 <keras.callbacks.History at 0x261d2fd8908>,
 25000,
 25000)

In [None]:
embeddings = model.layers[0].get_weights()[0]
print(embeddings)

In [None]:
model.layers[1].

In [None]:
# `word_to_index` is a mapping (i.e. dict) from words to their index, e.g. `love`: 69
words_embeddings = {w:embeddings[idx] for w, idx in word_to_index.items()}

# now you can use it like this for example
print(words_embeddings['love'])  # possible output: [0.21, 0.56, ..., 0.65, 0.10]


In [None]:
print(type(score) , type(acc))