[View in Colaboratory](https://colab.research.google.com/github/iegorval/neural_nets/blob/master/Robot.ipynb)

# Getting Ready
Prepare the files and the libraries for the experiment.

In [0]:
# Installs
!pip install -U -q PyDrive

In [3]:
# Imports
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from keras.layers import LSTM, Input, Activation, Dense, TimeDistributed, Dropout
from keras.models import Sequential
from keras.preprocessing import sequence
import re
from io import StringIO

Using TensorFlow backend.


In [0]:
# PyDrive client
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
# Get all files from the root folder
file_list = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()

In [0]:
# Get the .pow files 
pow_files = [(file['title'], file['id'], re.sub(r'_[0-9]+|.pow', '', file['title'])) 
             for file in file_list if file['title'].find('.pow')>0]
num_examples = len(pow_files)
# Get the categories
categories = list(set([file_info[2] for file_info in pow_files]))
# Get the dictionary of the categories
num_cat = len(categories)
categories_dict = {categories[i] : i for i in range(num_cat)}

In [0]:
# Make a list of dataframes along with their corresponding classes
list_df = list()
for pow_file in pow_files:
  f = drive.CreateFile({'id': pow_file[1]})
  content = f.GetContentString()
  dataIO = StringIO(content)
  df = pd.read_csv(dataIO, header=None, sep=' ')
  list_df.append((df, to_one_hot(categories_dict[pow_file[2]])))

# Utils
Helper functions.

In [0]:
def to_one_hot(category):
  assert category < num_cat
  one_hot = np.zeros((num_cat))
  one_hot[category] = 1
  return one_hot

In [0]:
def from_one_hot(one_hot):
  for i in range(len(one_hot)):
    if one_hot[i]==1:
      return i

# Model
Recurrent Neural Network model in Keras.

In [0]:
def get_model():
  model = Sequential()
  model.add(LSTM(128, return_sequences=True, input_shape=(max_length,1)))
  model.add(Dropout(0.7))
  model.add(LSTM(128))
  model.add(Dropout(0.7))
  model.add(Dense(num_cat))
  model.add(Activation("softmax"))
  return model

# Data
Preprocess the data: combine all the examples together and introduce padding where necessary. Split the longest time series in the several smaller ones.

In [0]:
#data_train = np.zeros(num_examples, )
max_length = 1000
#biggest_df = max(list_df, key=lambda df_info: len(df_info[0]) if len(df_info[0]) < THRESHOLD else 0)
#max_length = len(biggest_df[0])

In [0]:
# Split the biggest examples into separate sequences
long_dfs = [i for i in range(len(list_df)) if list_df[i][0].shape[0] > max_length]
list_df_cut = list(list_df)
max_examples = 25 #75
examples_categories = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
for i in reversed(long_dfs):
  df = list_df_cut[i][0]
  category = from_one_hot(list_df_cut[i][1])
  for g, split_df in df.groupby(np.arange(len(df)) // max_length):
    if examples_categories[category] < max_examples:
      list_df_cut.append((split_df, list_df[i][1]))
      examples_categories[category] += 1
  del list_df_cut[i]

In [0]:
# Get only the measurements for current
currents_list = [(np.transpose(np.array(df_cut[0][[2]])), df_cut[1]) for df_cut in list_df_cut]
currents_long = [(np.transpose(currents[0]), currents[1]) for currents in currents_list if currents[0].shape[1]>=max_length]
currents_no_labels = [currents[0] for currents in currents_long]
# Pad the measurements with 0s from the beginning
#currents_pad = [np.transpose(sequence.pad_sequences(currents[0], maxlen=max_length, dtype='float64')) for currents in currents_list]
# Get training data into a numpy matrix
train_x = np.stack(currents_no_labels, axis=0)
# Get training labels
train_y = np.array([currents[1] for currents in currents_long])
train_y_labels = [from_one_hot(one_hot) for one_hot in train_y]

In [14]:
k = 0
for label in train_y_labels:
  if label==5:
    k+=1
print(k)
print(categories_dict)
i = 3
#plt.plot(currents_long[i])
print(train_y[i])

10
{'black_rough': 0, 'cubes': 1, 'grass_flat': 2, 'grass_rough': 3, 'flat': 4, 'black_flat': 5}
[0. 0. 0. 0. 0. 1.]


In [15]:
print(train_x.shape, train_y.shape)

(98, 1000, 1) (98, 6)


# Experiment

In [16]:
model = get_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 1000, 128)         66560     
_________________________________________________________________
dropout_1 (Dropout)          (None, 1000, 128)         0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 774       
_________________________________________________________________
activation_1 (Activation)    (None, 6)                 0         
Total params: 198,918
Trainable params: 198,918
Non-trainable params: 0
_________________________________________________________________


In [0]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [0]:
model.fit(train_x, train_y, epochs = 100)

In [0]:
model.evaluate(x=train_x, y=train_y)