In [1]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import torch

In [2]:
# Report the installed framework versions so results can be tied to an environment.
for _label, _version in (
    ("Pytorch Version: ", torch.__version__),
    ("Tensorflow Version: ", tf.__version__),
):
    print(_label, _version)

Pytorch Version:  1.5.0+cpu
Tensorflow Version:  2.1.0


## Deep Learning Baseline Model Cheatsheet

In [3]:
"""
learning_rate = 
loss = 
optimizer = tf.keras.optimizers.
metrics = 
n_epochs = 
batch_size = 
val_split =
early_stopping = 

layer = tf.keras.layers.Dense(units = 1, input_dim = input_dim / input_shape = input_shape)
activation = tf.keras.layers.Activation("")

model = tf.keras.Sequential()      # or initialize here with using a list format

model.add()

model.compile(loss = loss, optimizer = optimizer, metrics = metrics)

model.summary()

model.fit(X, y, epochs = n_epochs, batch_size=batch_size, validation_split=val_split
            , callbacks = [tf.keras.callbacks.EarlyStopping(patience=early_stopping, monitor="val_loss")])

history = model.fit()

model.predict(X)
model.predict_classes(X)

history.history["loss"]

model.evaluate(X, y)

"""

'\nlearning_rate = \nloss = \noptimizer = tf.keras.optimizer.\nmetircs = \nn_epochs = \nbatch_size = \nval_split =\nearly_stopping = \n\nlayer = tf.keras.layers.Dense(units = 1, input_dim = input_dim / input_shape = input_shape)\nactivation = tf.keras.layers.Activation("")\n\nmodel = tf.keras.Sequential()      # or initialize here with using a list format\n\nmodel.add()\n\nmodel.compile(loss = loss, optimizer = optimizer, metrics = metrics)\n\nmodel.summary()\n\nmodel.fit(X, y, epochs = n_epochs, batch_size=batch_size, validation_split=val_split\n            , callbacks = [tf.keras.callbacks.EarlyStopping(patience=early_stopping, monitor="val_loss")])\n\nhistory = model.fit()\n\nmodel.predict(X)\nmodel.predict_classes(X)\n\nhistory.history["loss"]\n\nmodel.evaluate(X, y)\n\n'

In [4]:
# Shared menus of hyper-parameter choices; later cells pick entries by position,
# so the order of every list below must stay stable.
learning_rate = 0.01

# Strings accepted by the `activation=` argument of Keras layers.
# NOTE(review): "deserialize", "get" and "serialize" look like helper functions of
# tf.keras.activations rather than activations -- confirm before selecting them.
# They are kept in place so that the positions of the other entries do not shift.
activation_list = ["sigmoid", "relu", "tanh", "softmax", "elu", "selu",
                   "deserialize", "exponential", "get", "hard_sigmoid",
                   "linear", "serialize", "softplus", "softsign"]

loss_list = ["mse", "categorical_crossentropy", tf.keras.losses.BinaryCrossentropy(from_logits=True)]

# One instance of each optimizer, all created with the same learning rate.
# `learning_rate=` is the supported keyword; `lr=` is only a deprecated alias.
# The rate is fixed at construction time: re-assigning `learning_rate` later
# does not affect these instances.
optimizer_list = [
    optimizer_cls(learning_rate=learning_rate)
    for optimizer_cls in (
        tf.keras.optimizers.SGD,
        tf.keras.optimizers.Adam,
        tf.keras.optimizers.Adamax,
        tf.keras.optimizers.Adadelta,
        tf.keras.optimizers.Adagrad,
        tf.keras.optimizers.RMSprop,
    )
]

metrics_list = ["accuracy"]

### Image Data 

* Use vanilla CNN (note: the baseline model built below currently uses only Dense layers)
* Data from https://www.kaggle.com/c/digit-recognizer

In [5]:
# Load the MNIST training table from the local data directory into a DataFrame.
mnist_data = pd.read_csv(filepath_or_buffer="../data/mnist_train.csv")

In [6]:
# Features: every column after the first, as a fresh float32 array scaled to [0, 1].
# Unscaled integer intensities saturate the sigmoid units of the first layer.
# NOTE(review): the divisor assumes 8-bit pixel values (0-255) -- confirm against the CSV.
X = mnist_data.iloc[:, 1:].values.astype("float32") / 255.0

# Labels: first column, one-hot encoded (one column per distinct label value).
# Cast to float32 so the dtype does not depend on the pandas version (uint8 vs bool).
y = pd.get_dummies(mnist_data.iloc[:, 0]).values.astype("float32")
# y = tf.keras.utils.to_categorical(mnist_data.iloc[:,0], 10)    yields same result

In [7]:
model = tf.keras.Sequential()    ## or tf.keras.models.Sequential()

## Different models

n = len(X[0])        # number of feature columns in one sample
num_units = 1        # output dimension for the dense layer
input_shape = (n,)   # input shape: shape of input (tuple) / input_dim = dimension of input (integer) (define only at the first layer)
learning_rate = 0.01 # NOTE: the optimizers in optimizer_list already exist, so this assignment does not change their rate

# You can also construct the layers in the initialization of the model
# model = tf.keras.Sequential([tf.keras.layers.Dense(units = 10, input_shape = input_shape, activation = activation_list[0])])

# Hidden layer; only the first layer needs the input shape.
model.add(tf.keras.layers.Dense(units = 5, input_shape = input_shape, activation = activation_list[0]))
# Output layer: one unit per class. Later layers infer their input size,
# so the previous `input_dim = 5` argument was redundant and is dropped.
model.add(tf.keras.layers.Dense(units = 10))
# categorical_crossentropy (used below) expects a probability distribution over
# the classes, so the output activation must be softmax rather than relu.
model.add(tf.keras.layers.Activation("softmax"))


model.compile(loss = loss_list[1], optimizer = optimizer_list[0], metrics = [metrics_list[0]])

In [8]:
# Print each layer with its output shape and parameter count, plus the totals.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 5)                 3925      
_________________________________________________________________
dense_1 (Dense)              (None, 10)                60        
_________________________________________________________________
activation (Activation)      (None, 10)                0         
Total params: 3,985
Trainable params: 3,985
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train with a 25% validation split; stop early once val_loss has not improved
# for 5 consecutive epochs. The returned History object is kept so the per-epoch
# curves (e.g. history.history["loss"]) can be inspected afterwards instead of
# only being echoed as an object repr.
history = model.fit(
    X, y,
    epochs = 10,
    batch_size = 32,
    validation_split = 0.25,
    callbacks = [tf.keras.callbacks.EarlyStopping(patience=5, monitor="val_loss")],
)

Train on 31500 samples, validate on 10500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f191c0bf2d0>

### Text Data

* Use Vanilla RNN/LSTM/GRU
* Data from https://www.kaggle.com/rounakbanik/the-movies-dataset

In [47]:
# The movies data was pre-processed before this notebook: the text has been broken
# down into words with stopwords removed, and each movie keeps a single genre.
# Only the three columns used below are retained.
movie_data = pd.read_csv("../data/movies_data.csv").loc[:, ["original_title", "tokens", "one_genres"]]

In [48]:
# The "tokens" column holds each token list as the text of a Python list literal.
# ast.literal_eval only accepts literals, whereas eval would execute any code
# that happens to be embedded in the data file.
tokens_parsed = movie_data["tokens"].apply(ast.literal_eval)

# Drop movies without any token *before* encoding the genres, so that the one-hot
# column, the genre index and the target matrix all share the same columns.
has_tokens = tokens_parsed.apply(len) > 0
movie_data = movie_data.loc[has_tokens].reset_index(drop = True)

genre_dummies = pd.get_dummies(movie_data["one_genres"])
movie_data["genre_onehot"] = pd.Series(genre_dummies.values.tolist())
movie_data["genre_ind"] = movie_data["genre_onehot"].apply(lambda x: x.index(1))

# X holds the parsed token lists (not their string repr), so downstream steps
# such as the Keras Tokenizer and X.explode() operate on individual words.
X = tokens_parsed.loc[has_tokens].reset_index(drop = True)
# numpy array works better with tensorflow (pandas doesn't work if the elements are arrays);
# float32 keeps the dtype independent of the pandas version (uint8 vs bool dummies)
y = genre_dummies.values.astype("float32")

# alternatively you could use tensorflow framework to generate the one-hot vector
# y_tokenized = ytokenizer.texts_to_matrix(y)

# codes below here turns integers into one-hot
# from tensorflow.keras.utils import to_categorical
# y_categorical = to_categorical(y_tokenized)

In [49]:
# Maximum length kept for each sequence.
seq_len = 60

# --- Tokenization: fit the vocabulary on X, then encode every entry as integer ids ---
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(X)
X_tokenized = tokenizer.texts_to_sequences(X)
# Lookup tables available on the fitted tokenizer:
#   tokenizer.word_index -> dictionary of word2index
#   tokenizer.index_word -> dictionary of index2word

# --- Padding: bring every sequence to exactly seq_len entries ---
padding_methods = ["pre", "post"]
X_padded = tf.keras.preprocessing.sequence.pad_sequences(
    sequences=X_tokenized,
    maxlen=seq_len,
    padding=padding_methods[0],
)

# Manual alternative to pad_sequences: right-fill with zeros so that all
# elements have equal "input_length".
#max_seq_length = X.apply(len).max()
#X = X.apply(lambda x: x + [0]*(max_seq_length - len(x)))

In [13]:
# Embedding layer (can only be used as the first layer)
"""
N = vocab size + 1 (if vocab size = 999, N = 1000); index 0 is never assigned to a word
embedding_size = length of the dense vector each token id is mapped to
input_length = length of each (padded) sequence

takes input of (batch, input_length)

model.output_shape == (None, input_length, embedding_size)
"""
# Size the embedding table from the fitted tokenizer, so every id that appears
# in X_padded is a valid row index.
N = len(tokenizer.word_index) + 1
embedding_size = 500
# Sequences were padded/truncated to seq_len; `max_seq_length` only exists in
# commented-out code and raised a NameError on a fresh kernel.
input_length = seq_len

model = tf.keras.Sequential([tf.keras.layers.Embedding(N, embedding_size)])

model.compile("rmsprop", loss = loss_list[1])

# NOTE(review): this materialises one embedding_size-long vector for every position
# of every padded sequence; pass a slice of X_padded if memory is tight.
embedded = model.predict(X_padded)

In [61]:
num_units = 64

"""
modified from https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRU

inner_size = vocab size (or embedded size if embedded)
batch_size = number of sequences (data)
seq_length = length of each data (sentence)

inputs = np.random.random([batch_size, seq_length, inner_size]).astype(np.float32)
gru = tf.keras.layers.GRU(num_units)

output = gru(inputs)  # The output has shape `[batch_size, num_units]`.

gru = tf.keras.layers.GRU(num_units, return_sequences=True, return_state=True)

# whole_sequence_output has shape `[batch_size, seq_length, num_units]`.
# final_state has shape `[batch_size, num_units]`.
whole_sequence_output, final_state = gru(inputs)
"""
# Number of target classes = width of one one-hot label row.
cat_classes = len(y[0])

model = tf.keras.Sequential([tf.keras.layers.Embedding(N, embedding_size)])

# Candidate recurrent layers; pick one by position below.
rnn_lstm_gru = [tf.keras.layers.LSTM(units = num_units), tf.keras.layers.GRU(units = num_units)]

model.add(tf.keras.layers.Bidirectional(rnn_lstm_gru[1]))

# categorical_crossentropy (selected below by name) expects class probabilities,
# so the output layer needs a softmax; without it raw logits were fed to the loss.
model.add(tf.keras.layers.Dense(units = cat_classes, activation = "softmax"))

model.compile(loss = loss_list[1], optimizer = optimizer_list[5], metrics = [metrics_list[0]])

# Keep the History object so the training curves can be inspected afterwards.
history = model.fit(
    X_padded, y,
    epochs = 5,
    batch_size = 1000,
    validation_split = 0.25,
    callbacks = [tf.keras.callbacks.EarlyStopping(patience=3, monitor="val_loss")],
)

Train on 31444 samples, validate on 10482 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f1903f4d850>

### Time-series Data

* Cryptocurrency data from https://www.kaggle.com/philmohun/cryptocurrency-financial-data#consolidated_coin_data.csv