# Keras Sandbox

## Notes and Sources
- Contrastive Loss
- Triplet Loss
https://github.com/maciejkula/triplet_recommendations_keras
- Comparison
https://www.quora.com/What-are-the-advantages-of-using-a-triplet-loss-function-over-a-contrastive-loss-How-would-you-decide-which-to-use

In [111]:
import numpy as np
from keras.layers import Dense, Dropout, Flatten
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

## Load Data

In [60]:
def load_data(config, balance=True, random_state=None):
    """Load the three arrays saved under a common file prefix.

    Reads ``<config>_X_r.npy``, ``<config>_X_s.npy`` and ``<config>_y.npy``
    with ``np.load``.

    Parameters
    ----------
    config : str
        Path prefix shared by the three ``.npy`` files.
    balance : bool, default True
        When true, the three arrays are shuffled in unison. Actual class
        balancing is not implemented yet (see the TODO below), so no samples
        are dropped.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``sklearn.utils.shuffle``. Pass an int to make the
        shuffle reproducible across kernel restarts; ``None`` keeps the
        previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_r, X_s, y)`` in that order.
    """
    print('Loading data...')
    X_r = np.load(config + "_X_r.npy")
    X_s = np.load(config + "_X_s.npy")
    y = np.load(config + "_y.npy")
    print('Data loaded.')

    if balance:
        print("Balancing...")
        # Shuffle all three arrays with the same permutation so rows stay aligned.
        X_r, X_s, y = shuffle(X_r, X_s, y, random_state=random_state)
        #TODO get top count of 1s and 0s
    return X_r, X_s, y

## MLP with concatenation

### Prepare Data

In [110]:
Xr, Xs, y = load_data("test")  # shape (samples, length, one-hot-size)

# Zero-pad the one-hot axis of whichever array is narrower so both share the
# same width. Padding only Xs breaks when Xs is the wider one (negative pad
# size), and np.pad keeps the input dtype instead of upcasting to float64.
one_hot_width = max(Xr.shape[2], Xs.shape[2])
if Xr.shape[2] < one_hot_width:
    Xr = np.pad(Xr, ((0, 0), (0, 0), (0, one_hot_width - Xr.shape[2])), mode="constant")
if Xs.shape[2] < one_hot_width:
    Xs = np.pad(Xs, ((0, 0), (0, 0), (0, one_hot_width - Xs.shape[2])), mode="constant")

# Stack the two sequences along the length axis: one combined sample per row.
X = np.concatenate((Xr, Xs), axis=1)
print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Sandbox-sized subsets so the cells below run quickly; raise these for a real run.
N_TRAIN_SAMPLES = 10
N_TEST_SAMPLES = 1000
X_train = X_train[:N_TRAIN_SAMPLES]
y_train = y_train[:N_TRAIN_SAMPLES]
X_test = X_test[:N_TEST_SAMPLES]
y_test = y_test[:N_TEST_SAMPLES]
print(y_train)

Loading data...
Data loaded.
(2468682, 15, 6)
(2468682, 1)
[[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]]


In [107]:
# Single-hidden-layer MLP over the flattened (length, one-hot) samples.
model = Sequential()
# `Flatten` is imported directly: the bare name `keras` is never imported in
# this notebook, so `keras.layers.Flatten` fails on a fresh kernel.
# The input shape is taken from the data rather than hard-coded.
model.add(Flatten(input_shape=X_train.shape[1:]))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # one sigmoid unit for the binary label

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(X_train, y_train,
          epochs=20,
          batch_size=128)
# evaluate() returns [loss, accuracy] given the metrics compiled above.
score = model.evaluate(X_test, y_test, batch_size=128)
print(score)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[0.20742215871810912, 0.9960000023841858]
