In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import f1_score
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from keras.layers import LeakyReLU, PReLU, ELU, ParametricSoftplus, ThresholdedLinear, ThresholdedReLU, SReLU

from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization, Dropout
from keras.regularizers import l1, l2, l1l2
from keras.optimizers import Adam
from keras.callbacks import Callback
from keras import backend as K

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 5.0) # set default size of plots
# Use nearest-neighbour interpolation by default when images are displayed.
plt.rcParams['image.interpolation'] = 'nearest'

Using Theano backend.
Using gpu device 0: GeForce GTX 670 (CNMeM is disabled, CuDNN 4007)


In [2]:
# Names of the nine binary target attributes. The order matters: these names
# are used to select the target columns, so position i is label id i.
categories = """
  good_for_lunch
  good_for_dinner
  takes_reservations
  outdoor_seating
  restaurant_is_expensive
  has_alcohol
  has_table_service
  ambience_is_classy
  good_for_kids
""".split()

# Feature column names 'f0' .. 'f4095' (4096 columns in total).
vgg_cols = ['f%d' % i for i in range(4096)]

In [3]:
# Read the grouped training table and pull out the feature columns (vgg_cols)
# and the target columns (categories) as plain arrays.
data = pd.read_hdf('data/Xtrain_grouped.hdf5')
X = np.array(data[vgg_cols])
Y = np.array(data[categories])

# Hold out 20% of the rows for validation; the seeded RandomState makes the
# split repeatable.
random_state = np.random.RandomState(0)
Xtrain, Xval, ytrain, yval = train_test_split(
  X, Y, test_size=.2, random_state=random_state
)
valset = (Xval, yval)

print('%s %s %s %s' % (Xtrain.shape, Xval.shape, ytrain.shape, yval.shape))

(1596, 4096) (400, 4096) (1596, 9) (400, 9)


In [4]:
def loss(ytrue, ypred):
  """Sum-of-squared-errors objective.

  Squares the element-wise difference between the targets and the predictions
  and adds everything up (a sum, not a mean) using the Keras backend ops.
  """
  squared_error = K.square(ytrue - ypred)
  return K.sum(squared_error)

In [5]:
class f1printerCallback(Callback):
  """Keras callback that tracks micro-averaged F1 after every epoch.

  At the end of each epoch the model is scored on the module-level
  (Xtrain, ytrain) and (Xval, yval) arrays. Whenever the validation F1 beats
  the best value seen so far, a progress line is printed and, if `savefile`
  is set and the score exceeds `save_threshold`, the weights are saved.

  Parameters
  ----------
  savefile : str or None
    Path passed to `model.save_weights`; nothing is saved when falsy.
  save_threshold : float
    Validation F1 that must be exceeded before weights are written
    (default 0.84).

  Attributes
  ----------
  epochs : list
    Epoch numbers seen so far in the current `fit` call.
  history : dict
    Per-epoch scores under the keys 'trainf1' and 'valf1'.
  bestf1val, bestmulti : float
    Validation F1 (and train*val product) at the best validation epoch.
  bestf1train : float
    Initialised to 0 and not updated by this class.
  """

  def __init__(self, savefile=None, save_threshold=0.84):
    super(f1printerCallback, self).__init__()
    self.bestf1train = 0
    self.bestf1val = 0
    self.bestmulti = 0

    self.savefile = savefile
    self.save_threshold = save_threshold

    # Created here as well so the attributes exist before training starts.
    self.epochs = []
    self.history = {'trainf1': [], 'valf1': []}

  def on_train_begin(self, logs=None):
    """Reset the per-epoch records at the start of every `fit` call."""
    self.epochs = []
    self.history = {'trainf1': [], 'valf1': []}

  def _micro_f1(self, inputs, targets):
    """Predict on `inputs`, binarise at 0.5 and return micro-averaged F1."""
    scores = self.model.predict(inputs)
    # A single comparison guarantees a strict 0/1 matrix; a score of exactly
    # 0.5 is mapped to 1 rather than being left as 0.5.
    hard = (scores >= .5).astype(scores.dtype)
    return f1_score(targets, hard, average='micro')

  def on_epoch_end(self, epoch, logs=None):
    """Score the model, report/save on a new best validation F1, log history."""
    f1_train = self._micro_f1(Xtrain, ytrain)
    f1_val = self._micro_f1(Xval, yval)

    multi = f1_train * f1_val
    # Selection criterion is the validation score. Alternatives that were
    # tried: `f1_train > self.bestf1train` and `multi > self.bestmulti`.
    if f1_val > self.bestf1val:
      print('Epoch %d -- train %f, val %f, multi %f' % (epoch, f1_train, f1_val, multi))
      self.bestmulti = multi
      self.bestf1val = f1_val

      if f1_val > self.save_threshold and self.savefile:
        print('Saving weights...')
        self.model.save_weights(self.savefile, overwrite=True)

    self.epochs.append(epoch)
    self.history['trainf1'].append(f1_train)
    self.history['valf1'].append(f1_val)

# Model1: This configuration is currently the best one

Epoch 253 -- train 0.961918, val 0.844794, multi 0.812623

0.80999 Kaggle LB

In [6]:
# model = Sequential()

# np.random.seed(0)

# activation = 'softplus'
# dropout_val = .5
# n_neurons = 100

# model.add(Dense(n_neurons, input_shape=(4096,), activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(9))
# model.add(Activation('sigmoid'))

# model.compile(loss=loss, optimizer='adam')

# Model2

Epoch 107 -- train 0.942859, val 0.846003, multi 0.797662

In [7]:
# model = Sequential()

# np.random.seed(0)

# activation = 'softplus'
# dropout_val = .35
# n_neurons = 200

# model.add(Dense(n_neurons, input_shape=(4096,), activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(n_neurons, activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(9))
# model.add(Activation('sigmoid'))

# model.compile(loss=loss, optimizer='adam')

# Model3

Epoch 227 -- train 0.935710, val 0.846756, multi 0.792318

In [8]:
# model = Sequential()

# np.random.seed(0)

# activation = 'softplus'
# dropout_val = .4
# n_neurons = 100

# model.add(Dense(n_neurons, input_shape=(4096,), activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(n_neurons, activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(n_neurons, activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(9))
# model.add(Activation('sigmoid'))

# model.compile(loss=loss, optimizer='adam')

# Model4

Epoch 138 -- train 0.947807, val 0.847851, multi 0.803599

In [9]:
# model = Sequential()

# np.random.seed(0)

# activation = 'softplus'
# dropout_val = .35
# n_neurons = 200

# model.add(Dense(n_neurons, input_shape=(4096,), activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(n_neurons, activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(n_neurons, activation=activation))
# model.add(Dropout(dropout_val))

# model.add(Dense(9))
# model.add(Activation('sigmoid'))

# model.compile(loss=loss, optimizer='adam')

# Model5

Epoch 125 -- train 0.949649, val 0.848280, multi 0.805568

LB 0.80318

In [10]:
# model = Sequential()

# np.random.seed(0)

# dropout_val = .4
# n_neurons = 300
# activation = SReLU

# model.add(Dense(n_neurons, input_shape=(4096,)))
# model.add(activation())
# model.add(Dropout(dropout_val))

# model.add(Dense(n_neurons))
# model.add(activation())
# model.add(Dropout(dropout_val))

# model.add(Dense(n_neurons))
# model.add(activation())
# model.add(Dropout(dropout_val))

# model.add(Dense(9))
# model.add(Activation('sigmoid'))

# model.compile(loss=loss, optimizer='adam')

# Retrain best models and save their weights

#### Model1

In [11]:
# Model1: one 100-unit softplus hidden layer (dropout 0.5) feeding nine
# sigmoid outputs, compiled with the custom `loss` and the Adam optimizer.
model1 = Sequential()
np.random.seed(0)  # seed NumPy's global RNG before any layer is added
model1.add(Dense(100, input_shape=(4096,), activation='softplus'))
model1.add(Dropout(.5))
model1.add(Dense(9))
model1.add(Activation('sigmoid'))
model1.compile(loss=loss, optimizer='adam')

# 254 epochs: the search above reported its best validation F1 at epoch 253
# (the epoch counter starts at 0).
hist = model1.fit(Xtrain, ytrain, verbose=0, nb_epoch=254)

# Binarise with a single comparison so the result is a strict 0/1 matrix;
# a score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = model1.predict(Xval)
pred = (pred >= .5).astype(pred.dtype)
f1_val = f1_score(yval, pred, average='micro')
print('')
print(f1_val)


0.844793713163


#### Model2

In [12]:
# Model2: two 200-unit softplus hidden layers (dropout 0.35 each) feeding nine
# sigmoid outputs, compiled with the custom `loss` and the Adam optimizer.
model2 = Sequential()
np.random.seed(0)  # seed NumPy's global RNG before any layer is added
model2.add(Dense(200, input_shape=(4096,), activation='softplus'))
model2.add(Dropout(.35))
model2.add(Dense(200, activation='softplus'))
model2.add(Dropout(.35))
model2.add(Dense(9))
model2.add(Activation('sigmoid'))
model2.compile(loss=loss, optimizer='adam')

# 108 epochs: the search above reported its best validation F1 at epoch 107
# (the epoch counter starts at 0).
hist = model2.fit(Xtrain, ytrain, verbose=0, nb_epoch=108)

# Binarise with a single comparison so the result is a strict 0/1 matrix;
# a score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = model2.predict(Xval)
pred = (pred >= .5).astype(pred.dtype)
f1_val = f1_score(yval, pred, average='micro')
print('')
print(f1_val)


0.846002805049


#### Model3

In [13]:
# Model3: three 100-unit softplus hidden layers (dropout 0.4 each) feeding
# nine sigmoid outputs, compiled with the custom `loss` and the Adam optimizer.
model3 = Sequential()
np.random.seed(0)  # seed NumPy's global RNG before any layer is added
model3.add(Dense(100, input_shape=(4096,), activation='softplus'))
model3.add(Dropout(.4))
model3.add(Dense(100, activation='softplus'))
model3.add(Dropout(.4))
model3.add(Dense(100, activation='softplus'))
model3.add(Dropout(.4))
model3.add(Dense(9))
model3.add(Activation('sigmoid'))
model3.compile(loss=loss, optimizer='adam')

# 228 epochs: the search above reported its best validation F1 at epoch 227
# (the epoch counter starts at 0).
hist = model3.fit(Xtrain, ytrain, verbose=0, nb_epoch=228)

# Binarise with a single comparison so the result is a strict 0/1 matrix;
# a score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = model3.predict(Xval)
pred = (pred >= .5).astype(pred.dtype)
f1_val = f1_score(yval, pred, average='micro')
print('')
print(f1_val)


0.846756152125


#### Model4

In [14]:
# Model4: three 200-unit softplus hidden layers (dropout 0.35 each) feeding
# nine sigmoid outputs, compiled with the custom `loss` and the Adam optimizer.
model4 = Sequential()
np.random.seed(0)  # seed NumPy's global RNG before any layer is added
model4.add(Dense(200, input_shape=(4096,), activation='softplus'))
model4.add(Dropout(.35))
model4.add(Dense(200, activation='softplus'))
model4.add(Dropout(.35))
model4.add(Dense(200, activation='softplus'))
model4.add(Dropout(.35))
model4.add(Dense(9))
model4.add(Activation('sigmoid'))
model4.compile(loss=loss, optimizer='adam')

# 139 epochs: the search above reported its best validation F1 at epoch 138
# (the epoch counter starts at 0).
hist = model4.fit(Xtrain, ytrain, verbose=0, nb_epoch=139)

# Binarise with a single comparison so the result is a strict 0/1 matrix;
# a score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = model4.predict(Xval)
pred = (pred >= .5).astype(pred.dtype)
f1_val = f1_score(yval, pred, average='micro')
print('')
print(f1_val)


0.847850678733


#### Model 5

In [15]:
# Model5: three 300-unit dense layers, each followed by an SReLU activation
# and dropout 0.4, feeding nine sigmoid outputs; compiled with the custom
# `loss` and the Adam optimizer.
model5 = Sequential()
np.random.seed(0)  # seed NumPy's global RNG before any layer is added
model5.add(Dense(300, input_shape=(4096,)))
model5.add(SReLU())
model5.add(Dropout(.4))
model5.add(Dense(300))
model5.add(SReLU())
model5.add(Dropout(.4))
model5.add(Dense(300))
model5.add(SReLU())
model5.add(Dropout(.4))
model5.add(Dense(9))
model5.add(Activation('sigmoid'))
model5.compile(loss=loss, optimizer='adam')

# 126 epochs: the search above reported its best validation F1 at epoch 125
# (the epoch counter starts at 0).
hist = model5.fit(Xtrain, ytrain, verbose=0, nb_epoch=126)

# Binarise with a single comparison so the result is a strict 0/1 matrix;
# a score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = model5.predict(Xval)
pred = (pred >= .5).astype(pred.dtype)
f1_val = f1_score(yval, pred, average='micro')
print('')
print(f1_val)


0.848279751833


# Build an ensemble

### Try on val data

In [28]:
# Average the sigmoid outputs of the five retrained models on the validation
# set and score the thresholded average.
models = [model1, model2, model3, model4, model5]

# Stack the per-model predictions along a new leading axis. The stack is
# float64 (as the zero-filled buffer was) and its shape follows from the
# model list and the data instead of hard-coded sizes.
ensemble_pred = np.array([m.predict(Xval) for m in models], dtype=np.float64)

pred_mean = ensemble_pred.mean(axis=0)
# Binarise with a single comparison so the result is a strict 0/1 matrix;
# an averaged score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred_mean = (pred_mean >= .5).astype(pred_mean.dtype)

# NOTE(review): this is the macro average, whereas the per-model cells above
# report the micro average -- the numbers are not directly comparable.
f1_score(yval, pred_mean, average='macro')

0.83150126671899394

### Try on test data

KAGGLE LB 0.81371!!! 14th place atm

In [21]:
# Average the sigmoid outputs of the five retrained models on the test set.
# NOTE(review): `Xtest` is built in a cell further down this notebook; that
# cell has to be executed before this one.
models = [model1, model2, model3, model4, model5]

# Stack the per-model predictions along a new leading axis. The stack is
# float64 (as the zero-filled buffer was) and its shape follows from the
# model list and the data instead of hard-coded sizes.
ensemble_pred = np.array([m.predict(Xtest) for m in models], dtype=np.float64)

pred = ensemble_pred.mean(axis=0)
# Binarise with a single comparison so the result is a strict 0/1 matrix;
# an averaged score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = (pred >= .5).astype(pred.dtype)
print(pred.shape)

(10000, 9)


In [308]:
# Train `model` for 200 epochs with the F1-tracking callback attached. No
# savefile is given, so the callback only prints and records scores.
# NOTE(review): `model` is only assigned in the commented-out cells above, so
# one of those definitions has to be re-enabled before this cell can run.
cb = f1printerCallback()
hist = model.fit(Xtrain, ytrain, callbacks=[cb], verbose=0, nb_epoch=200)

Epoch 0 -- train 0.757415, val 0.736081, multi 0.557519
Epoch 1 -- train 0.792720, val 0.770306, multi 0.610637
Epoch 3 -- train 0.805266, val 0.780460, multi 0.628478
Epoch 4 -- train 0.807675, val 0.786365, multi 0.635127
Epoch 5 -- train 0.829300, val 0.805268, multi 0.667808
Epoch 8 -- train 0.837286, val 0.807596, multi 0.676188
Epoch 9 -- train 0.842008, val 0.814174, multi 0.685541
Epoch 12 -- train 0.848854, val 0.818757, multi 0.695006
Epoch 15 -- train 0.855214, val 0.821956, multi 0.702948
Epoch 17 -- train 0.863123, val 0.827352, multi 0.714107
Epoch 18 -- train 0.865264, val 0.829116, multi 0.717404
Epoch 19 -- train 0.862231, val 0.834113, multi 0.719198
Epoch 26 -- train 0.874864, val 0.834335, multi 0.729929
Epoch 33 -- train 0.883403, val 0.835813, multi 0.738359
Epoch 36 -- train 0.886229, val 0.835982, multi 0.740872
Epoch 39 -- train 0.888161, val 0.841870, multi 0.747716
Epoch 56 -- train 0.907545, val 0.843534, multi 0.765545
Epoch 73 -- train 0.911717, val 0.8448

In [309]:
# Restore previously saved weights into `model` and report micro-averaged F1
# on the training and validation splits.
# NOTE(review): `model` is only assigned in the commented-out cells above, and
# no uncommented cell writes this weights file -- confirm both exist first.
model.load_weights('models/dummy_dense_average_model.h5')

# Binarise with a single comparison so the result is a strict 0/1 matrix;
# a score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = model.predict(Xtrain)
pred = (pred >= .5).astype(pred.dtype)

f1_train = f1_score(ytrain, pred, average='micro')

pred = model.predict(Xval)
pred = (pred >= .5).astype(pred.dtype)

f1_val = f1_score(yval, pred, average='micro')

print('%s %s' % (f1_train, f1_val))

0.949648543968 0.848279751833


# Try this little stuff on test data

In [17]:
# Binarizer over the nine class ids 0..8; it is used later to turn rows of the
# 0/1 prediction matrix back into lists of class ids.
mlb = MultiLabelBinarizer()
mlb.fit([(0,1,2,3,4,5,6,7,8)])

# The grouped test features are stored in six HDF5 part files. Collect the
# parts and concatenate them once instead of growing a frame with
# DataFrame.append inside the loop (quadratic copying, and the method no
# longer exists in current pandas).
test_parts = []
for i in range(1, 7):
  part = pd.read_hdf('data/Xtest_grouped_part' + str(i) + '.hdf5', 'Xtest')
  print('%d %d' % (i, len(part)))
  test_parts.append(part)

dataTest = pd.concat(test_parts)

1 1637
2 1707
3 1654
4 1746
5 1735
6 1521


In [18]:
# Test feature matrix: the columns named in vgg_cols, taken from the
# concatenated test table as a plain array. The shape is shown as a check.
Xtest = np.array(dataTest[vgg_cols])
Xtest.shape

(10000, 4096)

In [313]:
# Single-model prediction on the test set.
# NOTE(review): `model` is only assigned in the commented-out cells above; the
# ensemble cell also writes `pred`, so whichever of the two runs last decides
# what is submitted.
pred = model.predict(Xtest)
# Binarise with a single comparison so the result is a strict 0/1 matrix;
# a score of exactly 0.5 maps to 1 instead of being left as 0.5.
pred = (pred >= .5).astype(pred.dtype)

In [22]:
# Turn each row of the 0/1 prediction matrix back into the class ids
# (0..8, the ids `mlb` was fitted on) that are switched on in that row.
labels = mlb.inverse_transform(pred)

In [23]:
# One space-separated string of class ids per row, e.g. '1 2 3 5 6'.
labels_str = [' '.join(str(label_id) for label_id in row) for row in labels]

In [24]:
# Submission table: one row per test business, pairing the index of dataTest
# (used as business_id) with its space-separated label string.
# The labels Series carries a default 0..n-1 index, so rows are paired with
# the business ids by position -- this relies on labels_str being in the same
# row order as dataTest.
results = pd.DataFrame({'business_id': dataTest.index, 'labels': pd.Series(labels_str)})
results.head()

Unnamed: 0,business_id,labels
0,003sg,1 2 3 5 6
1,00er5,1 2 3 5 6 8
2,00kad,1 2 3 5 6 8
3,00mc6,1 2 5 6
4,00q7x,1 2 4 5 6 7


In [25]:
# Write the submission file without the frame's row index; the target
# directory 'results/' must already exist.
results.to_csv('results/first_ensemble_val_8471.csv', index=False)