In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os

from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense,Dropout
from keras.models import Sequential
from keras.callbacks import EarlyStopping,ModelCheckpoint

from sklearn.model_selection import train_test_split


%matplotlib inline
# Any results you write to the current directory are saved as output.

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Read both competition files from the working directory (test first, then train).
test_df, train_df = (
    pd.read_json(json_path) for json_path in ('test.json', 'train.json')
)

In [5]:
# Pull the label column out of the training frame; everything else stays as features.
target = train_df['cuisine']
train = train_df.drop(columns='cuisine')

# The test frame carries no label column, so it is used as-is.
test = test_df

target.head()

0        italian
1          greek
2        italian
3    southern_us
4         french
Name: cuisine, dtype: object

In [6]:
# Fit the vocabulary on the training recipes, then encode each recipe as one
# TF-IDF weighted row of a (n_recipes, vocabulary_size) matrix.
# NOTE(review): each 'ingredients' entry looks like a list of strings, which the
# Keras Tokenizer would treat as pre-split tokens — confirm.
ingredient_lists = train['ingredients']

t = Tokenizer()
t.fit_on_texts(ingredient_lists)
train_encoded = t.texts_to_matrix(ingredient_lists, mode='tfidf')

In [7]:
# Assign every distinct cuisine an integer id, in the order returned by unique().
cuisines = train_df['cuisine'].unique()
label2index = {cuisine: i for i, cuisine in enumerate(cuisines)}

# The mapping is built from the same column as `target`, so every label has an id.
# Look it up directly: a missing label now raises KeyError instead of being
# silently skipped, which would leave `y` shorter than (and misaligned with)
# the rows of the encoded feature matrix.
y = [label2index[item] for item in target]

# Derive the number of classes from the mapping rather than hard-coding it.
y_encoded = to_categorical(y, len(label2index))

In [8]:
# Sanity check: features and one-hot labels should have the same number of rows.
print(train_encoded.shape, y_encoded.shape, sep='\n')

(29774, 6189)
(29774, 20)


In [9]:
def build_model(input_dim=None, num_classes=20, hidden_units=256, dropout_rate=0.4):
    """Build and compile the single-hidden-layer classifier.

    Architecture: Dense(hidden_units, relu) -> Dropout(dropout_rate) ->
    Dense(num_classes) with no activation (raw scores), compiled with the
    Adam optimizer, categorical hinge loss and an accuracy metric.

    Args:
        input_dim: Number of input features. When None (the default) it is
            read from the notebook-level ``train_encoded`` matrix, which keeps
            the original zero-argument call working.
        num_classes: Width of the output layer.
        hidden_units: Width of the hidden layer.
        dropout_rate: Dropout rate applied after the hidden layer.

    Returns:
        The compiled Sequential model.
    """
    if input_dim is None:
        # Backward-compatible default: size the input from the encoded training data.
        input_dim = train_encoded.shape[1]

    model = Sequential()
    model.add(Dense(hidden_units, input_shape=(input_dim,), activation='relu', name='hidden_1'))
    model.add(Dropout(dropout_rate, name='dropout_1'))

    # Optional second hidden block, currently disabled:
    #model.add(Dense(64,activation='relu',name='hidden_2'))
    #model.add(Dropout(0.2,name='dropout_2'))

    # Linear output layer: one raw score per class, as used with the hinge loss below.
    model.add(Dense(num_classes, name='output'))

    model.compile(optimizer='adam',
                  loss='categorical_hinge',
                  metrics=['accuracy']
                )

    return model

In [10]:
# Hold out 20% of the encoded training data for validation (fixed seed for a repeatable split).
X_train, X_val, y_train, y_val = train_test_split(
    train_encoded,
    y_encoded,
    test_size=0.2,
    random_state=22,
)

In [11]:
# Create the compiled network and print its layer-by-layer parameter table.
model=build_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_1 (Dense)             (None, 256)               1584640   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
output (Dense)               (None, 20)                5140      
Total params: 1,589,780
Trainable params: 1,589,780
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Callbacks, both watching validation loss:
#  - stop training when it has not improved for 5 epochs
#  - write weights to 'best-model-0.h5' only when it improves
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
checkpoint = ModelCheckpoint(
    'best-model-0.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)
monitor = [early_stopping, checkpoint]

# Train for at most 100 epochs; `history` keeps the per-epoch metrics.
history = model.fit(
    X_train, y_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=monitor,
)

Train on 23819 samples, validate on 5955 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 00007: early stopping


<keras.callbacks.History at 0x1a2f0f8cf8>

In [13]:
# Encode the test recipes with the tokenizer `t` that was already fitted on the
# training data, using the same 'tfidf' mode as the training matrix.
test_encoded=t.texts_to_matrix(test_df['ingredients'],mode='tfidf')
# Display the shape to check the column count against the training matrix.
test_encoded.shape

(10000, 6189)

In [14]:
# Restore the weights written by the checkpoint callback, then take the
# highest-scoring class index for every test recipe.
model.load_weights('best-model-0.h5')
y_pred = model.predict(test_encoded).argmax(axis=1)

# Invert the label mapping once so each prediction is a single dict lookup
# instead of a scan over all labels. A predicted index with no label now
# raises KeyError rather than being silently skipped, which would leave
# `results` shorter than the test set and misaligned with its ids.
index2label = {index: cuisine for cuisine, index in label2index.items()}
results = [index2label[i] for i in y_pred]

results[:10]

['korean',
 'italian',
 'italian',
 'filipino',
 'italian',
 'cajun_creole',
 'italian',
 'thai',
 'chinese',
 'southern_us']

In [16]:
# Pair each test recipe id with its predicted cuisine and write the two-column CSV.
submission_rows = list(zip(test_df['id'], results))
submission = pd.DataFrame(submission_rows, columns=['id', 'cuisine'])
submission.to_csv('submission.csv', index=False, header=True)

In [None]:
# Per-epoch loss values recorded during training.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)  # number epochs from 1 for the x-axis

# Training loss as blue dots, validation loss as a blue line, on one set of axes.
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()