In [23]:
import pandas as pd
import numpy as np
np.random.seed(2018)

from scipy.sparse import csr_matrix, hstack
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import KFold
# The CV cell uses KFold(n_splits=...).split(...), which is the
# sklearn.model_selection API; import it last so this is the KFold in scope.
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.callbacks import EarlyStopping

In [3]:
# Load the raw training table from the working directory; later cells read
# its 'loss' column and every column whose name contains 'cat' or 'cont'.
train = pd.read_csv('train.csv')

In [4]:
# Work on a random 100,000-row subset of the training table
# (drawn from NumPy's global random state, since no random_state is passed).
df_train = train.sample(n=100000)

# Regression target: natural log of the 'loss' column.
y = np.log(df_train.loc[:, 'loss'].values)

# Accumulator for the sparse feature blocks appended by the following cells.
sparse_data = list()

### Categorical Variables

In [5]:
# One-hot encode every column whose name contains 'cat' and append each
# encoded block, as a CSR matrix, to the shared feature list.
feat_cats = [name for name in df_train.columns if 'cat' in name]
for feat in feat_cats:
    dummy = pd.get_dummies(df_train[feat].astype('category'))
    sparse_data.append(csr_matrix(dummy))

### Continuous Variables

In [6]:
# Standardise every column whose name contains 'cont' and append the result
# as one more sparse block. The scaler is fitted on all sampled rows at once.
f_num = [name for name in df_train.columns if 'cont' in name]
scaler = StandardScaler()
scaled_cont = scaler.fit_transform(df_train[f_num])
sparse_data.append(csr_matrix(scaled_cont))

In [7]:
# Concatenate all feature blocks column-wise into a single CSR matrix,
# then display its summary (shape, dtype, number of stored elements).
X = hstack(sparse_data, format = 'csr')
X

<100000x1093 sparse matrix of type '<type 'numpy.float64'>'
	with 13000000 stored elements in Compressed Sparse Row format>

In [17]:
def nn_model(input_dim):
    """Build and compile the feed-forward regression network.

    The network stacks three hidden stages of 400, 200 and 50 units. Each
    stage is Dense -> PReLU -> BatchNormalization -> Dropout (rates 0.45,
    0.2 and 0.2 respectively) and is followed by a final single-unit Dense
    output. Every Dense layer uses the 'he_normal' initialiser. The model is
    compiled with the 'mae' loss and the 'adadelta' optimizer.

    Parameters
    ----------
    input_dim : int
        Number of input features; passed to the first Dense layer.

    Returns
    -------
    The compiled Sequential model.
    """
    # (units, dropout rate) for each hidden stage, in network order.
    hidden_stages = ((400, 0.45), (200, 0.2), (50, 0.2))

    model = Sequential()
    for stage, (units, drop_rate) in enumerate(hidden_stages):
        dense_kwargs = {'kernel_initializer': 'he_normal'}
        if stage == 0:
            # Only the first layer declares the input width.
            dense_kwargs['input_dim'] = input_dim
        model.add(Dense(units, **dense_kwargs))
        model.add(PReLU())
        model.add(BatchNormalization())
        model.add(Dropout(drop_rate))

    model.add(Dense(1, kernel_initializer='he_normal'))
    model.compile(loss='mae', optimizer='adadelta')
    return model

In [20]:
def batch_generator(X, y, batch_size, shuffle):
    """Endlessly yield dense ``(X_batch, y_batch)`` mini-batches from sparse X.

    Adapted from chenglong's code for fitting from a generator
    (https://www.kaggle.com/c/talkingdata-mobile-user-demographics/forums/t/22567/neural-network-for-sparse-matrices).

    Parameters
    ----------
    X : sparse matrix
        Feature matrix; must support row indexing with an index array and
        ``.toarray()``.
    y : array-like
        Targets, indexable with an integer index array (e.g. a NumPy array).
    batch_size : int
        Maximum number of rows per batch; the last batch of a pass may be
        smaller.
    shuffle : bool
        If true, the row order is shuffled (via ``np.random``) before the
        first pass and again after every complete pass.

    Yields
    ------
    (X_batch, y_batch)
        ``X_batch`` is a dense ndarray holding the selected rows of ``X``;
        ``y_batch`` holds the matching entries of ``y``.
    """
    n_samples = X.shape[0]
    # Integer ceiling division. The previous np.ceil(rows / batch_size) is
    # floored *before* the ceil under Python 2 integer division, which
    # silently dropped the final partial batch of every pass.
    number_of_batches = (n_samples + batch_size - 1) // batch_size

    sample_index = np.arange(n_samples)
    if shuffle:
        np.random.shuffle(sample_index)

    counter = 0
    while True:
        batch_index = sample_index[batch_size*counter:batch_size*(counter+1)]
        X_batch = X[batch_index, :].toarray()
        y_batch = y[batch_index]
        counter += 1
        yield X_batch, y_batch

        # One full pass is done: start over, reshuffling if requested.
        # ">=" (rather than "==") guarantees the reset can never be skipped.
        if counter >= number_of_batches:
            if shuffle:
                np.random.shuffle(sample_index)
            counter = 0
            
def batch_generatorp(X, batch_size, shuffle):
    """Yield dense feature batches of X in row order (no targets).

    Parameters
    ----------
    X : sparse matrix
        Must support row indexing with an index array and ``.toarray()``.
    batch_size : int
        Number of consecutive rows taken per batch.
    shuffle :
        Accepted but never read; rows are always taken in their original order.

    Yields
    ------
    Dense ndarray holding rows ``[batch_size*k, batch_size*(k+1))`` of X for
    the current counter value k.
    """
    # NOTE(review): this evaluates rows / ceil(rows / batch_size), which is
    # close to batch_size rather than a count of batches (batch_generator uses
    # ceil(rows / batch_size) for the same purpose). The reset at the bottom
    # therefore only fires when this quotient happens to equal an integer
    # counter value; otherwise the counter keeps growing and the slices past
    # the end of sample_index select no rows. Changing the formula alters how
    # many rows a caller receives for a given number of steps, so audit every
    # caller before fixing it.
    number_of_batches = X.shape[0] / np.ceil(X.shape[0]/batch_size)
    counter = 0
    sample_index = np.arange(X.shape[0])
    while True:
        # Consecutive window of row indices for the current batch.
        batch_index = sample_index[batch_size * counter:batch_size * (counter + 1)]
        X_batch = X[batch_index, :].toarray()
        counter += 1
        yield X_batch
        if (counter == number_of_batches):
            counter = 0

In [21]:
# --- Cross-validated training and evaluation --------------------------------
# Settings a reader may want to tune.
nepochs = 4
nfolds = 3
batch_size = 128        # mini-batch size used for both training and prediction
steps_per_epoch = 100   # mini-batches drawn from the training generator per epoch

cv = KFold(n_splits=nfolds, shuffle=True, random_state=2018)

scores = []
for num_iter, (train_index, test_index) in enumerate(cv.split(y)):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # A fresh model per fold so no weights carry over between folds.
    model = nn_model(X_train.shape[1])

    # validation_data is passed as in-memory arrays, so no validation_steps is
    # given (it only applies to generator-based validation data). toarray()
    # yields a plain ndarray where todense() would yield an np.matrix.
    model.fit_generator(generator=batch_generator(X_train, y_train, batch_size, True),
                        epochs=nepochs,
                        steps_per_epoch=steps_per_epoch,
                        validation_data=(X_test.toarray(), y_test),
                        verbose=2)

    # predict_generator consumes `steps` batches, so request exactly the
    # number of batches needed to cover the test fold once (ceiling division),
    # not one step per row.
    predict_steps = (X_test.shape[0] + batch_size - 1) // batch_size
    log_pred = model.predict_generator(generator=batch_generatorp(X_test, batch_size, False),
                                       steps=predict_steps)[:, 0]

    # The model is trained on log(loss); map back before scoring.
    y_pred = np.exp(log_pred)

    score = mean_absolute_error(np.exp(y_test), y_pred)
    print("Fold{0}, score={1}".format(num_iter+1, score))
    scores.append(score)

print(np.mean(scores), np.std(scores))

Epoch 1/4
 - 13s - loss: 7.4744 - val_loss: 7.3758
Epoch 2/4
 - 6s - loss: 6.6436 - val_loss: 5.7869
Epoch 3/4
 - 6s - loss: 4.0099 - val_loss: 1.1226
Epoch 4/4
 - 7s - loss: 1.5846 - val_loss: 0.8870
Fold1, score=1947.955792789534
Epoch 1/4
 - 9s - loss: 7.4461 - val_loss: 7.2601
Epoch 2/4
 - 6s - loss: 6.5694 - val_loss: 5.9520
Epoch 3/4
 - 6s - loss: 3.2359 - val_loss: 0.8985
Epoch 4/4
 - 6s - loss: 1.4756 - val_loss: 0.7073
Fold2, score=1734.5036701510905
Epoch 1/4
 - 10s - loss: 7.4502 - val_loss: 7.0844
Epoch 2/4
 - 6s - loss: 6.0419 - val_loss: 2.6340
Epoch 3/4
 - 7s - loss: 1.9154 - val_loss: 1.1916
Epoch 4/4
 - 7s - loss: 1.5568 - val_loss: 0.9011
Fold3, score=1952.923450412031


What next?

## [Blending](https://mlwave.com/kaggle-ensembling-guide/) & [Stacking](http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/)

![](../images/stacking.gif)