# Loading data

In [1]:
import pandas as pd

In [2]:
# Read the training and test splits from the working directory, then preview
# the first rows of the training frame.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target
0,1,A,B,A,A,B,D,A,E,C,...,0.881122,0.42165,0.741413,0.895799,0.802461,0.724417,0.701915,0.877618,0.719903,6.994023
1,2,B,A,A,A,B,B,A,E,A,...,0.440011,0.34623,0.278495,0.593413,0.546056,0.613252,0.741289,0.326679,0.808464,8.071256
2,3,A,A,A,C,B,D,A,B,C,...,0.914155,0.369602,0.832564,0.86562,0.825251,0.264104,0.695561,0.869133,0.828352,5.760456
3,4,A,A,A,C,B,D,A,E,G,...,0.934138,0.57893,0.407313,0.868099,0.794402,0.494269,0.698125,0.809799,0.614766,7.806457
4,6,A,B,A,A,B,B,A,E,C,...,0.3826,0.70594,0.325193,0.440967,0.462146,0.724447,0.683073,0.343457,0.297743,6.868974


In [3]:
# Number of distinct values observed in the training frame for every column
# whose name starts with 'cat' (missing values, if any, count as one level).
cat_size = [
    train[col].nunique(dropna=False)
    for col in train.columns
    if col.startswith('cat')
]
cat_size

[2, 2, 2, 4, 4, 4, 8, 8, 7, 15]

In [4]:
# Names of the categorical feature columns, in the order they appear in `train`.
cat_cols = list(filter(lambda name: name.startswith('cat'), train.columns))
cat_cols

['cat0',
 'cat1',
 'cat2',
 'cat3',
 'cat4',
 'cat5',
 'cat6',
 'cat7',
 'cat8',
 'cat9']

In [5]:
# Names of the continuous feature columns, in the order they appear in `train`:
# build a boolean mask over the column index and keep the matching labels.
cont_cols = train.columns[
    [name.startswith('cont') for name in train.columns]
].tolist()
cont_cols

['cont0',
 'cont1',
 'cont2',
 'cont3',
 'cont4',
 'cont5',
 'cont6',
 'cont7',
 'cont8',
 'cont9',
 'cont10',
 'cont11',
 'cont12',
 'cont13']

# Define the model

In [6]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Embedding, Reshape, concatenate, Dense, Lambda
from tensorflow.keras.optimizers import Adam

In [7]:
def build_model():
    """Build and compile the MLP regressor wrapped by ``KerasRegressor``.

    The network takes a single flat input vector. Its first ``len(cat_cols)``
    entries are treated as the categorical columns and the remaining
    ``len(cont_cols)`` entries as the continuous columns. (Presumably the
    categorical entries are integer codes in ``[0, cat_size[i])`` produced by
    the upstream transform -- confirm against that transform.)

    Per-column treatment:

    * Columns listed in ``embedding_dims`` (``cat3`` .. ``cat9``) each go
      through their own ``Embedding`` layer named ``cat<i>_embedding`` with
      ``cat_size[i]`` input rows and the listed output width.
    * Every other categorical column (``cat0`` .. ``cat2`` here) is passed
      through as one raw feature, reshaped to width 1.

    The continuous slice and all categorical features are concatenated
    (continuous first) and fed through Dense(200, relu) -> Dense(100, relu)
    -> Dense(1).

    Reads the notebook-level globals ``cat_cols``, ``cont_cols`` and
    ``cat_size``; they must be defined before this function is called.

    Returns:
        A ``Model`` compiled with mean-squared-error loss and an Adam
        optimizer whose learning rate decays exponentially from 0.01.
    """
    # Embedding output width per categorical column, keyed by the column's
    # position in ``cat_cols``. Columns without an entry are fed in raw.
    embedding_dims = {3: 2, 4: 2, 5: 2, 6: 4, 7: 4, 8: 4, 9: 7}

    n_cat = len(cat_cols)
    combi_input = Input((n_cat + len(cont_cols),))

    # Split the flat input: one slice per categorical column, then a single
    # slice holding all the continuous columns.
    cat_slices = [combi_input[:, i] for i in range(n_cat)]
    cont_input = combi_input[:, n_cat:]

    cat_features = []
    for i, column in enumerate(cat_slices):
        if i in embedding_dims:
            layer = Embedding(cat_size[i], embedding_dims[i],
                              name='cat{}_embedding'.format(i))
        else:
            # Give the raw column an explicit feature axis so that it can be
            # concatenated with the 2-D embedding outputs.
            layer = Reshape(target_shape=(1,))
        cat_features.append(layer(column))

    output = concatenate([cont_input] + cat_features)
    output = Dense(200, activation='relu')(output)
    output = Dense(100, activation='relu')(output)
    output = Dense(1)(output)

    model = Model(inputs=combi_input, outputs=output)

    initial_learning_rate = 0.01
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=200,
        decay_rate=0.96,
        staircase=False)
    optimizer = Adam(learning_rate=lr_schedule)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

In [8]:
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

In [9]:
from sklearn.pipeline import Pipeline
from categorical_transform import IntegerCategoricalTransform
# Two-stage estimator: transform the categorical columns, then fit the Keras
# MLP (20 epochs per fit) through the scikit-learn regressor wrapper.
# NOTE(review): the `tensorflow.keras.wrappers.scikit_learn` module imported
# above was removed in later TensorFlow releases (SciKeras is the suggested
# successor) -- confirm the pinned TensorFlow version before re-running.
p = Pipeline(steps=[
    ('cat_trans', IntegerCategoricalTransform(cat_cols)),
    ('mlp', KerasRegressor(build_fn=build_model, epochs=20)),
])

In [10]:
# Target vector, and the feature frame with the target and the row identifier
# removed; then fit the full pipeline on the training data.
y_train = train['target']
x_train = train.drop(['target', 'id'], axis=1)
p.fit(x_train, y_train)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20

KeyboardInterrupt: 

In [None]:
from sklearn.model_selection import cross_validate
# 5-fold cross-validation of the whole pipeline, scored by negative RMSE and
# reporting the training-fold scores alongside the validation-fold scores.
scores = cross_validate(
    estimator=p,
    X=x_train,
    y=y_train,
    scoring='neg_root_mean_squared_error',
    cv=5,
    return_train_score=True,
)
scores