In [2]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import QuantileTransformer,  KBinsDiscretizer
from sklearn.model_selection import train_test_split
import warnings

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

2021-09-18 23:18:13.375521: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [3]:
# Read the competition's training and test tables from the Kaggle input directory.
train_csv = '../input/tabular-playground-series-sep-2021/train.csv'
test_csv = '../input/tabular-playground-series-sep-2021/test.csv'
train, test = (pd.read_csv(csv_path) for csv_path in (train_csv, test_csv))

# Preview the first rows of the training table as the cell output.
train.head()

Unnamed: 0,id,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f110,f111,f112,f113,f114,f115,f116,f117,f118,claim
0,0,0.10859,0.004314,-37.566,0.017364,0.28915,-10.251,135.12,168900.0,399240000000000.0,...,-12.228,1.7482,1.9096,-7.1157,4378.8,1.2096,861340000000000.0,140.1,1.0177,1
1,1,0.1009,0.29961,11822.0,0.2765,0.4597,-0.83733,1721.9,119810.0,3874100000000000.0,...,-56.758,4.1684,0.34808,4.142,913.23,1.2464,7575100000000000.0,1861.0,0.28359,0
2,2,0.17803,-0.00698,907.27,0.27214,0.45948,0.17327,2298.0,360650.0,12245000000000.0,...,-5.7688,1.2042,0.2629,8.1312,45119.0,1.1764,321810000000000.0,3838.2,0.4069,1
3,3,0.15236,0.007259,780.1,0.025179,0.51947,7.4914,112.51,259490.0,77814000000000.0,...,-34.858,2.0694,0.79631,-16.336,4952.4,1.1784,4533000000000.0,4889.1,0.51486,1
4,4,0.11623,0.5029,-109.15,0.29791,0.3449,-0.40932,2538.9,65332.0,1907200000000000.0,...,-13.641,1.5298,1.1464,-0.43124,3856.5,1.483,-8991300000000.0,,0.23049,1


In [4]:
# Per-row count of missing values, kept as an extra feature because the imputer
# below replaces every NaN and would otherwise erase that information.
train['nan_count'] = train.isna().sum(axis=1)
test['nan_count'] = test.isna().sum(axis=1)

# Every column except the row identifier and the target is a model input.
features = [c for c in train.columns if c not in ('id', 'claim')]

# Raw (untransformed) inputs and target. `train` / `test` are deliberately NOT
# overwritten with transformed values, so re-running this cell is idempotent.
xtrain = train[features]
ytrain = train['claim']

# Split the dataset into train and validation data BEFORE fitting any
# preprocessing, so the validation rows cannot influence the imputation medians,
# the quantile mapping or the bin edges (avoids train/validation leakage).
X_train, X_valid, y_train, y_valid = train_test_split(xtrain, ytrain,
                                                    test_size=0.25,
                                                    random_state=0)

# median imputation -> map each feature to a uniform distribution -> 128 ordinal bins
pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='median', missing_values=np.nan)),
    # random_state fixes the row subsample the transformer draws when estimating
    # quantiles, making the transform reproducible between runs.
    ('scaler', QuantileTransformer(n_quantiles=128, output_distribution='uniform',
                                   random_state=0)),
    ('bin', KBinsDiscretizer(n_bins=128, encode='ordinal', strategy='uniform'))
])

# Fit on the training split only; apply the fitted pipeline to the other sets.
# Results are wrapped back into DataFrames so column names and row indices survive.
X_train = pd.DataFrame(pipe.fit_transform(X_train),
                       columns=features, index=X_train.index)
X_valid = pd.DataFrame(pipe.transform(X_valid),
                       columns=features, index=X_valid.index)
xtest = pd.DataFrame(pipe.transform(test[features]),
                     columns=features, index=test.index)

In [5]:
# Standardise the features. The scaler's statistics are learned from the training
# split only and then applied unchanged to the validation split and the test set.
scaler = StandardScaler().fit(X_train)

# NOTE: from here on `xtrain` and `test` are rebound to the scaled arrays
# returned by the scaler; the prediction step reads `test` in this form.
xtrain, xvalid, test = (
    scaler.transform(split) for split in (X_train, X_valid, xtest)
)

In [None]:
# Seed TensorFlow so weight initialisation and dropout masks are repeatable.
tf.random.set_seed(0)

# Number of input features; also used as the width of the first hidden layer.
input_shape = xtrain.shape[1]

# Feed-forward binary classifier: three ReLU hidden layers, with layer
# normalisation and dropout after the second and third, and a single sigmoid
# unit that outputs a probability.
model = keras.Sequential([
    layers.Dense(input_shape, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.LayerNormalization(axis=-1),
    layers.Dropout(0.4),
    layers.Dense(64, activation='relu'),
    layers.LayerNormalization(axis=-1),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])

auc = keras.metrics.AUC(name='auc')

# Replaces the deprecated `lr=` alias and the legacy `decay=` keyword (rejected
# by newer Keras optimizers) with an explicit schedule. With decay_steps=1 the
# schedule is 1e-2 / (1 + 0.01 * step), i.e. the same per-step inverse-time
# decay that `decay=0.01` applied.
lr_schedule = keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=1e-2, decay_steps=1, decay_rate=0.01
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule, epsilon=1e-9)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[auc])

# Stop once validation loss has not improved for 5 epochs and roll the model
# back to the weights of the best epoch.
callback = tf.keras.callbacks.EarlyStopping(
                monitor='val_loss', patience=5, restore_best_weights=True
            )

# Validation tensors are cast to float32 as well, matching the training inputs.
history = model.fit(x=np.float32(xtrain),
          y=np.float32(y_train),
          batch_size=1024, shuffle=True,
          epochs=20,
          validation_data=(np.float32(xvalid), np.float32(y_valid)),
          callbacks=[callback],
         )

2021-09-18 23:29:20.774830: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 341976964 exceeds 10% of free system memory.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20

In [9]:
# Start from the sample submission so the output has the expected columns and row order.
submission_sample = pd.read_csv('../input/tabular-playground-series-sep-2021/sample_solution.csv')

# A Keras model whose final layer is a single unit returns predictions with
# shape (n_rows, 1). Flatten to 1-D so the column assignment does not depend on
# how pandas handles a 2-D value for a single column.
# NOTE(review): `test` is expected to be the scaled feature array here, not the
# raw DataFrame read from test.csv — confirm the scaling cell has been run.
predictions = model.predict(np.float32(test)).ravel()
submission_sample['claim'] = predictions
submission_sample.to_csv('submission.csv', index=False)