## Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import layers
import xgboost as xgb
import lightgbm as lgbm
import catboost

## Data

In [None]:
# Read the training table and key it by `id`.
train = pd.read_csv('../input/tabular-playground-series-aug-2021/train.csv').set_index('id')
# `loss` is the prediction target: keep it as its own Series and remove it
# from the feature frame.
target = train['loss']
train = train.drop(columns='loss')
# The test table gets the same `id` index as the training features.
test = pd.read_csv('../input/tabular-playground-series-aug-2021/test.csv').set_index('id')
# The sample submission is read as-is, so `id` stays an ordinary column.
preds = pd.read_csv('../input/tabular-playground-series-aug-2021/sample_submission.csv')

## EDA

In [None]:
# Per-column summary statistics of the training features
# (`loss` has already been removed from `train`).
train.describe()

In [None]:
# Heat-map of the pairwise correlations between the feature columns.
feature_corr = train.corr()
plt.matshow(feature_corr)
plt.colorbar()
plt.show()

In [None]:
# One 30-bin histogram per feature column, drawn on a 50x30 figure.
train.hist(bins=30, figsize=(50, 30))
plt.show()

In [None]:
from sklearn.decomposition import PCA

# Project the unscaled features onto two principal components and colour
# every point by its target value.
pca = PCA(n_components=2)
train_pca = pca.fit_transform(train)
# Transposing yields the two component columns as the x and y arguments.
plt.scatter(*train_pca.T, s=0.1, c=target)
plt.colorbar()
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply the same
# transformation to both the training and the test features.
sc = StandardScaler().fit(train)
train_sc = sc.transform(train)
test_sc = sc.transform(test)
# Re-fit the existing `pca` object on the scaled data (this overwrites its
# earlier fit) and draw the same 2-D scatter coloured by target.
train_sc_pca = pca.fit_transform(train_sc)
plt.scatter(*train_sc_pca.T, s=0.1, c=target)
plt.colorbar()
plt.show()

## Train, Validation split

In [None]:
from sklearn.model_selection import train_test_split

# Split the scaled training rows 50/50 into train and validation parts,
# stratified on the target, with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    train_sc,
    target,
    test_size=0.5,
    stratify=target,
    random_state=0,
)

## Models

### Scoring function

In [None]:
from sklearn.metrics import mean_squared_error

def score_model(mod, X, y):
    """Return the root-mean-squared error of a fitted model on (X, y).

    Parameters
    ----------
    mod : object exposing ``predict(X)``.
    X : feature matrix passed unchanged to ``mod.predict``.
    y : true target values compared against the predictions.

    Returns
    -------
    The square root of ``mean_squared_error(y, mod.predict(X))``.
    """
    mse = mean_squared_error(y, mod.predict(X))
    return np.sqrt(mse)

## Predictions

In [None]:
# Maps a short model name to that model's predictions on the test set.
preds_dict = dict()

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest used as a classifier over the target values.
rf = RandomForestClassifier(
    n_estimators=100,
    n_jobs=4,
    random_state=0,
)

# Validation pass: fit on the training half, report RMSE on the held-out half.
rf.fit(X_train, y_train)
rf_val_rmse = score_model(rf, X_val, y_val)
print(rf_val_rmse)

# Re-fit the same estimator on all scaled training rows, then predict the test set.
rf.fit(train_sc, target)
preds_dict['rf'] = rf.predict(test_sc)

### LightGBM

In [None]:
# LightGBM classifier over the target values.
lgbmC = lgbm.LGBMClassifier(
    n_estimators=800,
    learning_rate=0.02,
    random_state=0,
)

# Validation pass: fit on the training half, report RMSE on the held-out half.
lgbmC.fit(X_train, y_train)
lgbmC_val_rmse = score_model(lgbmC, X_val, y_val)
print(lgbmC_val_rmse)

# Re-fit the same estimator on all scaled training rows, then predict the test set.
lgbmC.fit(train_sc, target)
preds_dict['lgbmC'] = lgbmC.predict(test_sc)

### CatBoost

In [None]:
# CatBoost classifier over the target values.
cat = catboost.CatBoostClassifier(
    n_estimators=600,
    learning_rate=0.02,
    random_state=0,
)

# Validation pass: fit on the training half, report RMSE on the held-out half.
cat.fit(X_train, y_train)
cat_val_rmse = score_model(cat, X_val, y_val)
print(cat_val_rmse)

# Re-fit the same estimator on all scaled training rows, then predict the test set.
cat.fit(train_sc, target)
preds_dict['cat'] = cat.predict(test_sc)

### Neural Networks

In [None]:
from keras.callbacks import EarlyStopping

with tf.device('/gpu:0'):
    def block(inp, nsize, drop):
        """Apply BatchNormalization -> Dense(nsize) -> Dropout(drop) -> ReLU to ``inp``.

        Parameters
        ----------
        inp : input tensor of the block.
        nsize : number of units of the dense layer.
        drop : dropout rate applied to the dense output, before the activation.

        Returns
        -------
        The output tensor of the ReLU activation.
        """
        x = layers.BatchNormalization()(inp)
        x = layers.Dense(nsize)(x)
        x = layers.Dropout(drop)(x)
        x = layers.Activation('relu')(x)
        return x

    def build_model():
        """Build a one-block network with a softmax output per distinct target value.

        The input width is the number of columns of ``train_sc`` and the output
        width is ``target.nunique()``.
        """
        # `shape` must be a tuple: `(n)` is just the int `n`, which stricter
        # Keras versions reject, so spell the 1-tuple out explicitly.
        inp = layers.Input(shape=(train_sc.shape[1],))
        x = block(inp, 10000, 0.2)
        x = layers.Dense(target.nunique(), activation='softmax')(x)
        return keras.Model(inputs=inp, outputs=x)

    model = build_model()

    # Stop once the monitored validation quantity has not improved by at least
    # 0.001 for 10 epochs, and roll back to the best epoch's weights so the
    # test predictions do not come from the later, worse-scoring epochs.
    earlyStopping = EarlyStopping(min_delta=0.001, patience=10, verbose=1,
                                  restore_best_weights=True)

    # NOTE(review): with sparse integer labels and a softmax output, the
    # RootMeanSquaredError metric compares labels against class probabilities,
    # so the logged value is not the RMSE of the predicted loss -- confirm
    # whether that is intended.
    model.compile(optimizer="Adam", loss="SparseCategoricalCrossentropy",
                  metrics=['RootMeanSquaredError'])

    # Train on the training half only, using the held-out half for validation.
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                        batch_size=512, epochs=100, callbacks=[earlyStopping])

    # Stored as the raw per-class probability matrix (one column per class).
    preds_dict['nn'] = model.predict(test_sc)

## Ensemble

In [None]:
def _as_label_vector(pred):
    """Collapse one model's raw predictions into a 1-D float vector of labels.

    Handles 1-D label vectors, single-column 2-D arrays, and 2-D per-class
    score matrices (reduced with ``argmax`` along the class axis).
    """
    arr = np.asarray(pred)
    if arr.ndim == 2 and arr.shape[1] > 1:
        # Assumes column k of the score matrix corresponds to label k --
        # TODO confirm against the class ordering of the model that produced it.
        arr = arr.argmax(axis=1)
    return arr.reshape(-1).astype(float)


if not preds_dict:
    raise ValueError('preds_dict is empty: there are no model predictions to ensemble')

# Running sum of the per-model label vectors, one entry per submission row.
total_preds = np.zeros(shape=preds.shape[0])
for i, pred in enumerate(preds_dict.values()):
    # Keep a copy of every model's raw predictions next to the ensemble output.
    pd.DataFrame(data=pred).to_csv('submission_' + str(i) + '.csv', index=False)
    # Any shape mismatch now raises here instead of silently dropping the model
    # from an average that is still divided by the full model count.
    total_preds += _as_label_vector(pred)

# Simple unweighted mean over all models.
total_preds /= len(preds_dict)
total_preds = pd.DataFrame(data=total_preds, columns=[preds.columns[1]], index=preds.id)
# Turn the `id` index back into a column so the written file contains both the
# id and the prediction column.
total_preds.reset_index().to_csv('submission.csv', index=False)