In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
# Load the competition training set from the Kaggle input directory.
df = pd.read_csv('../input/tabular-playground-series-nov-2021/train.csv')

In [None]:
# Preview the first rows of the raw training frame.
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the training frame.
df.describe()

In [None]:
# Remove the `id` column in place.
# The drop mutates `df`, so running this cell a second time raises KeyError.
df.drop(columns='id', inplace=True)

In [None]:
# Preview again now that the `id` column has been dropped.
df.head()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Scatter plot of f0 against f2, with points coloured by `target`.
sns.set_style('whitegrid')
grid = sns.FacetGrid(df, hue='target', height=12)
grid.map(sns.scatterplot, 'f0', 'f2')

In [None]:
# Number of rows in the training frame.
len(df)

In [None]:
# Separate features from labels for the feature-selection step.
# Every column except the last is taken as a feature, so this relies on
# `target` being the last column of `df`.
X_temp = df.iloc[:, :-1]
y_temp = df['target']

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. `X_temp` is rebound to the scaler's output,
# so it no longer refers to the DataFrame slice after this cell.
sc = StandardScaler()
X_temp = sc.fit(X_temp).transform(X_temp)

# Feature Selection

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with default settings; its coefficients are inspected
# below as a first feature-importance signal.
model = LogisticRegression()

In [None]:
# Fit on the first 10,000 rows only.
model.fit(X_temp[:10000],y_temp[:10000])

In [None]:
# One logistic-regression coefficient per feature column.
importance = model.coef_[0]

# Print every coefficient.
for pos, coef in enumerate(importance):
    print('Feature: %0d, Score: %.5f' % (pos, coef))

# Positions of the features whose coefficient is negative.
index = [pos for pos, coef in enumerate(importance) if coef < 0]

# Check the importance of the features

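> *Features with a negative coefficient are treated here as having less impact on the data. (Strictly speaking, the magnitude of a coefficient, not its sign, measures how strongly a feature influences the prediction.)*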

In [None]:
# Bar chart of the logistic-regression coefficients, one bar per feature position.
plt.figure(figsize=(20, 8))
positions = list(range(len(importance)))
plt.bar(positions, importance)
plt.show()

# Features with the most effect on the data

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Second importance signal: a decision tree fitted on the same 10,000 rows.
# NOTE(review): no random_state is passed, so the importances (and the
# features selected from them) may differ between runs. Confirm whether a
# fixed seed is wanted.
model = DecisionTreeClassifier()
model.fit(X_temp[:10000], y_temp[:10000])

importance = model.feature_importances_
# Plain-list copy of the importances, used by the next cell to compute the mean.
value = list(importance)

for feat_pos, score in enumerate(value):
    print('Feature: %0d, Score: %.5f' % (feat_pos, score))

plt.figure(figsize=(20, 8))
plt.bar(list(range(len(importance))), importance)
plt.show()

In [None]:
# Mean decision-tree importance across all features.
np.mean(value)

# We keep the features whose importance is greater than the mean importance of all features.

In [None]:
# Positions of the features whose decision-tree importance exceeds the mean.
# The threshold is computed from `importance` rather than pasted in as a
# literal, so it stays correct if the data or the model changes.
mean_importance = np.mean(importance)
main_f = [i for i, v in enumerate(importance) if v > mean_importance]

In [None]:
# Column names of the training frame, to check how positions map to names.
df.columns

In [None]:
# Convert feature positions to column names of the form 'f<position>'.
# Not idempotent: running this cell twice yields names like 'ff3'.
index = ['f' + str(pos) for pos in index]

In [None]:
# Convert feature positions to column names of the form 'f<position>'.
# Not idempotent: running this cell twice yields names like 'ff3'.
main_f = ['f' + str(pos) for pos in main_f]

In [None]:
# Names of the features with above-mean decision-tree importance.
main_f

## These are the features whose importance is greater than the mean

In [None]:
# Read the training CSV again into a separate frame; `df` has already had
# its `id` column dropped in place, while `df_new` starts from the raw file.
df_new = pd.read_csv('../input/tabular-playground-series-nov-2021/train.csv')

In [None]:
# `common`: names in `main_f` that also appear in `index`, in `main_f` order.
common = [name for name in main_f if name in index]

# `uncommon`: the remaining names of `index`, in `index` order.
# Together the two lists contain exactly the names held in `index`.
uncommon = [name for name in index if name not in common]
        

In [None]:
# Total number of feature names that will be dropped below.
len(uncommon + common)

### Some features appear in both lists, so let's take the unique values

In [None]:
# The full list of feature names that will be dropped below.
uncommon + common

In [None]:
# Drop the `uncommon` feature columns in place.
df_new.drop(columns=uncommon, inplace=True)

In [None]:
# Drop the `common` feature columns in place, then preview what is left.
df_new.drop(columns=common, inplace=True)
df_new.head()

In [None]:
# Remove the `id` column in place. Running this cell twice raises KeyError.
df_new.drop(columns='id', inplace=True)

In [None]:
# Show a single row to check which columns remain.
df_new.head(1)

In [None]:
# Final features and labels. Every column except the last is a feature,
# so this relies on `target` being the last column of `df_new`.
X = df_new.iloc[:, :-1]
y = df_new['target']

In [None]:
# Display the selected feature frame (before scaling).
X

In [None]:
# Re-fit the existing scaler on the reduced feature set and standardise it.
# This replaces the statistics `sc` learned from the full feature set earlier.
X = sc.fit(X).transform(X)

In [None]:
# Hold out 2.5% of the rows for validation; the split is seeded (random_state=0).
x_train,x_val,y_train,y_val = train_test_split(X,y,test_size = 0.025,random_state = 0)

In [None]:
# Shapes of the training features and labels.
x_train.shape,y_train.shape

In [None]:
# Shapes of the validation features and labels.
x_val.shape,y_val.shape

# After keeping only the major features, our dataset of 100 features comes down to 45

### 45 features have a major effect on the dataset

# Model Implementation

Here we use the swish activation function, which is $f(x) = x \cdot \mathrm{sigmoid}(x)$.

In [None]:
# Build the network with the Keras functional API.
#
# The input width is read from the training matrix instead of being
# hard-coded: the number of surviving features depends on the selection step
# above. The shape is passed as a real 1-tuple; `(44)` is just the int 44.
n_features = x_train.shape[1]
x_input = tf.keras.layers.Input(shape=(n_features,))

# Encoder, stage 1: 384 units. `x1` (after batch norm) is reused further down.
x1 = tf.keras.layers.Dense(384, activation='swish')(x_input)
x1 = tf.keras.layers.BatchNormalization()(x1)
x2 = tf.keras.layers.Dropout(0.45)(x1)

# Encoder, stage 2: 192 units. `x2` (after batch norm) is reused further down.
x2 = tf.keras.layers.Dense(192, activation='swish')(x2)
x2 = tf.keras.layers.BatchNormalization()(x2)
x3 = tf.keras.layers.Dropout(0.35)(x2)

# Bottleneck: 96 units.
x3 = tf.keras.layers.Dense(96, activation='swish')(x3)
x3 = tf.keras.layers.BatchNormalization()(x3)
x3 = tf.keras.layers.Dropout(0.25)(x3)

# Expand back to 192 units and multiply element-wise with the stage-2 output.
x4 = tf.keras.layers.Dense(192, activation='swish')(x3)
x4 = tf.keras.layers.BatchNormalization()(x4)
x4 = tf.keras.layers.Multiply()([x2, x4])
x4 = tf.keras.layers.Dropout(0.35)(x4)

# Expand back to 384 units and multiply element-wise with the stage-1 output.
x5 = tf.keras.layers.Dense(384, activation='swish')(x4)
x5 = tf.keras.layers.BatchNormalization()(x5)
x5 = tf.keras.layers.Multiply()([x1, x5])
x5 = tf.keras.layers.Dropout(0.45)(x5)

# Head: concatenate the bottleneck with the widest gated branch.
x = tf.keras.layers.Concatenate()([x3, x5])
x = tf.keras.layers.Dense(128, activation='swish')(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dropout(0.25)(x)

# A single sigmoid unit outputs the probability of the positive class.
x_output = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model_nn = tf.keras.Model(inputs = x_input,outputs = x_output)

In [None]:
# Compile with Adam and binary cross-entropy, tracking ROC AUC.
# The metric is named explicitly so the history keys are always 'auc' and
# 'val_auc'. With the bare string 'AUC', Keras auto-numbers the metric
# ('auc_1', 'auc_2', ...) when this cell is re-run in the same session,
# which breaks the history plots further down.
model_nn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.AUC(name='auc')],
)

# Multiply the learning rate by 0.25 after 4 epochs without val_loss improvement.
lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.25, patience=4)
# Stop after 15 epochs without val_loss improvement and restore the best weights.
es = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=15,mode="min", restore_best_weights=True)
   

In [None]:
# Mini-batch size.
BS = 128
# Steps per epoch must be derived from the rows actually used for training.
# `x_train` holds only the training share of the data after the validation
# split, so using `len(df)` asks for more batches per epoch than one pass
# over `x_train` provides.
SPE = len(x_train)//BS

In [None]:
class CallKar(tf.keras.callbacks.Callback):
    """Stop training once the epoch's training loss falls below a threshold.

    Args:
        loss_threshold: training-loss value below which training is stopped.
            Defaults to 0.3, the value previously hard-coded in the callback.
    """

    def __init__(self, loss_threshold=0.3):
        super().__init__()
        self.loss_threshold = loss_threshold

    def on_epoch_end(self,epochs,logs = None):
        """Set ``stop_training`` when the reported loss is under the threshold.

        Args:
            epochs: index of the epoch that just finished (unused).
            logs: metrics dict passed by Keras; may be None or lack 'loss'.
        """
        # Avoid a mutable default argument, and avoid comparing None with a
        # float when 'loss' has not been reported.
        loss = (logs or {}).get('loss')
        if loss is not None and loss < self.loss_threshold:
            self.model.stop_training = True
cl = CallKar()

In [None]:
# Train for up to 50 epochs; the loss-threshold callback (cl), early stopping
# (es) and the learning-rate schedule (lr) can each end or adjust training.
# Validation metrics are computed on the held-out (x_val, y_val) split.
hist = model_nn.fit(x_train,y_train,
                    batch_size=BS,epochs=50,
                    callbacks=[cl,es,lr],
                    validation_data=(x_val,y_val),
                    steps_per_epoch=SPE,shuffle=True)

In [None]:
# Training vs. validation loss per epoch. Labels and a legend are added so
# the two curves can be told apart.
plt.plot(hist.history['loss'], label='train loss')
plt.plot(hist.history['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('binary cross-entropy')
plt.title('Loss per epoch')
plt.legend()
plt.show()

In [None]:
# Training vs. validation AUC per epoch. Labels and a legend are added so
# the two curves can be told apart.
plt.plot(hist.history['auc'], label='train AUC')
plt.plot(hist.history['val_auc'], label='validation AUC')
plt.xlabel('epoch')
plt.ylabel('AUC')
plt.title('AUC per epoch')
plt.legend()
plt.show()

In [None]:
# Load the competition test set from the Kaggle input directory.
df_test = pd.read_csv('../input/tabular-playground-series-nov-2021/test.csv')

In [None]:
df_test.shape
# Keep the test ids aside before removing the column from the features.
# NOTE(review): the name `id` shadows Python's builtin `id()` for the rest of
# the notebook. It is kept unchanged here in case other cells rely on it.
id = df_test['id']
df_test.drop(columns='id', inplace=True)

In [None]:
# Preview the test features after removing `id`.
df_test.head()

In [None]:
# Drop from the test set the same feature columns that were removed from the
# training set, in the same order (uncommon first, then common).
for dropped_cols in (uncommon, common):
    df_test.drop(columns=dropped_cols, inplace=True)

In [None]:
# Shape of the test features after column selection.
df_test.shape

In [None]:
# Scale the test features with the statistics already learned from the
# training features. Calling fit_transform here would re-fit the scaler on
# the test set, so the model would see inputs standardised differently from
# the ones it was trained on.
X_test = sc.transform(df_test)

In [None]:
# Display the scaled test matrix.
X_test

In [None]:
# Sigmoid outputs of the trained network for every test row.
predict = model_nn.predict(X_test)

In [None]:
# Load the sample submission file to use as the template for the output.
submission = pd.read_csv('../input/tabular-playground-series-nov-2021/sample_submission.csv')

In [None]:
# The network has a single output unit, so `predict` is a 2-D array with one
# column. Flatten it explicitly to one value per row before storing it as
# the `target` column.
submission['target'] = np.ravel(predict)

In [None]:
# Write the submission file without the DataFrame index.
submission.to_csv('submission.csv',index=False)