In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [9]:
! kaggle datasets list
! kaggle competitions download -c santander-customer-satisfaction

ref                                                         title                                                size  lastUpdated          downloadCount  voteCount  usabilityRating  
----------------------------------------------------------  --------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
allen-institute-for-ai/CORD-19-research-challenge           COVID-19 Open Research Dataset Challenge (CORD-19)    3GB  2020-05-27 03:08:03          71785       6832  0.88235295       
roche-data-science-coalition/uncover                        UNCOVER COVID-19 Challenge                          179MB  2020-05-21 18:57:53          10483        730  0.8235294        
susuwatari/epa-vehicle-dataset-19802021                     EPA Vehicle Dataset (~1980-2021)                      2MB  2020-05-10 18:20:19            508         24  0.85294116       
sudalairajkumar/nifty-indices-dataset                       Nifty Indices Datase

In [13]:
dataset=pd.read_csv('./santander-customer-satisfaction/train.csv')

In [14]:
x = dataset.drop(labels=['ID','TARGET'], axis=1)

In [15]:
y = dataset['TARGET']

In [69]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

In [70]:
from sklearn.feature_selection import VarianceThreshold
variance=VarianceThreshold(threshold=0.01)

In [71]:
X_train=variance.fit_transform(X_train)

In [72]:
X_test=variance.transform(X_test)

In [73]:
X_train.shape

(60816, 272)

In [74]:
X_train_t=pd.DataFrame(X_train.T)

In [75]:
X_train_t.shape

(272, 60816)

In [76]:
dp=X_train_t.duplicated()

In [77]:
index_=[not x for x in dp]

In [78]:
X_train=X_train_t[index_].T

In [79]:
X_test_t=pd.DataFrame(X_test.T)
X_test=X_test_t[index_].T

In [80]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

In [81]:
X_train=X_train.reshape(X_train.shape[0],X_train.shape[1],1)
X_test=X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [82]:
X_train.shape

(60816, 255, 1)

In [92]:
model=tf.keras.models.Sequential()

In [93]:
model.add(tf.keras.layers.Conv1D(filters=32,kernel_size=3,activation='relu',input_shape=(255,1)))

model.add(tf.keras.layers.BatchNormalization())

model.add(tf.keras.layers.MaxPool1D(pool_size=2))

model.add(tf.keras.layers.Dropout(0.3))

In [94]:
model.add(tf.keras.layers.Conv1D(filters=64,kernel_size=3,activation='relu'))

model.add(tf.keras.layers.BatchNormalization())

model.add(tf.keras.layers.MaxPool1D(pool_size=2))

model.add(tf.keras.layers.Dropout(0.3))

In [95]:
model.add(tf.keras.layers.Conv1D(filters=128,kernel_size=3,activation='relu'))

model.add(tf.keras.layers.BatchNormalization())

model.add(tf.keras.layers.MaxPool1D(pool_size=2))

model.add(tf.keras.layers.Dropout(0.3))

In [96]:
model.add(tf.keras.layers.Flatten())

In [97]:
model.add(tf.keras.layers.Dense(units=256,activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))

In [98]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 253, 32)           128       
_________________________________________________________________
batch_normalization_5 (Batch (None, 253, 32)           128       
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 126, 32)           0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 126, 32)           0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 124, 64)           6208      
_________________________________________________________________
batch_normalization_6 (Batch (None, 124, 64)           256       
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 62, 64)           

In [124]:
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
]

model.compile(optimizer='adam',loss=tf.keras.losses.BinaryCrossentropy(),metrics=METRICS)

from sklearn.utils import class_weight

cw=class_weight.compute_class_weight('balanced',np.unique(y_train),y_train)


In [127]:
cw
class_weight = {0: cw[0], 1: cw[1]}

In [128]:
history=model.fit(X_train,y_train,class_weight=class_weight,epochs=3)

Train on 60816 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [130]:
model.evaluate(X_test,y_test)



[0.5096982388833861,
 475.0,
 3661.0,
 10915.0,
 153.0,
 0.749145,
 0.11484526,
 0.7563694,
 0.82205343]

In [131]:
#print(acc)

In [132]:
from sklearn.metrics import confusion_matrix

In [133]:
y_pred=model.predict_classes(X_test)
cm=confusion_matrix(y_test,y_pred)
print(cm)

[[10915  3661]
 [  153   475]]


In [135]:
history.history['accuracy']

[0.7340503, 0.74123585, 0.74694157]