In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelBinarizer, StandardScaler
from sklearn.linear_model import LogisticRegression, Lasso, Ridge, ElasticNet
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn_pandas import DataFrameMapper
from sklearn.metrics import mean_squared_error

In [3]:
df = pd.read_csv('gdrive/My Drive/Colab Notebooks/data/train.csv')

In [4]:
df.head(1)

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28.0,0,> 2 Years,Yes,40454.0,26.0,217,1


In [5]:
df = df.drop(columns="id")

In [6]:
target = "Response"
X = df.drop(target, axis=1)
y = df[target]

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [8]:
mapper = DataFrameMapper([
    ("Gender", LabelBinarizer()),
    (["Age"], StandardScaler()),
    (["Driving_License"], StandardScaler()),
    (["Region_Code"], StandardScaler()),
    (["Previously_Insured"], StandardScaler()),
    ("Vehicle_Age", LabelBinarizer()),
    ("Vehicle_Damage", LabelBinarizer()),
    (["Annual_Premium"], StandardScaler()),
    (["Policy_Sales_Channel"], StandardScaler()),
    (["Vintage"], StandardScaler())],df_out=True)

In [9]:
Z_train = mapper.fit_transform(X_train)

In [10]:
Z_test = mapper.transform(X_test)

In [11]:
model = LogisticRegression().fit(Z_train,y_train)
print("Logistic Regression train score is " + str(model.score(Z_train,y_train)))
print("Logistic Regression test score is " + str(model.score(Z_test,y_test)))

Logistic Regression train score is 0.8777424422123562
Logistic Regression test score is 0.8765087428367514


In [12]:
model = Lasso().fit(Z_train,y_train)
print("Lasso train score is " + str(model.score(Z_train,y_train)))
print("Lasso test score is " + str(model.score(Z_test,y_test)))

Lasso train score is 0.0
Lasso test score is -1.382410017258806e-05


In [13]:
model = Ridge().fit(Z_train,y_train)
print("Ridge train score is " + str(model.score(Z_train,y_train)))
print("Ridge test score is " + str(model.score(Z_test,y_test)))

Ridge train score is 0.14659402257305046
Ridge test score is 0.15044074636863525


In [14]:
model = ElasticNet().fit(Z_train,y_train)
print("Elastic Net train score is " + str(model.score(Z_train,y_train)))
print("Elastic Net test score is " + str(model.score(Z_test,y_test)))

Elastic Net train score is 0.0
Elastic Net test score is -1.382410017258806e-05


In [15]:
model = RandomForestClassifier().fit(Z_train,y_train)
print("Random Forest Classifier train score is " + str(model.score(Z_train,y_train)))
print("Random Forest Classifier test score is " + str(model.score(Z_test,y_test)))

Random Forest Classifier train score is 0.9998740514499826
Random Forest Classifier test score is 0.8660656185058461


In [16]:
model = DecisionTreeClassifier().fit(Z_train,y_train)
print("Decision Tree Classifier train score is " + str(model.score(Z_train,y_train)))
print("Decision Tree Classifier test score is " + str(model.score(Z_test,y_test)))

Decision Tree Classifier train score is 0.9998950428749855
Decision Tree Classifier test score is 0.8224039127605534


In [17]:
model = AdaBoostClassifier().fit(Z_train,y_train)
print("AdaBoost Classifier train score is " + str(model.score(Z_train,y_train)))
print("AdaBoost Classifier test score is " + str(model.score(Z_test,y_test)))

AdaBoost Classifier train score is 0.8777389436415224
AdaBoost Classifier test score is 0.8765192384390941


In [18]:
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

In [19]:
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=8,
    verbose=0,
    mode='auto',
    baseline=None, restore_best_weights=True
)

In [23]:
model = Sequential([
    Input(shape=(Z_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='elu')
])

model.compile(loss='binary_crossentropy', optimizer='adam')

In [24]:
history = model.fit(Z_train, y_train,
                    validation_data=(Z_test, y_test),
                    epochs=25, batch_size=32,
                    verbose=2, callbacks=[early_stopping])

model.summary()

Epoch 1/25
8933/8933 - 12s - loss: 0.2850 - val_loss: 0.2796
Epoch 2/25
8933/8933 - 11s - loss: 0.2792 - val_loss: 0.2766
Epoch 3/25
8933/8933 - 11s - loss: 0.2765 - val_loss: 0.2792
Epoch 4/25
8933/8933 - 11s - loss: 0.2768 - val_loss: 0.2773
Epoch 5/25
8933/8933 - 11s - loss: 0.2868 - val_loss: 0.2950
Epoch 6/25
8933/8933 - 11s - loss: 0.2841 - val_loss: 0.2766
Epoch 7/25
8933/8933 - 11s - loss: 0.2798 - val_loss: 0.2784
Epoch 8/25
8933/8933 - 11s - loss: 0.2839 - val_loss: 0.2839
Epoch 9/25
8933/8933 - 11s - loss: 0.2932 - val_loss: 0.2838
Epoch 10/25
8933/8933 - 11s - loss: 0.2937 - val_loss: 0.3112
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                832       
_________________________________________________________________
dense_5 (Dense)              (None, 32)                2080      
_____________________________________

In [22]:
model.evaluate(Z_test, y_test)



0.2944076955318451