In [30]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# for plotting
import matplotlib.pyplot as plt

# to evaluate the model
from sklearn.metrics import accuracy_score, roc_auc_score


In [51]:
train = pd.read_csv("train.csv")
X_test = pd.read_csv("test.csv")
X_test_2 = pd.read_csv("test.csv")
sample = pd.read_csv("sample_submission.csv")

In [3]:
X_train = train.drop('Transported',axis = 1)


In [4]:
y_train = train["Transported"]

In [5]:
X_train.columns

Index(['PassengerId', 'HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'Age',
       'VIP', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck',
       'Name'],
      dtype='object')

In [6]:
print("Train Shape:",X_train.shape)
print("Test shape:", X_test.shape)

Train Shape: (8693, 13)
Test shape: (4277, 13)


In [7]:
def split_columns(df:pd.DataFrame, X_train = X_train):
    
    df1 = df.copy()
    if 'PassengerId' in df.columns:
    #split the columns as per data description
        df1[['Group','Num_ppl_in_group']] = X_train['PassengerId'].str.split("_",expand = True)
        df1[['Deck','Num','Side']] = X_train['Cabin'].str.split('/',expand = True)
        df1.drop(["Cabin","PassengerId","Name","Group"], inplace = True, axis = 1)
        #cast the columns as integer that has num in column names
        df1['Num'] = pd.to_numeric(df1['Num'], errors='coerce').fillna(0).astype('int32')
        df1['Num_ppl_in_group'] = pd.to_numeric(df1['Num_ppl_in_group'], errors='coerce').fillna(0).astype('int32')
        return df1

In [8]:
X_train = split_columns(X_train)
X_test = split_columns(X_test)
print("Train Shape:",X_train.shape)
print("Test shape:", X_test.shape)

Train Shape: (8693, 14)
Test shape: (4277, 14)


In [9]:
X_test.columns

Index(['HomePlanet', 'CryoSleep', 'Destination', 'Age', 'VIP', 'RoomService',
       'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck', 'Num_ppl_in_group',
       'Deck', 'Num', 'Side'],
      dtype='object')

In [10]:
cat_cols = [col for col in X_train.columns if X_train[col].dtype == 'O']
print("Categorical columns",cat_cols)
num_cols = [col for col in X_train.columns if col not in cat_cols]
print("Numeric columns",num_cols)

Categorical columns ['HomePlanet', 'CryoSleep', 'Destination', 'VIP', 'Deck', 'Side']
Numeric columns ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck', 'Num_ppl_in_group', 'Num']


### Differentiate Numeric and categorical columns with null values

In [11]:
cols_na = [col for col in X_train.columns if X_train[col].isnull().sum() > 0]

cat_na = [col for col in cat_cols if col in cols_na]
num_na = [col for col in num_cols if col in cols_na]

print("Categorical null columns: ", cat_na )
print("Numeric NULL columns: ",num_na)

Categorical null columns:  ['HomePlanet', 'CryoSleep', 'Destination', 'VIP', 'Deck', 'Side']
Numeric NULL columns:  ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']


### Missing Values in categorical columns
replace with string "missing" if >0.1 values are null 

replace with mode if <0.1 percent values are null

In [12]:
string_missing_columns = [col for col in cat_na if X_train[col].isnull().sum()>0.1]
mode_columns = [col for col in cat_na if col not in string_missing_columns]

In [13]:
print(string_missing_columns)
print("Mode columns", mode_columns)

['HomePlanet', 'CryoSleep', 'Destination', 'VIP', 'Deck', 'Side']
Mode columns []


In [14]:
for col in string_missing_columns:
    X_train[col].fillna("Missing",inplace = True)
    if col in X_test.columns:
        X_test[col].fillna("Missing",inplace = True)
    
    print("Nulls count{} : {}".format(col,X_train[col].isnull().sum()))

if len(mode_columns) > 0:
    mode_imputer = SimpleImputer(missing_values=np.nan, strategy="mode")
    for col in mode_columns:
        model_imputer = mode_imputer.fit(X_train[col])
        X_train[col] = model_imputer.transform(X_train[col])
        
        X_test[col] = model_imputer.transform(X_test[col])
    

Nulls countHomePlanet : 0
Nulls countCryoSleep : 0
Nulls countDestination : 0
Nulls countVIP : 0
Nulls countDeck : 0
Nulls countSide : 0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train[col].fillna("Missing",inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_test[col].fillna("Missing",inplace = True)


In [15]:
### fill -1 for numeric Missing values as Age and amount spent we cannot impute
for col in num_na:
    X_train[col].fillna(-1, inplace = True)
    X_test[col].fillna(-1, inplace = True)
    print("Nulls count{} : {}".format(col,X_train[col].isnull().sum()))

Nulls countAge : 0
Nulls countRoomService : 0
Nulls countFoodCourt : 0
Nulls countShoppingMall : 0
Nulls countSpa : 0
Nulls countVRDeck : 0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_train[col].fillna(-1, inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X_test[col].fillna(-1, inplace = True)


### OneHotEncoding

In [16]:
for var in cat_cols:
    get_dummies = pd.get_dummies(X_train[var],prefix = var,drop_first=True, dtype = 'int')
    test_dummies = pd.get_dummies(X_test[var], prefix = var,drop_first=True, dtype = 'int')
    X_train = pd.concat([X_train,get_dummies],axis = 1)
    X_test = pd.concat([X_test,test_dummies],axis = 1)
    
    X_train.drop(var, axis = 1, inplace = True)
    X_test.drop(var, axis = 1, inplace = True)

In [17]:
nulls = [print(col, X_train[col].isnull().sum()) for col in X_train.columns]

Age 0
RoomService 0
FoodCourt 0
ShoppingMall 0
Spa 0
VRDeck 0
Num_ppl_in_group 0
Num 0
HomePlanet_Europa 0
HomePlanet_Mars 0
HomePlanet_Missing 0
CryoSleep_True 0
CryoSleep_Missing 0
Destination_Missing 0
Destination_PSO J318.5-22 0
Destination_TRAPPIST-1e 0
VIP_True 0
VIP_Missing 0
Deck_B 0
Deck_C 0
Deck_D 0
Deck_E 0
Deck_F 0
Deck_G 0
Deck_Missing 0
Deck_T 0
Side_P 0
Side_S 0


In [18]:
nulls = [print(col, X_test[col].isnull().sum()) for col in X_test.columns]

Age 0
RoomService 0
FoodCourt 0
ShoppingMall 0
Spa 0
VRDeck 0
Num_ppl_in_group 0
Num 0
HomePlanet_Europa 0
HomePlanet_Mars 0
HomePlanet_Missing 0
CryoSleep_True 0
CryoSleep_Missing 0
Destination_Missing 0
Destination_PSO J318.5-22 0
Destination_TRAPPIST-1e 0
VIP_True 0
VIP_Missing 0
Deck_B 0
Deck_C 0
Deck_D 0
Deck_E 0
Deck_F 0
Deck_G 0
Deck_Missing 0
Deck_T 0
Side_P 0
Side_S 0


In [19]:

num_cols.remove('Age')
X_train[num_cols]

Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Num_ppl_in_group,Num
0,0.0,0.0,0.0,0.0,0.0,1,0
1,109.0,9.0,25.0,549.0,44.0,1,0
2,43.0,3576.0,0.0,6715.0,49.0,1,0
3,0.0,1283.0,371.0,3329.0,193.0,2,0
4,303.0,70.0,151.0,565.0,2.0,1,1
...,...,...,...,...,...,...,...
8688,0.0,6819.0,0.0,1643.0,74.0,1,98
8689,0.0,0.0,0.0,0.0,0.0,1,1499
8690,0.0,0.0,1872.0,1.0,0.0,1,1500
8691,0.0,1049.0,0.0,353.0,3235.0,1,608


In [20]:
scaler = StandardScaler()
for col in num_cols:
    print(col)
    X_train[col] = scaler.fit_transform(X_train[[col]])
    X_test[col] = scaler.transform(X_test[[col]])

RoomService
FoodCourt
ShoppingMall
Spa
VRDeck
Num_ppl_in_group
Num


In [21]:
X_train.head()

Unnamed: 0,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Num_ppl_in_group,Num,HomePlanet_Europa,HomePlanet_Mars,...,Deck_B,Deck_C,Deck_D,Deck_E,Deck_F,Deck_G,Deck_Missing,Deck_T,Side_P,Side_S
0,39.0,-0.33307,-0.281012,-0.283535,-0.270606,-0.262983,-0.491161,-1.141624,1,0,...,1,0,0,0,0,0,0,0,1,0
1,24.0,-0.16804,-0.275372,-0.241728,0.217176,-0.224184,-0.491161,-1.141624,0,0,...,0,0,0,0,1,0,0,0,0,1
2,58.0,-0.267966,1.960004,-0.283535,5.695613,-0.219776,-0.491161,-1.141624,1,0,...,0,0,0,0,0,0,0,0,0,1
3,33.0,-0.33307,0.523021,0.336887,2.687181,-0.092799,0.457443,-1.141624,1,0,...,0,0,0,0,0,0,0,0,0,1
4,16.0,0.125682,-0.237145,-0.031018,0.231392,-0.261219,-0.491161,-1.139678,0,0,...,0,0,0,0,1,0,0,0,0,1


In [27]:
y_test = sample['Transported']

Accuracy 0.47486555997194296


In [33]:
model = LogisticRegression(penalty="elasticnet", solver="saga", max_iter=1000)
params = {'C':[0.1, 0.5, 1.0, 10.0], 'l1_ratio':[0.0, 0.2, 0.5, 0.8, 1.0]}
grid_search = GridSearchCV(estimator=model, param_grid=params, cv=10, scoring="accuracy", verbose=3)

In [34]:
grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 20 candidates, totalling 200 fits
[CV 1/10] END ..............C=0.1, l1_ratio=0.0;, score=0.772 total time=   1.6s
[CV 2/10] END ..............C=0.1, l1_ratio=0.0;, score=0.757 total time=   1.7s
[CV 3/10] END ..............C=0.1, l1_ratio=0.0;, score=0.779 total time=   1.6s
[CV 4/10] END ..............C=0.1, l1_ratio=0.0;, score=0.801 total time=   1.5s
[CV 5/10] END ..............C=0.1, l1_ratio=0.0;, score=0.799 total time=   1.6s
[CV 6/10] END ..............C=0.1, l1_ratio=0.0;, score=0.799 total time=   1.8s
[CV 7/10] END ..............C=0.1, l1_ratio=0.0;, score=0.786 total time=   2.2s
[CV 8/10] END ..............C=0.1, l1_ratio=0.0;, score=0.792 total time=   1.7s
[CV 9/10] END ..............C=0.1, l1_ratio=0.0;, score=0.808 total time=   1.7s
[CV 10/10] END .............C=0.1, l1_ratio=0.0;, score=0.773 total time=   1.6s
[CV 1/10] END ..............C=0.1, l1_ratio=0.2;, score=0.775 total time=   3.0s
[CV 2/10] END ..............C=0.1, l1_ratio=0.



[CV 6/10] END ..............C=0.5, l1_ratio=0.0;, score=0.802 total time=   4.0s




[CV 7/10] END ..............C=0.5, l1_ratio=0.0;, score=0.791 total time=   3.3s
[CV 8/10] END ..............C=0.5, l1_ratio=0.0;, score=0.793 total time=   3.5s
[CV 9/10] END ..............C=0.5, l1_ratio=0.0;, score=0.812 total time=   3.3s
[CV 10/10] END .............C=0.5, l1_ratio=0.0;, score=0.774 total time=   3.3s
[CV 1/10] END ..............C=0.5, l1_ratio=0.2;, score=0.779 total time=   5.1s
[CV 2/10] END ..............C=0.5, l1_ratio=0.2;, score=0.766 total time=   4.1s
[CV 3/10] END ..............C=0.5, l1_ratio=0.2;, score=0.782 total time=   4.1s
[CV 4/10] END ..............C=0.5, l1_ratio=0.2;, score=0.795 total time=   4.0s
[CV 5/10] END ..............C=0.5, l1_ratio=0.2;, score=0.800 total time=   4.0s




[CV 6/10] END ..............C=0.5, l1_ratio=0.2;, score=0.802 total time=   4.5s




[CV 7/10] END ..............C=0.5, l1_ratio=0.2;, score=0.792 total time=   4.7s




[CV 8/10] END ..............C=0.5, l1_ratio=0.2;, score=0.794 total time=   4.9s
[CV 9/10] END ..............C=0.5, l1_ratio=0.2;, score=0.814 total time=   4.3s
[CV 10/10] END .............C=0.5, l1_ratio=0.2;, score=0.774 total time=   3.7s
[CV 1/10] END ..............C=0.5, l1_ratio=0.5;, score=0.778 total time=   4.6s
[CV 2/10] END ..............C=0.5, l1_ratio=0.5;, score=0.766 total time=   3.9s
[CV 3/10] END ..............C=0.5, l1_ratio=0.5;, score=0.782 total time=   4.6s
[CV 4/10] END ..............C=0.5, l1_ratio=0.5;, score=0.797 total time=   3.8s
[CV 5/10] END ..............C=0.5, l1_ratio=0.5;, score=0.800 total time=   4.4s




[CV 6/10] END ..............C=0.5, l1_ratio=0.5;, score=0.802 total time=   4.9s




[CV 7/10] END ..............C=0.5, l1_ratio=0.5;, score=0.792 total time=   4.6s




[CV 8/10] END ..............C=0.5, l1_ratio=0.5;, score=0.794 total time=   4.8s




[CV 9/10] END ..............C=0.5, l1_ratio=0.5;, score=0.814 total time=   4.6s
[CV 10/10] END .............C=0.5, l1_ratio=0.5;, score=0.774 total time=   3.8s




[CV 1/10] END ..............C=0.5, l1_ratio=0.8;, score=0.778 total time=   4.6s
[CV 2/10] END ..............C=0.5, l1_ratio=0.8;, score=0.766 total time=   4.6s




[CV 3/10] END ..............C=0.5, l1_ratio=0.8;, score=0.780 total time=   4.5s




[CV 4/10] END ..............C=0.5, l1_ratio=0.8;, score=0.796 total time=   4.8s




[CV 5/10] END ..............C=0.5, l1_ratio=0.8;, score=0.799 total time=   5.0s




[CV 6/10] END ..............C=0.5, l1_ratio=0.8;, score=0.802 total time=   4.7s




[CV 7/10] END ..............C=0.5, l1_ratio=0.8;, score=0.791 total time=   4.7s




[CV 8/10] END ..............C=0.5, l1_ratio=0.8;, score=0.794 total time=   5.0s




[CV 9/10] END ..............C=0.5, l1_ratio=0.8;, score=0.814 total time=   5.2s




[CV 10/10] END .............C=0.5, l1_ratio=0.8;, score=0.773 total time=   5.4s




[CV 1/10] END ..............C=0.5, l1_ratio=1.0;, score=0.780 total time=   4.4s




[CV 2/10] END ..............C=0.5, l1_ratio=1.0;, score=0.766 total time=   4.8s
[CV 3/10] END ..............C=0.5, l1_ratio=1.0;, score=0.780 total time=   4.3s




[CV 4/10] END ..............C=0.5, l1_ratio=1.0;, score=0.796 total time=   4.5s




[CV 5/10] END ..............C=0.5, l1_ratio=1.0;, score=0.800 total time=   4.7s




[CV 6/10] END ..............C=0.5, l1_ratio=1.0;, score=0.802 total time=   4.4s




[CV 7/10] END ..............C=0.5, l1_ratio=1.0;, score=0.792 total time=   4.6s




[CV 8/10] END ..............C=0.5, l1_ratio=1.0;, score=0.793 total time=   5.1s




[CV 9/10] END ..............C=0.5, l1_ratio=1.0;, score=0.812 total time=   4.9s
[CV 10/10] END .............C=0.5, l1_ratio=1.0;, score=0.773 total time=   4.1s




[CV 1/10] END ..............C=1.0, l1_ratio=0.0;, score=0.779 total time=   3.5s




[CV 2/10] END ..............C=1.0, l1_ratio=0.0;, score=0.766 total time=   3.8s




[CV 3/10] END ..............C=1.0, l1_ratio=0.0;, score=0.782 total time=   3.4s




[CV 4/10] END ..............C=1.0, l1_ratio=0.0;, score=0.795 total time=   3.8s




[CV 5/10] END ..............C=1.0, l1_ratio=0.0;, score=0.800 total time=   3.9s




[CV 6/10] END ..............C=1.0, l1_ratio=0.0;, score=0.803 total time=   3.9s




[CV 7/10] END ..............C=1.0, l1_ratio=0.0;, score=0.791 total time=   3.6s




[CV 8/10] END ..............C=1.0, l1_ratio=0.0;, score=0.793 total time=   3.2s




[CV 9/10] END ..............C=1.0, l1_ratio=0.0;, score=0.814 total time=   3.5s




[CV 10/10] END .............C=1.0, l1_ratio=0.0;, score=0.773 total time=   3.4s




[CV 1/10] END ..............C=1.0, l1_ratio=0.2;, score=0.779 total time=   5.1s
[CV 2/10] END ..............C=1.0, l1_ratio=0.2;, score=0.766 total time=   4.7s




[CV 3/10] END ..............C=1.0, l1_ratio=0.2;, score=0.782 total time=   4.9s




[CV 4/10] END ..............C=1.0, l1_ratio=0.2;, score=0.795 total time=   5.1s




[CV 5/10] END ..............C=1.0, l1_ratio=0.2;, score=0.800 total time=   4.8s




[CV 6/10] END ..............C=1.0, l1_ratio=0.2;, score=0.801 total time=   4.9s




[CV 7/10] END ..............C=1.0, l1_ratio=0.2;, score=0.791 total time=   5.1s




[CV 8/10] END ..............C=1.0, l1_ratio=0.2;, score=0.793 total time=   5.0s




[CV 9/10] END ..............C=1.0, l1_ratio=0.2;, score=0.814 total time=   5.4s




[CV 10/10] END .............C=1.0, l1_ratio=0.2;, score=0.773 total time=   5.3s




[CV 1/10] END ..............C=1.0, l1_ratio=0.5;, score=0.780 total time=   4.9s
[CV 2/10] END ..............C=1.0, l1_ratio=0.5;, score=0.766 total time=   4.8s




[CV 3/10] END ..............C=1.0, l1_ratio=0.5;, score=0.782 total time=   4.9s




[CV 4/10] END ..............C=1.0, l1_ratio=0.5;, score=0.795 total time=   4.7s




[CV 5/10] END ..............C=1.0, l1_ratio=0.5;, score=0.800 total time=   4.8s




[CV 6/10] END ..............C=1.0, l1_ratio=0.5;, score=0.801 total time=   5.2s




[CV 7/10] END ..............C=1.0, l1_ratio=0.5;, score=0.792 total time=   4.7s




[CV 8/10] END ..............C=1.0, l1_ratio=0.5;, score=0.794 total time=   4.1s




[CV 9/10] END ..............C=1.0, l1_ratio=0.5;, score=0.812 total time=   4.3s




[CV 10/10] END .............C=1.0, l1_ratio=0.5;, score=0.774 total time=   4.6s




[CV 1/10] END ..............C=1.0, l1_ratio=0.8;, score=0.782 total time=   4.2s
[CV 2/10] END ..............C=1.0, l1_ratio=0.8;, score=0.766 total time=   4.0s




[CV 3/10] END ..............C=1.0, l1_ratio=0.8;, score=0.782 total time=   4.7s




[CV 4/10] END ..............C=1.0, l1_ratio=0.8;, score=0.795 total time=   4.7s




[CV 5/10] END ..............C=1.0, l1_ratio=0.8;, score=0.800 total time=   4.6s




[CV 6/10] END ..............C=1.0, l1_ratio=0.8;, score=0.800 total time=   4.7s




[CV 7/10] END ..............C=1.0, l1_ratio=0.8;, score=0.792 total time=   4.9s




[CV 8/10] END ..............C=1.0, l1_ratio=0.8;, score=0.794 total time=   4.7s




[CV 9/10] END ..............C=1.0, l1_ratio=0.8;, score=0.812 total time=   4.6s




[CV 10/10] END .............C=1.0, l1_ratio=0.8;, score=0.774 total time=   4.9s




[CV 1/10] END ..............C=1.0, l1_ratio=1.0;, score=0.784 total time=   4.5s




[CV 2/10] END ..............C=1.0, l1_ratio=1.0;, score=0.766 total time=   4.5s




[CV 3/10] END ..............C=1.0, l1_ratio=1.0;, score=0.782 total time=   4.7s




[CV 4/10] END ..............C=1.0, l1_ratio=1.0;, score=0.795 total time=   5.4s




[CV 5/10] END ..............C=1.0, l1_ratio=1.0;, score=0.800 total time=   4.7s




[CV 6/10] END ..............C=1.0, l1_ratio=1.0;, score=0.801 total time=   4.6s




[CV 7/10] END ..............C=1.0, l1_ratio=1.0;, score=0.792 total time=   5.0s




[CV 8/10] END ..............C=1.0, l1_ratio=1.0;, score=0.794 total time=   4.6s




[CV 9/10] END ..............C=1.0, l1_ratio=1.0;, score=0.812 total time=   4.2s




[CV 10/10] END .............C=1.0, l1_ratio=1.0;, score=0.774 total time=   4.9s




[CV 1/10] END .............C=10.0, l1_ratio=0.0;, score=0.780 total time=   3.7s




[CV 2/10] END .............C=10.0, l1_ratio=0.0;, score=0.767 total time=   3.6s




[CV 3/10] END .............C=10.0, l1_ratio=0.0;, score=0.782 total time=   3.4s




[CV 4/10] END .............C=10.0, l1_ratio=0.0;, score=0.796 total time=   3.5s




[CV 5/10] END .............C=10.0, l1_ratio=0.0;, score=0.801 total time=   3.6s




[CV 6/10] END .............C=10.0, l1_ratio=0.0;, score=0.801 total time=   3.2s




[CV 7/10] END .............C=10.0, l1_ratio=0.0;, score=0.792 total time=   3.6s




[CV 8/10] END .............C=10.0, l1_ratio=0.0;, score=0.793 total time=   3.4s




[CV 9/10] END .............C=10.0, l1_ratio=0.0;, score=0.812 total time=   3.7s




[CV 10/10] END ............C=10.0, l1_ratio=0.0;, score=0.773 total time=   3.7s




[CV 1/10] END .............C=10.0, l1_ratio=0.2;, score=0.780 total time=   5.1s




[CV 2/10] END .............C=10.0, l1_ratio=0.2;, score=0.767 total time=   5.6s




[CV 3/10] END .............C=10.0, l1_ratio=0.2;, score=0.782 total time=   5.4s




[CV 4/10] END .............C=10.0, l1_ratio=0.2;, score=0.796 total time=   5.0s




[CV 5/10] END .............C=10.0, l1_ratio=0.2;, score=0.801 total time=   5.1s




[CV 6/10] END .............C=10.0, l1_ratio=0.2;, score=0.801 total time=   4.9s




[CV 7/10] END .............C=10.0, l1_ratio=0.2;, score=0.792 total time=   5.8s




[CV 8/10] END .............C=10.0, l1_ratio=0.2;, score=0.793 total time=   6.1s




[CV 9/10] END .............C=10.0, l1_ratio=0.2;, score=0.812 total time=   5.1s




[CV 10/10] END ............C=10.0, l1_ratio=0.2;, score=0.773 total time=   4.7s




[CV 1/10] END .............C=10.0, l1_ratio=0.5;, score=0.780 total time=   5.2s




[CV 2/10] END .............C=10.0, l1_ratio=0.5;, score=0.767 total time=   4.7s




[CV 3/10] END .............C=10.0, l1_ratio=0.5;, score=0.782 total time=   4.8s




[CV 4/10] END .............C=10.0, l1_ratio=0.5;, score=0.796 total time=   4.6s




[CV 5/10] END .............C=10.0, l1_ratio=0.5;, score=0.801 total time=   5.7s




[CV 6/10] END .............C=10.0, l1_ratio=0.5;, score=0.801 total time=   4.6s




[CV 7/10] END .............C=10.0, l1_ratio=0.5;, score=0.792 total time=   4.7s




[CV 8/10] END .............C=10.0, l1_ratio=0.5;, score=0.793 total time=   5.4s




[CV 9/10] END .............C=10.0, l1_ratio=0.5;, score=0.812 total time=   4.7s




[CV 10/10] END ............C=10.0, l1_ratio=0.5;, score=0.772 total time=   4.7s




[CV 1/10] END .............C=10.0, l1_ratio=0.8;, score=0.780 total time=   5.2s




[CV 2/10] END .............C=10.0, l1_ratio=0.8;, score=0.767 total time=   4.8s




[CV 3/10] END .............C=10.0, l1_ratio=0.8;, score=0.782 total time=   4.6s




[CV 4/10] END .............C=10.0, l1_ratio=0.8;, score=0.796 total time=   5.1s




[CV 5/10] END .............C=10.0, l1_ratio=0.8;, score=0.801 total time=   4.6s




[CV 6/10] END .............C=10.0, l1_ratio=0.8;, score=0.801 total time=   4.5s




[CV 7/10] END .............C=10.0, l1_ratio=0.8;, score=0.792 total time=   4.9s




[CV 8/10] END .............C=10.0, l1_ratio=0.8;, score=0.793 total time=   5.4s




[CV 9/10] END .............C=10.0, l1_ratio=0.8;, score=0.812 total time=   4.6s




[CV 10/10] END ............C=10.0, l1_ratio=0.8;, score=0.773 total time=   4.4s




[CV 1/10] END .............C=10.0, l1_ratio=1.0;, score=0.780 total time=   5.0s




[CV 2/10] END .............C=10.0, l1_ratio=1.0;, score=0.767 total time=   4.6s




[CV 3/10] END .............C=10.0, l1_ratio=1.0;, score=0.782 total time=   5.0s




[CV 4/10] END .............C=10.0, l1_ratio=1.0;, score=0.796 total time=   5.6s




[CV 5/10] END .............C=10.0, l1_ratio=1.0;, score=0.801 total time=   4.4s




[CV 6/10] END .............C=10.0, l1_ratio=1.0;, score=0.801 total time=   4.5s




[CV 7/10] END .............C=10.0, l1_ratio=1.0;, score=0.792 total time=   4.7s




[CV 8/10] END .............C=10.0, l1_ratio=1.0;, score=0.793 total time=   4.7s




[CV 9/10] END .............C=10.0, l1_ratio=1.0;, score=0.812 total time=   4.5s




[CV 10/10] END ............C=10.0, l1_ratio=1.0;, score=0.773 total time=   4.8s




In [35]:
grid_search.best_score_, grid_search.best_params_

(0.7899503987936987, {'C': 1.0, 'l1_ratio': 1.0})

In [36]:
grid_search.best_estimator_

In [37]:
model = LogisticRegression(penalty="elasticnet", solver="saga", max_iter=1000)
params = {'C':[1.2,1.0, 0.8], 'l1_ratio':np.arange(0.9,1.0,0.04)}
grid_2 = GridSearchCV(estimator=model, param_grid=params, cv=10, scoring="accuracy", verbose=3)

In [38]:
grid_2.fit(X_train, y_train)

Fitting 10 folds for each of 9 candidates, totalling 90 fits




[CV 1/10] END ..............C=1.2, l1_ratio=0.9;, score=0.782 total time=   4.7s




[CV 2/10] END ..............C=1.2, l1_ratio=0.9;, score=0.767 total time=   4.7s




[CV 3/10] END ..............C=1.2, l1_ratio=0.9;, score=0.782 total time=   4.8s




[CV 4/10] END ..............C=1.2, l1_ratio=0.9;, score=0.795 total time=   4.4s




[CV 5/10] END ..............C=1.2, l1_ratio=0.9;, score=0.800 total time=   4.9s




[CV 6/10] END ..............C=1.2, l1_ratio=0.9;, score=0.800 total time=   4.6s




[CV 7/10] END ..............C=1.2, l1_ratio=0.9;, score=0.792 total time=   4.7s




[CV 8/10] END ..............C=1.2, l1_ratio=0.9;, score=0.794 total time=   4.9s




[CV 9/10] END ..............C=1.2, l1_ratio=0.9;, score=0.812 total time=   4.4s




[CV 10/10] END .............C=1.2, l1_ratio=0.9;, score=0.774 total time=   4.5s




[CV 1/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.782 total time=   4.5s




[CV 2/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.767 total time=   4.8s




[CV 3/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.782 total time=   4.9s




[CV 4/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.795 total time=   4.5s




[CV 5/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.800 total time=   4.8s




[CV 6/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.800 total time=   4.5s




[CV 7/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.792 total time=   4.4s




[CV 8/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.794 total time=   4.7s




[CV 9/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.812 total time=   4.5s




[CV 10/10] END C=1.2, l1_ratio=0.9400000000000001;, score=0.774 total time=   4.3s




[CV 1/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.782 total time=   4.4s




[CV 2/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.767 total time=   4.8s




[CV 3/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.782 total time=   4.3s




[CV 4/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.795 total time=   4.4s




[CV 5/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.800 total time=   4.6s




[CV 6/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.800 total time=   4.4s




[CV 7/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.792 total time=   4.7s




[CV 8/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.794 total time=   4.4s




[CV 9/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.812 total time=   4.8s




[CV 10/10] END C=1.2, l1_ratio=0.9800000000000001;, score=0.774 total time=   4.5s




[CV 1/10] END ..............C=1.0, l1_ratio=0.9;, score=0.782 total time=   4.5s
[CV 2/10] END ..............C=1.0, l1_ratio=0.9;, score=0.766 total time=   4.8s




[CV 3/10] END ..............C=1.0, l1_ratio=0.9;, score=0.782 total time=   4.3s




[CV 4/10] END ..............C=1.0, l1_ratio=0.9;, score=0.795 total time=   4.5s




[CV 5/10] END ..............C=1.0, l1_ratio=0.9;, score=0.800 total time=   4.6s




[CV 6/10] END ..............C=1.0, l1_ratio=0.9;, score=0.800 total time=   4.7s




[CV 7/10] END ..............C=1.0, l1_ratio=0.9;, score=0.792 total time=   4.6s




[CV 8/10] END ..............C=1.0, l1_ratio=0.9;, score=0.794 total time=   4.3s




[CV 9/10] END ..............C=1.0, l1_ratio=0.9;, score=0.812 total time=   5.0s




[CV 10/10] END .............C=1.0, l1_ratio=0.9;, score=0.774 total time=   4.4s




[CV 1/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.782 total time=   4.9s
[CV 2/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.766 total time=   5.0s




[CV 3/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.782 total time=   4.6s




[CV 4/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.795 total time=   4.5s




[CV 5/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.800 total time=   4.6s




[CV 6/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.800 total time=   4.8s




[CV 7/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.792 total time=   4.4s




[CV 8/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.794 total time=   4.6s




[CV 9/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.812 total time=   5.1s




[CV 10/10] END C=1.0, l1_ratio=0.9400000000000001;, score=0.774 total time=   4.6s




[CV 1/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.783 total time=   4.5s




[CV 2/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.766 total time=   5.0s




[CV 3/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.782 total time=   4.7s




[CV 4/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.795 total time=   4.5s




[CV 5/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.800 total time=   5.3s




[CV 6/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.801 total time=   4.7s




[CV 7/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.792 total time=   4.5s




[CV 8/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.794 total time=   4.3s




[CV 9/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.812 total time=   5.0s




[CV 10/10] END C=1.0, l1_ratio=0.9800000000000001;, score=0.774 total time=   4.4s




[CV 1/10] END ..............C=0.8, l1_ratio=0.9;, score=0.779 total time=   4.6s
[CV 2/10] END ..............C=0.8, l1_ratio=0.9;, score=0.766 total time=   4.3s




[CV 3/10] END ..............C=0.8, l1_ratio=0.9;, score=0.782 total time=   4.7s




[CV 4/10] END ..............C=0.8, l1_ratio=0.9;, score=0.796 total time=   4.4s




[CV 5/10] END ..............C=0.8, l1_ratio=0.9;, score=0.800 total time=   4.7s




[CV 6/10] END ..............C=0.8, l1_ratio=0.9;, score=0.801 total time=   4.9s




[CV 7/10] END ..............C=0.8, l1_ratio=0.9;, score=0.792 total time=   4.5s




[CV 8/10] END ..............C=0.8, l1_ratio=0.9;, score=0.794 total time=   4.5s




[CV 9/10] END ..............C=0.8, l1_ratio=0.9;, score=0.812 total time=   5.4s




[CV 10/10] END .............C=0.8, l1_ratio=0.9;, score=0.774 total time=   4.6s




[CV 1/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.780 total time=   4.7s
[CV 2/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.766 total time=   4.4s




[CV 3/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.782 total time=   4.6s




[CV 4/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.796 total time=   4.7s




[CV 5/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.800 total time=   4.4s




[CV 6/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.801 total time=   5.0s




[CV 7/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.792 total time=   4.4s




[CV 8/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.794 total time=   4.5s




[CV 9/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.812 total time=   4.6s




[CV 10/10] END C=0.8, l1_ratio=0.9400000000000001;, score=0.774 total time=   4.6s




[CV 1/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.780 total time=   4.5s
[CV 2/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.766 total time=   4.5s




[CV 3/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.782 total time=   5.1s




[CV 4/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.796 total time=   4.5s




[CV 5/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.799 total time=   4.6s




[CV 6/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.801 total time=   4.9s




[CV 7/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.792 total time=   4.5s




[CV 8/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.794 total time=   4.2s




[CV 9/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.812 total time=   4.8s




[CV 10/10] END C=0.8, l1_ratio=0.9800000000000001;, score=0.774 total time=   4.5s




In [40]:
grid_2.best_score_, grid_2.best_params_

(0.7898354562649631, {'C': 1.0, 'l1_ratio': 0.9800000000000001})

In [41]:
grid_search.best_score_, grid_search.best_params_

(0.7899503987936987, {'C': 1.0, 'l1_ratio': 1.0})

In [47]:
best_model = grid_2.best_estimator_

In [48]:
y_pred = best_model.predict(X_test)
# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)

In [49]:
print(accuracy)

0.45990180032733224


In [52]:
submission = pd.concat([X_test_2['PassengerId'],pd.DataFrame(y_pred,columns = ['Transported'])], axis = 1)

In [54]:
submission = submission.set_index('PassengerId')

In [56]:
submission.to_csv("submission_reset.csv")