In [1]:
# Import Packages

import plotly
import plotly.plotly as py
import plotly.graph_objs as go


import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('fivethirtyeight')

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [2]:
# 1. Get Data

train = pd.read_csv('./train.csv')

In [3]:
# 3. Explore Data

print(f" Shape of Data Set: {train.shape}")
print(train.info())
print(train.describe())
print(f" Total Null values: {train.isnull().sum().sum()}")
null_values = train.isnull().sum()
columns_with_null_values = null_values[null_values > 0]
print(f" Columns with Null Values: {columns_with_null_values}")

 Shape of Data Set: (891, 12)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
None
       PassengerId    Survived      Pclass         Age       SibSp  \
count   891.000000  891.000000  891.000000  714.000000  891.000000   
mean    446.000000    0.383838    2.308642   29.699118    0.523008   
std     257.353842    0.486592    0.836071   14.526497    1.102743   
min       1.000000    0.000000    1.000000    0.420000    0.000000   
25%     223.500000    0.000000

In [4]:
train.groupby(['Cabin']).sum().head()

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
Cabin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A10,584,0,1,36.0,0,0,40.125
A14,476,0,1,0.0,0,0,52.0
A16,557,1,1,48.0,1,0,39.6
A19,285,0,1,0.0,0,0,26.0
A20,600,1,1,49.0,1,0,56.9292


In [5]:
train.groupby(['Embarked']).sum()

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C,74820,93,317,4005.92,65,61,10072.2962
Q,32178,30,224,786.5,33,13,1022.2543
S,289496,217,1514,16312.75,368,266,17439.3988


In [6]:
# 4. Clean Data

train_obj = train.select_dtypes(exclude=[np.number])
train_num = train.select_dtypes(include=[np.number])

# Drop the 2 rows were values are missing from Embarked

train.dropna(axis=0, subset=['Embarked'], inplace=True)

# Fill NaN Ages with the mean

train['Age'] = train['Age'].fillna(train.Age.mean())

# Fill NaN Cabin with 'Unidentified'

train.Cabin.fillna('Unidentified', inplace=True)

In [7]:
# 5. Feature Engineering

# Slicing on First Letter of Cabin, then dropping categorical column

train['Cabin_category'] = train['Cabin'].map(lambda x: x[:1])

train = train.drop('Cabin', axis=1)

# Creating dummy columns for the different cabins, then dropping categorical column

train = pd.concat([train, pd.get_dummies(train['Cabin_category'])], axis = 1)

train.drop(['Cabin_category'], axis=1, inplace=True)

# Creating dummy column for Reverends on board

train['IsReverend'] = train['Name'].map(lambda x: 1 if 'Rev' in x else 0)

# Creating dummy column for Young Males on board

train['IsMaster'] = train['Name'].map(lambda x: 1 if 'Master' in x else 0)

# Dropping Name and Ticket columns

train.drop(['Name', 'Ticket'], axis=1, inplace=True)

# Creating Male/Female dummy columns, then dropping categorical column

train = pd.concat([train, pd.get_dummies(train['Sex'])], axis = 1)

train.drop(['Sex'], axis=1, inplace=True)

# Creating dummy columns for Passenger Classes, then dropping original column

train = pd.concat([train, pd.get_dummies(train['Pclass'])], axis = 1)

train.drop(['Pclass'], axis=1, inplace=True)

# Creating Embarked dummy columns, then dropping categorical column

train = pd.concat([train, pd.get_dummies(train['Embarked'])], axis = 1)

train.drop(['Embarked'], axis=1, inplace=True)

# Creating column that sums up Siblings and Parents

train['FamilyCount'] = train['SibSp'] + train['Parch']

# Creating column that counts whether passenger had family on board

train['HasFamily'] = train['FamilyCount'].map(lambda x: 1 if x > 0 else 0)

# Creating dummy column for Employees on board

train['IsEmployee'] = train['Fare'].map(lambda x: 1 if x==0 else 0)

In [8]:
train.columns

Index(['PassengerId',    'Survived',         'Age',       'SibSp',
             'Parch',        'Fare',           'A',           'B',
                 'C',           'D',           'E',           'F',
                 'G',           'T',           'U',  'IsReverend',
          'IsMaster',      'female',        'male',             1,
                   2,             3,           'C',           'Q',
                 'S', 'FamilyCount',   'HasFamily',  'IsEmployee'],
      dtype='object')

In [9]:
train.isnull().sum()

PassengerId    0
Survived       0
Age            0
SibSp          0
Parch          0
Fare           0
A              0
B              0
C              0
D              0
E              0
F              0
G              0
T              0
U              0
IsReverend     0
IsMaster       0
female         0
male           0
1              0
2              0
3              0
C              0
Q              0
S              0
FamilyCount    0
HasFamily      0
IsEmployee     0
dtype: int64

In [10]:
train.columns.unique()

Index(['PassengerId',    'Survived',         'Age',       'SibSp',
             'Parch',        'Fare',           'A',           'B',
                 'C',           'D',           'E',           'F',
                 'G',           'T',           'U',  'IsReverend',
          'IsMaster',      'female',        'male',             1,
                   2,             3,           'Q',           'S',
       'FamilyCount',   'HasFamily',  'IsEmployee'],
      dtype='object')

In [11]:
# Train-Train-Split on Data Set

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

target = ['Survived']
not_target = ['Age','SibSp','Parch', 'Fare', 'A', 'B', 'C', 'D', 'E',
              'F', 'G', 'T', 'U', 'IsReverend', 'IsMaster', 'female',
              'male', 1, 2, 3, 'Q', 'S', 'FamilyCount', 'HasFamily', 'IsEmployee']

X = train[not_target]
Y = train[target]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=42)



In [13]:
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 889 entries, 0 to 890
Data columns (total 26 columns):
Age            889 non-null float64
SibSp          889 non-null int64
Parch          889 non-null int64
Fare           889 non-null float64
A              889 non-null uint8
B              889 non-null uint8
C              889 non-null uint8
C              889 non-null uint8
D              889 non-null uint8
E              889 non-null uint8
F              889 non-null uint8
G              889 non-null uint8
T              889 non-null uint8
U              889 non-null uint8
IsReverend     889 non-null int64
IsMaster       889 non-null int64
female         889 non-null uint8
male           889 non-null uint8
1              889 non-null uint8
2              889 non-null uint8
3              889 non-null uint8
Q              889 non-null uint8
S              889 non-null uint8
FamilyCount    889 non-null int64
HasFamily      889 non-null int64
IsEmployee     889 non-null int64
dtypes:

In [14]:
train['Survived'].isnull().sum()

0

In [15]:
# Feature Selection

from sklearn.feature_selection import  SelectKBest, f_regression, f_classif
selector = SelectKBest(score_func=f_classif)

target = ['Survived']
not_target = ['Age','SibSp','Parch', 'Fare', 'A', 'B', 'C', 'D', 'E',
              'F', 'G', 'T', 'U', 'IsReverend', 'IsMaster', 'female',
              'male', 1, 2, 3, 'Q', 'S', 'FamilyCount', 'HasFamily', 'IsEmployee']

predictors = not_target

selector.fit(train[predictors], train[target].values.ravel())

best_features = selector.get_support(indices=True)

features = list(train[predictors].columns[selector.get_support(indices = True)])
features

['Fare', 'B', 'C', 'U', 'female', 'male', 1, 3, 'S', 'HasFamily']

In [16]:
# Standard Scaler

ss = StandardScaler()
X_train_scaled = ss.fit_transform(X_train)
X_test_scaled = ss.transform(X_test)

In [17]:
# GridSearchCV across LogisticRegression

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

gs_params = {
    'penalty':['l1','l2'],
    'solver':['liblinear'],
    'C':np.logspace(-5,0,100)
}

logreg_gridsearch = GridSearchCV(LogisticRegression(), gs_params, cv=5, verbose=1)

logreg_gridsearch = logreg_gridsearch.fit(X_train_scaled, Y_train)

best_logreg_gridsearch = logreg_gridsearch.best_estimator_

logreg_gridsearch_train = best_logreg_gridsearch.score(X_train_scaled, Y_train)
logreg_gridsearch_test = best_logreg_gridsearch.score(X_test_scaled, Y_test)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec

In [18]:
# GridSearchCV across SGDClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import SGDClassifier

gs_params = {
    'loss':['hinge', 'log', 'modified_huber',
    'squared_hinge', 'perceptron'],
    'penalty':['none', 'l2', 'l1', 'elasticnet'],
    'learning_rate': ['constant', 'optimal', 'invscaling'],
    'eta0' : [1]
}

SGD_gridsearch = GridSearchCV(SGDClassifier(), gs_params, cv=5, verbose=1)

SGD_gridsearch = SGD_gridsearch.fit(X_train_scaled, Y_train)

best_SGD_gridsearch = SGD_gridsearch.best_estimator_

SGD_gridsearch_train = best_SGD_gridsearch.score(X_train_scaled, Y_train)
SGD_gridsearch_test = best_SGD_gridsearch.score(X_test_scaled, Y_test)

Fitting 5 folds for each of 60 candidates, totalling 300 fits



max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_sam


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_sam


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


max_iter and tol parameters have been added in <class 'sklearn.linear_model.stochastic_gradient.S

In [19]:
# GridSearchCV across kNN Classification

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

knn_params = {
    'n_neighbors':[1,3,5,9,15,21],
    'weights':['uniform','distance'],
    'metric':['euclidean','manhattan']
}

knn_gridsearch = GridSearchCV(KNeighborsClassifier(), knn_params, cv=5, verbose=1, n_jobs=2)

knn_gridsearch = knn_gridsearch.fit(X_train_scaled, Y_train)

best_knn_gridsearch = knn_gridsearch.best_estimator_

knn_gridsearch_train = best_knn_gridsearch.score(X_train_scaled, Y_train)
knn_gridsearch_test = best_knn_gridsearch.score(X_test_scaled, Y_test)

Fitting 5 folds for each of 24 candidates, totalling 120 fits



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec

In [20]:
# RandomizedSearchCV across kNN Classification

from sklearn.model_selection import RandomizedSearchCV

knn_params = {
    'n_neighbors':[1,3,5,9,15,21],
    'weights':['uniform','distance'],
    'metric':['euclidean','manhattan']
}

knn_randomsearch = RandomizedSearchCV(KNeighborsClassifier(), knn_params, cv=5, 
                                      n_iter=10, verbose=1, n_jobs=2, random_state=42)

knn_randomsearch = knn_randomsearch.fit(X_train_scaled, Y_train)

best_knn_randomsearch = knn_randomsearch.best_estimator_

knn_randomsearch_train = best_knn_randomsearch.score(X_train_scaled, Y_train)
knn_randomsearch_test = best_knn_randomsearch.score(X_test_scaled, Y_test)

Fitting 5 folds for each of 10 candidates, totalling 50 fits



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec

In [21]:
# GridSearchCV across BernoulliNB

from sklearn.naive_bayes import BernoulliNB

gs_params = {
    'alpha': np.logspace(-5,0,100)
}

bernoulliNB_gridsearch = GridSearchCV(BernoulliNB(), gs_params, cv=5, verbose=1)

bernoulliNB_gridsearch = bernoulliNB_gridsearch.fit(X_train_scaled, Y_train)

best_bernoulliNB_gridsearch = bernoulliNB_gridsearch.best_estimator_

bernoulliNB_gridsearch_train = best_bernoulliNB_gridsearch.score(X_train_scaled, Y_train)
bernoulliNB_gridsearch_test = best_bernoulliNB_gridsearch.score(X_test_scaled, Y_test)



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



Fitting 5 folds for each of 100 candidates, totalling 500 fits



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


A column-vector y was passed when a 1d array was expec

In [22]:
# Model Selection Executive Summary

print('GridSearchCV across LogisticRegression:')
print(f"Best Parameters = {logreg_gridsearch.best_params_}")
print(f"Best CV Score = {logreg_gridsearch.best_score_}")
print(f"Train Score = {logreg_gridsearch_train}")
print(f"Test Score = {logreg_gridsearch_test}")
print()

print('GridSearchCV across SGDClassifier:')
print(f"Best Parameters = {SGD_gridsearch.best_params_}")
print(f"Best CV Score = {SGD_gridsearch.best_score_}")
print(f"Train Score = {SGD_gridsearch_train}")
print(f"Test Score = {SGD_gridsearch_test}")
print()

print('GridSearchCV across kNN Classification:')
print(f"Best Parameters = {knn_gridsearch.best_params_}")
print(f"Best CV Score = {knn_gridsearch.best_score_}")
print(f"Train Score = {knn_gridsearch_train}")
print(f"Test Score = {knn_gridsearch_test}")
print()

print('RandomizedSearchCV across kNN Classification:')
print(f"Best Parameters = {knn_randomsearch.best_params_}")
print(f"Best CV Score = {knn_randomsearch.best_score_}")
print(f"Train Score = {knn_randomsearch_train}")
print(f"Test Score = {knn_randomsearch_test}")
print()

print('GridSearchCV across BernoulliNB:')
print(f"Best Parameters = {bernoulliNB_gridsearch.best_params_}")
print(f"Best CV Score = {bernoulliNB_gridsearch.best_score_}")
print(f"Train Score = {bernoulliNB_gridsearch_train}")
print(f"Test Score = {bernoulliNB_gridsearch_test}")
print()

GridSearchCV across LogisticRegression:
Best Parameters = {'C': 0.0774263682681127, 'penalty': 'l2', 'solver': 'liblinear'}
Best CV Score = 0.8333333333333334
Train Score = 0.8453453453453453
Test Score = 0.8116591928251121

GridSearchCV across SGDClassifier:
Best Parameters = {'eta0': 1, 'learning_rate': 'invscaling', 'loss': 'log', 'penalty': 'l2'}
Best CV Score = 0.8318318318318318
Train Score = 0.8363363363363363
Test Score = 0.8161434977578476

GridSearchCV across kNN Classification:
Best Parameters = {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'uniform'}
Best CV Score = 0.8258258258258259
Train Score = 0.8558558558558559
Test Score = 0.7668161434977578

RandomizedSearchCV across kNN Classification:
Best Parameters = {'weights': 'uniform', 'n_neighbors': 9, 'metric': 'manhattan'}
Best CV Score = 0.8258258258258259
Train Score = 0.8588588588588588
Test Score = 0.7847533632286996

GridSearchCV across BernoulliNB:
Best Parameters = {'alpha': 1e-05}
Best CV Score = 0.75675675

In [23]:
test = pd.read_csv('./test.csv')

In [24]:
# 4. Clean Test Data

test_obj = test.select_dtypes(exclude=[np.number])
test_num = test.select_dtypes(include=[np.number])

# Drop the 2 rows were values are missing from Embarked

test.dropna(axis=0, subset=['Embarked'], inplace=True)

# Fill NaN Ages with the mean

test['Age'] = test['Age'].fillna(test.Age.mean())

# Fill NaN Cabin with 'Unidentified'

test.Cabin.fillna('Unidentified', inplace=True)

# Fill NaN Fare with the mean

test['Fare'] = test['Fare'].fillna(test.Fare.mean())

In [25]:
# 5. Feature Engineering

# Slicing on First Letter of Cabin, then dropping categorical column

test['Cabin_category'] = test['Cabin'].map(lambda x: x[:1])

test = test.drop('Cabin', axis=1)

# Creating dummy column for Reverends on board

test['IsReverend'] = test['Name'].map(lambda x: 1 if 'Rev' in x else 0)

# Creating dummy column for Young Males on board

test['IsMaster'] = test['Name'].map(lambda x: 1 if 'Master' in x else 0)

# Dropping Name and Ticket columns

test.drop(['Name', 'Ticket'], axis=1, inplace=True)

# Creating Male/Female dummy columns, then dropping categorical column

test = pd.concat([test, pd.get_dummies(test['Sex'])], axis = 1)

test.drop(['Sex'], axis=1, inplace=True)

# Creating dummy columns for Passenger Classes, then dropping original column

test = pd.concat([test, pd.get_dummies(test['Pclass'])], axis = 1)

test.drop(['Pclass'], axis=1, inplace=True)

# Creating Embarked dummy columns, then dropping categorical column

test = pd.concat([test, pd.get_dummies(test['Embarked'])], axis = 1)

test.drop(['Embarked'], axis=1, inplace=True)

# Creating column that sums up Siblings and Parents

test['FamilyCount'] = test['SibSp'] + test['Parch']

# Creating column that counts whether passenger had family on board

test['HasFamily'] = test['FamilyCount'].map(lambda x: 1 if x > 0 else 0)

# Creating dummy column for Employees on board

test['IsEmployee'] = test['Fare'].map(lambda x: 1 if x==0 else 0)

test["T"] = 0

# Creating dummy columns for the different cabins, then dropping categorical column

test = pd.concat([test, pd.get_dummies(test['Cabin_category'])], axis = 1)

test.drop(['Cabin_category'], axis=1, inplace=True)

In [26]:
test.columns

Index(['PassengerId',         'Age',       'SibSp',       'Parch',
              'Fare',  'IsReverend',    'IsMaster',      'female',
              'male',             1,             2,             3,
                 'C',           'Q',           'S', 'FamilyCount',
         'HasFamily',  'IsEmployee',           'T',           'A',
                 'B',           'C',           'D',           'E',
                 'F',           'G',           'U'],
      dtype='object')

In [37]:
# Standard Scaler

test_data = ['Age','SibSp','Parch', 'Fare', 'A', 'B', 'C', 'D', 'E',
              'F', 'G', 'T', 'U', 'IsReverend', 'IsMaster', 'female',
              'male', 1, 2, 3, 'Q', 'S', 'FamilyCount', 'HasFamily', 'IsEmployee']

X = test[test_data]

ss = StandardScaler()
X_test_scaled = ss.fit_transform(X)

In [43]:
# Use Chosen Model to Make Predictions

y_preds = best_SGD_gridsearch.predict(X_test_scaled)


In [44]:
# Submission

submission_best_SGD_gridsearch = pd.DataFrame(data = y_preds, columns = ['Survived'], index=test['PassengerId'])
submission_best_SGD_gridsearch.to_csv('./submission_best_SGD_gridsearch.csv')

In [41]:
# from sklearn.metrics import confusion_matrix

# confusion_matrix(Y_test,best_logreg_gridsearch.predict(X_test_scaled))

In [None]:
# from sklearn.metrics import classification_report

# print(classification_report(Y_test,logreg.predict(X_test_scaled)))