In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split

# Cap DataFrame reprs at 10 rows so displayed frames stay short.
pd.options.display.max_rows = 10

In [None]:
# Load the pickled object from the working directory; the cells below use it as a DataFrame.
# NOTE(review): unpickling can execute arbitrary code, so only load this file when it
# comes from a trusted source (presumably written by an earlier cleaning step -- confirm).
df = pd.read_pickle("train_cleaned.pkl")

In [None]:
# Features: every column except the target and the two free-text identifiers,
# indexed by passenger.
X = df.set_index('PassengerId').drop(columns=['Survived', 'Name', 'Ticket'])
# Target: kept as a one-column DataFrame ('Survived') with the same index as X.
y = df.set_index('PassengerId')[['Survived']]

In [None]:
# Hold out 40% of the rows for evaluation. A fixed seed keeps the split -- and
# therefore every score computed from it -- reproducible across kernel restarts
# (random_state=None produced a different split on every run).
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.4,
                                                    random_state=42)

### First Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression
# Baseline classifier fitted on all training features; the target is the single
# 'Survived' column of y_train, passed as a 1-D Series.
logreg = LogisticRegression(C=10000)
logreg.fit(X=X_train, y=y_train['Survived'])

In [None]:
# Score the fitted model on the training split first, then on the held-out split.
accuracy_train, accuracy_test = (
    logreg.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)
# Note: the train score is printed with 4 decimals, the test score with 3.
print("Train Score = %.4f\nTest Score = %.3f\n" % (accuracy_train, accuracy_test))

In [None]:
np.set_printoptions(precision=3)

# Summarise the fitted model: solver iterations, the class labels it learned,
# the intercept, and one coefficient per feature column.
print('n_iter:\t\t\t\t', logreg.n_iter_[0])
# Show every class label; indexing with [0] printed only the first one.
print('classes:\t\t\t\t', logreg.classes_)
print('intercepts:\t\t\t\t', logreg.intercept_[0])
featureList = list(X_train.columns)
coefList = logreg.coef_.tolist()
# coefList[0] is the first (here: only indexed) row of coefficients, in the
# same order as the training columns, so the two can be paired directly.
for feature, coefficient in zip(featureList, coefList[0]):
    print(feature, "coefficient:\t", coefficient)

In [None]:
# Inspect the feature matrix as a raw NumPy array (cell output only; nothing is assigned).
X.values

### Plot Decision Boundary for Age and Fare

In [None]:
from matplotlib import pylab as plt
%matplotlib inline

def plotDecisionBoundary(X, y, model, xlabel, ylabel, h=.02):
    """Fit ``model`` on two columns of ``X`` and plot its decision regions.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame; must contain the columns ``xlabel`` and ``ylabel``.
    y : pandas.DataFrame or pandas.Series
        Class labels, one per row of ``X``. For a DataFrame only the first
        column is used.
    model : object with ``fit(X, y)`` and ``predict(X)``
        Re-fitted in place on the two selected columns only, so any previous
        fit of the passed object is overwritten.
    xlabel, ylabel : str
        Column names drawn on the horizontal and vertical axis.
    h : float, default 0.02
        Step of the prediction grid in data units. The grid has roughly
        ``(x_range / h) * (y_range / h)`` points, so use a larger step for
        columns that span a wide range.

    Returns
    -------
    None
        The plot is drawn on pyplot figure number 1.

    Raises
    ------
    ValueError
        If ``h`` is not positive.
    """
    if h <= 0:
        raise ValueError("h must be a positive mesh step")

    Q = X[[xlabel, ylabel]].values
    # Accept both the one-column DataFrame used in this notebook and a Series.
    labels = y.iloc[:, 0] if getattr(y, "ndim", 1) == 2 else y

    # Grid covering the data with half a unit of padding on every side.
    x_min, x_max = Q[:, 0].min() - .5, Q[:, 0].max() + .5
    y_min, y_max = Q[:, 1].min() - .5, Q[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Fit on the same plain-array representation that predict() receives below;
    # fitting on a DataFrame and predicting on an ndarray makes scikit-learn
    # warn about missing feature names.
    model.fit(Q, labels)
    Z = np.asarray(model.predict(np.c_[xx.ravel(), yy.ravel()]))

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1, figsize=(8, 6))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    # Plot also the training points, coloured by their 1-D class labels.
    plt.scatter(Q[:, 0], Q[:, 1], c=labels, edgecolors='k', cmap=plt.cm.Paired)
    plt.title('Decision Boundaries with '+xlabel+" and "+ylabel)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    # Empty tuples remove the tick marks on both axes.
    plt.xticks(())
    plt.yticks(())
    
# Decision regions of a fresh logistic regression trained on Age and Fare only.
plotDecisionBoundary(
    X=X_train,
    y=y_train,
    model=LogisticRegression(),
    xlabel='Age',
    ylabel='Fare',
)