In [6]:
import pandas as pd
import numpy as  np
import seaborn as sns
import matplotlib.pyplot as plt
% matplotlib inline

titanic_train = pd.read_csv('titanic_train.csv')
titanic_test = pd.read_csv('titanic_test.csv')

In [7]:
# Preprocess the training data.

# Label-encode Sex into a new Sex_encoded column. LabelEncoder numbers the
# fitted labels in sorted order, so female -> 0 and male -> 1.
from sklearn import preprocessing
input_labels = ['male', 'female']
encoder = preprocessing.LabelEncoder()
encoder.fit(input_labels)
encoded_values = encoder.transform(titanic_train['Sex'])
titanic_train['Sex_encoded'] = encoded_values

# Map the Embarked codes C, Q, S to 1, 2, 3 (any other value becomes NaN).
# The mapping overwrites its own input column, so only apply it while the
# column is still non-numeric; otherwise re-running this cell would turn every
# value into NaN and the dropna below would discard all rows.
Embarked_mapping = {'C':1, 'Q':2, 'S':3}
if not pd.api.types.is_numeric_dtype(titanic_train['Embarked']):
    titanic_train['Embarked'] = titanic_train['Embarked'].map(Embarked_mapping)

# Drop the rows whose Embarked value is missing (the original author notes
# there are only two such rows). The copy keeps the later column assignment
# from triggering a chained-assignment warning.
titanic_train = titanic_train.dropna(subset=['Embarked']).copy()

# Fill missing numeric values (e.g. Age) by linear interpolation along the row
# order. This is interpolation between neighbouring rows, not a regression
# model. Only numeric columns are interpolated: calling interpolate on the
# object columns (Name, Ticket, Cabin, ...) is deprecated/rejected by recent
# pandas. limit_direction='both' also fills gaps at the very start of a column,
# which would otherwise stay NaN and make the model fit fail.
numeric_columns = titanic_train.select_dtypes(include='number').columns
titanic_train[numeric_columns] = titanic_train[numeric_columns].interpolate(
    method='linear', limit_direction='both')

# Build the feature matrix and target. Cabin is left out because it is mostly
# missing; Name and Ticket are left out because the author found almost no
# correlation with survival.
# NOTE(review): PassengerId looks like an arbitrary row identifier rather than
# a predictive feature; if it is removed here, the test feature list X_2 must
# be changed in the same way.
X = titanic_train[['PassengerId','Pclass','Sex_encoded','Age', 
                   'SibSp','Parch','Fare','Embarked']]
y = titanic_train[['Survived']]

In [8]:
# Fit an Elastic Net linear regressor on the prepared training features.
from sklearn.linear_model import ElasticNet

# alpha is the overall penalty strength; l1_ratio=0.9 weights the penalty
# mostly towards the L1 term.
elastic_net_params = dict(alpha=0.001, l1_ratio=0.9)
model_en = ElasticNet(**elastic_net_params)
model_en.fit(X, y)

ElasticNet(alpha=0.001, copy_X=True, fit_intercept=True, l1_ratio=0.9,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [9]:
# Preprocess the test data with the same steps used for the training data.

# Label-encode Sex into a new Sex_encoded column. LabelEncoder numbers the
# fitted labels in sorted order, so female -> 0 and male -> 1.
from sklearn import preprocessing
input_labels = ['male', 'female']
encoder = preprocessing.LabelEncoder()
encoder.fit(input_labels)
encoded_values = encoder.transform(titanic_test['Sex'])
titanic_test['Sex_encoded'] = encoded_values

# Map the Embarked codes C, Q, S to 1, 2, 3 (any other value becomes NaN).
# The mapping overwrites its own input column, so only apply it while the
# column is still non-numeric; this keeps the cell safe to re-run.
Embarked_mapping = {'C':1, 'Q':2, 'S':3}
if not pd.api.types.is_numeric_dtype(titanic_test['Embarked']):
    titanic_test['Embarked'] = titanic_test['Embarked'].map(Embarked_mapping)

# Never drop rows from the test set: every passenger needs a prediction, and
# removing rows would silently shorten the prediction array. Fill any missing
# Embarked value with the most frequent port instead.
if titanic_test['Embarked'].isnull().any():
    most_common_port = titanic_test['Embarked'].mode().iloc[0]
    titanic_test['Embarked'] = titanic_test['Embarked'].fillna(most_common_port)

# Fill missing numeric values (e.g. Age) by linear interpolation along the row
# order. This is interpolation between neighbouring rows, not a regression
# model. Only numeric columns are interpolated: calling interpolate on the
# object columns (Name, Ticket, Cabin, ...) is deprecated/rejected by recent
# pandas. limit_direction='both' also fills gaps at the very start of a column,
# which would otherwise stay NaN and make predict fail.
numeric_columns = titanic_test.select_dtypes(include='number').columns
titanic_test[numeric_columns] = titanic_test[numeric_columns].interpolate(
    method='linear', limit_direction='both')

# Select the same feature columns, in the same order, as the training matrix X.
X_2 = titanic_test[['PassengerId','Pclass','Sex_encoded','Age', 
                   'SibSp','Parch','Fare','Embarked']]

In [17]:
# Predict a continuous survival score for every test row. reshape(-1, 1) yields
# a single column for however many rows X_2 has, instead of hard-coding the
# row count of one particular test file.
n_pred = model_en.predict(X_2).reshape(-1, 1)

# Threshold the regression output: scores above 0.5 become 1.0, the rest 0.0.
from sklearn import preprocessing
data_binarized = preprocessing.Binarizer(threshold=0.5).transform(n_pred)

print(data_binarized)

[[ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]

In [18]:
# Convert the binarized column to integers and transpose it, so A has a single
# row holding one label per test passenger.
A = np.array(data_binarized, dtype='int64').T

# Print one label per line. Iterating over the row itself works for any number
# of predictions, rather than assuming exactly 418 of them.
for label in A[0]:
    print(label)

0
0
0
0
1
0
1
0
1
0
0
0
1
0
1
1
0
0
1
1
0
0
1
1
1
0
1
0
0
0
0
0
1
1
0
0
1
1
0
0
0
0
0
1
1
0
0
0
1
1
0
0
1
1
0
0
0
0
0
1
0
0
0
1
1
1
1
0
0
1
1
0
1
1
1
1
0
1
0
1
0
0
0
0
0
0
1
1
1
0
1
0
1
0
1
0
1
0
1
0
1
0
0
0
1
0
0
0
0
0
0
1
1
1
1
0
0
1
0
1
1
0
1
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
0
0
0
0
0
1
0
0
1
0
0
1
1
0
1
1
0
1
0
0
1
0
0
1
1
0
0
0
0
0
1
1
0
1
1
0
0
1
0
1
0
1
0
0
0
0
0
0
0
0
0
1
1
0
1
1
0
0
1
0
0
1
0
1
0
0
0
0
1
0
0
1
0
1
0
1
0
1
0
1
1
0
1
0
0
0
1
0
0
0
0
0
0
1
1
1
1
0
0
0
0
1
0
1
1
1
0
1
0
0
0
0
0
1
0
0
0
1
1
0
0
0
0
1
0
0
0
1
1
0
1
0
0
0
0
1
0
1
1
1
0
0
0
0
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
1
1
1
0
0
0
0
0
0
0
0
1
0
1
0
0
0
1
0
0
1
0
0
0
0
0
0
0
0
0
1
0
1
0
1
0
1
1
0
0
0
1
0
1
0
0
1
0
1
1
0
1
0
0
1
1
0
0
1
0
0
1
1
1
0
0
0
0
0
1
1
0
1
0
0
0
0
0
1
0
0
0
1
0
1
0
0
1
0
1
1
0
0
0
0
1
1
1
1
1
0
1
0
0
0


In [None]:
# Submitting the predictions from this model to Kaggle for evaluation gave a score of 0.75598.