In [1]:
import numpy as np

In [2]:
import tflearn

In [3]:
import pandas as pd

In [4]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [5]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


First, we will discard the fields that are not likely to help in our analysis. For example, we make the assumption that 'name' field will not be very useful in our task, because we estimate that a passenger name and his chance of surviving are not correlated. With such thinking, we discard 'name' and 'ticket' fields.

Then, we need to convert all our data to numerical values, because a neural network model can only perform operations over numbers. However, our dataset contains some non numerical values, such as 'name' or 'sex'. Because 'name' is discarded, we just need to handle 'sex' field. In this simple case, we will just assign '0' to males and '1' to females.

In [6]:
train.drop(['Name','Ticket'],inplace=True,axis=1)
test.drop(['Name','Ticket'],inplace=True,axis=1)

In [7]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,1,0,3,male,22.0,1,0,7.25,,S
1,2,1,1,female,38.0,1,0,71.2833,C85,C
2,3,1,3,female,26.0,0,0,7.925,,S
3,4,1,1,female,35.0,1,0,53.1,C123,S
4,5,0,3,male,35.0,0,0,8.05,,S


In [8]:
train['Sex']=pd.get_dummies(train['Sex'])['male']
test['Sex']=pd.get_dummies(test['Sex'])['male']

In [9]:
train['Age']=train['Age'].fillna(0)
test['Age']=test['Age'].fillna(0)

In [10]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,1,0,3,1,22.0,1,0,7.25,,S
1,2,1,1,0,38.0,1,0,71.2833,C85,C
2,3,1,3,0,26.0,0,0,7.925,,S
3,4,1,1,0,35.0,1,0,53.1,C123,S
4,5,0,3,1,35.0,0,0,8.05,,S


In [11]:
embark_binary = pd.get_dummies(train['Embarked'],drop_first=True)
embark_binary_test = pd.get_dummies(test['Embarked'],drop_first=True)

In [12]:
train = pd.merge(train,embark_binary,left_index=True,right_index=True)
test = pd.merge(test,embark_binary_test,left_index=True,right_index=True)

In [13]:
train = train.drop(['Cabin','Embarked'],axis=1)
test = test.drop(['Cabin','Embarked'],axis=1)

In [14]:
test.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Q,S
0,892,3,1,34.5,0,0,7.8292,1,0
1,893,3,0,47.0,1,0,7.0,0,1
2,894,2,1,62.0,0,0,9.6875,1,0
3,895,3,1,27.0,0,0,8.6625,0,1
4,896,3,0,22.0,1,1,12.2875,0,1


In [15]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Q,S
0,1,0,3,1,22.0,1,0,7.25,0,1
1,2,1,1,0,38.0,1,0,71.2833,0,0
2,3,1,3,0,26.0,0,0,7.925,0,1
3,4,1,1,0,35.0,1,0,53.1,0,1
4,5,0,3,1,35.0,0,0,8.05,0,1


In [16]:
net = tflearn.input_data(shape=[None,8])
net = tflearn.fully_connected(net,32)
net = tflearn.fully_connected(net,32)
net = tflearn.fully_connected(net,2,activation='softmax')
net = tflearn.regression(net)

In [17]:
model = tflearn.DNN(net)

In [18]:
train = train.dropna()

In [19]:
data = train.drop(['PassengerId','Survived'],axis=1)
labels = train[['Survived','Sex']]

In [20]:
test_data = test.drop(['PassengerId'],axis=1)

In [21]:
labels.head()

Unnamed: 0,Survived,Sex
0,0,1
1,1,0
2,1,0
3,1,0
4,0,1


In [22]:
labels['alt']=(labels['Survived']+1)%2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
labels.drop('Sex',axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [24]:
data = data.as_matrix()

In [25]:
labels = labels.as_matrix()

In [26]:
labels = labels.reshape(-1,2)

In [27]:
labels.shape

(891, 2)

In [28]:
data.shape

(891, 8)

In [41]:
model.fit(data,labels, n_epoch=10,batch_size=64,show_metric=True)

Training Step: 979  | total loss: [1m[32m0.47828[0m[0m | time: 0.031s
| Adam | epoch: 030 | loss: 0.47828 - acc: 0.7963 -- iter: 832/891
Training Step: 980  | total loss: [1m[32m0.47514[0m[0m | time: 0.034s
| Adam | epoch: 030 | loss: 0.47514 - acc: 0.8010 -- iter: 891/891
--


In [42]:
test_data.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Q,S
0,3,1,34.5,0,0,7.8292,1,0
1,3,0,47.0,1,0,7.0,0,1
2,2,1,62.0,0,0,9.6875,1,0
3,3,1,27.0,0,0,8.6625,0,1
4,3,0,22.0,1,1,12.2875,0,1


In [43]:
pred = model.predict(test_data)

In [44]:
pred[0][0]

0.16257119

In [45]:
survived = np.apply_along_axis(lambda passenger: 1 if passenger[0]>passenger[1] else 0,axis=1,arr=pred)

In [46]:
test.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Q,S
0,892,3,1,34.5,0,0,7.8292,1,0
1,893,3,0,47.0,1,0,7.0,0,1
2,894,2,1,62.0,0,0,9.6875,1,0
3,895,3,1,27.0,0,0,8.6625,0,1
4,896,3,0,22.0,1,1,12.2875,0,1


In [47]:
pred[:5]

array([[ 0.16257119,  0.83742881],
       [ 0.58763212,  0.41236791],
       [ 0.20738475,  0.79261518],
       [ 0.15610866,  0.84389132],
       [ 0.59168994,  0.40831003]], dtype=float32)

In [48]:
survived

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0,

In [49]:
sub = pd.DataFrame({'PassengerId':test['PassengerId'],'Survived':survived})

In [50]:
sub.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1


In [51]:
sub.to_csv('DNN Attempt 3.csv',index=False)