In [1]:
#imports
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

In [2]:
#getting the dataset
data=pd.read_csv(r"C:\Users\ariki\Downloads\titanic_data.csv")

In [3]:
#showing the dataset
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
#droping not concerned columns
def drop_not_concerned(data, columns):
    return data.drop(columns, axis=1)

columns = ["PassengerId","Name", "Ticket", "Fare", "Cabin", "Embarked"]
data = drop_not_concerned(data, columns)

In [5]:
# checking the empty spots in test dataset
data.isna().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
dtype: int64

In [6]:
# ignoting the empty spots in test dataset
data.dropna(inplace=True)

In [7]:
#shape of the data 
data.shape

(714, 6)

In [8]:
#spliting the Pclass into 3 diffrents columns
def dummy_data(data, columns):
    for column in columns:
        data = pd.concat([data, pd.get_dummies(data[column], prefix=column)], axis=1)
        data = data.drop(column, axis=1)
    return data


columns = ["Pclass"]
data=dummy_data(data, columns)

In [9]:
#changing the 'sex' column into 0,1 (0=female, 1=male)
def sex_to_int(data):
    le = LabelEncoder()
    le.fit(["male","female"])
    data["Sex"]=le.transform(data["Sex"]) 
    return data

data = sex_to_int(data)

In [10]:
#normalizing the data
def normalize_age(data):
    scaler = MinMaxScaler()
    data["Age"] = scaler.fit_transform(data["Age"].values.reshape(-1,1))
    return data
data = normalize_age(data)

In [11]:
# showing the data after the cleaning
data.head()

Unnamed: 0,Survived,Sex,Age,SibSp,Parch,Pclass_1,Pclass_2,Pclass_3
0,0,1,0.271174,1,0,0,0,1
1,1,0,0.472229,1,0,1,0,0
2,1,0,0.321438,0,0,0,0,1
3,1,0,0.434531,1,0,1,0,0
4,0,1,0.434531,0,0,0,0,1


In [12]:
# setting up y label
y= data['Survived']
y.head()

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

In [13]:
# setting up x label, aka the features
X = data.drop(['Survived'],axis=1)
X.head()

Unnamed: 0,Sex,Age,SibSp,Parch,Pclass_1,Pclass_2,Pclass_3
0,1,0.271174,1,0,0,0,1
1,0,0.472229,1,0,1,0,0
2,0,0.321438,0,0,0,0,1
3,0,0.434531,1,0,1,0,0
4,1,0.434531,0,0,0,0,1


In [14]:
# getting the x and y train and test
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X ,y, test_size=0.25 ,random_state=1)

In [15]:
x_train.shape

(535, 7)

In [16]:
x_test.shape

(179, 7)

In [17]:
y_train.shape

(535,)

In [18]:
y_test.shape

(179,)

In [19]:
y_train = y_train.values.reshape((535,1))
y_test = y_test.values.reshape((179,1))

In [20]:
y_train.shape

(535, 1)

In [21]:
y_test.shape

(179, 1)

In [22]:
tf.compat.v1.disable_eager_execution()

In [24]:
#simple logistic regression
features = 7
x = tf.placeholder(tf.float32, [None, features])
y_ = tf.placeholder(tf.float32, [None, 1])
W = tf.Variable(tf.zeros([features,1]))
b = tf.Variable(tf.zeros([1]))
y = tf.nn.sigmoid(tf.matmul(x,W) + b)
#loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.add(tf.matmul(x, W), b), labels=y_))
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=y_))

prediction = tf.round(y)
correct = tf.cast(tf.equal(prediction, y_), dtype=tf.float32)
accuracy = tf.reduce_mean(correct)

update = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(0,100001):
    sess.run(update, feed_dict = {x:x_train, y_:y_train})
    trainAcc = sess.run(accuracy, feed_dict={x: x_test, y_: y_test})
    if i % 10000 == 0 :
        print('Iteration:' , i , ' W:' , sess.run(W) , ' b:' , sess.run(b), ' loss:', loss.eval(session=sess, feed_dict = {x:x_train, y_:y_train}))
        print('accuracy',trainAcc)

Iteration: 0  W: [[-6.4730790e-05]
 [-1.9822814e-05]
 [-2.6903830e-05]
 [-1.3390415e-05]
 [ 2.6218561e-06]
 [-8.7790149e-06]
 [-4.5252062e-05]]  b: [-5.1409217e-05]  loss: 0.76565546
accuracy 0.6256983
Iteration: 10000  W: [[-0.51254654]
 [-0.14216796]
 [-0.16131343]
 [-0.06343859]
 [ 0.04563512]
 [-0.05884506]
 [-0.3463214 ]]  b: [-0.35953087]  loss: 0.7063425
accuracy 0.6256983
Iteration: 20000  W: [[-0.8167061 ]
 [-0.20072128]
 [-0.19781926]
 [-0.05141409]
 [ 0.11280239]
 [-0.07433719]
 [-0.5353327 ]]  b: [-0.4968671]  loss: 0.69022274
accuracy 0.6256983
Iteration: 30000  W: [[-1.0321102 ]
 [-0.22360137]
 [-0.19797736]
 [-0.01990984]
 [ 0.18816397]
 [-0.0694344 ]
 [-0.6610767 ]]  b: [-0.54234666]  loss: 0.68299127
accuracy 0.6256983
Iteration: 40000  W: [[-1.2057664 ]
 [-0.22929211]
 [-0.18379629]
 [ 0.01767219]
 [ 0.26777825]
 [-0.05345723]
 [-0.75789654]]  b: [-0.5435757]  loss: 0.6781922
accuracy 0.6256983
Iteration: 50000  W: [[-1.3567183 ]
 [-0.22559068]
 [-0.1632    ]
 [ 0.056

In [25]:
#adding hidden layer
features = 7
hidden_layer_nodes = 3
x = tf.placeholder(tf.float32, [None, features])
y_ = tf.placeholder(tf.float32, [None, 1])
W1 = tf.Variable(tf.truncated_normal([features,hidden_layer_nodes], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[hidden_layer_nodes]))
z1 = tf.add(tf.matmul(x,W1),b1)
a1 = tf.nn.relu(z1)
W2 = tf.Variable(tf.truncated_normal([hidden_layer_nodes,1], stddev=0.1))
b2 = tf.Variable(0.)
z2 = tf.matmul(a1,W2) + b2
y = 1 / (1.0 + tf.exp(-z2))
loss = tf.reduce_mean(-(y_ * tf.log(y) + (1 - y_) * tf.log( 1 - y)))

prediction3 = tf.round(y)
correct3 = tf.cast(tf.equal(prediction3, y_), dtype=tf.float32)
accuracy3 = tf.reduce_mean(correct3)


update = tf.train.GradientDescentOptimizer(0.001).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(0,1000001):
    sess.run(update, feed_dict = {x:x_train, y_:y_train})
    trainAcc3 = sess.run(accuracy3, feed_dict={x: x_test, y_: y_test})
    if i % 100000 == 0 :
        print('Iteration:' , i , ' W:' , sess.run(W2) , ' b:' , sess.run(b2), ' loss:', loss.eval(session=sess, feed_dict = {x:x_train, y_:y_train}))
        print('accuracy',trainAcc3)


Iteration: 0  W: [[-0.04290079]
 [-0.02921832]
 [ 0.05355581]]  b: -7.9597936e-05  loss: 0.69217485
accuracy 0.6256983
Iteration: 100000  W: [[-0.46228033]
 [-1.4588023 ]
 [ 1.7524614 ]]  b: 0.6501099  loss: 0.43698046
accuracy 0.8044693
Iteration: 200000  W: [[-0.6562868]
 [-1.6409999]
 [ 2.0073483]]  b: 0.6592914  loss: 0.43009758
accuracy 0.8044693
Iteration: 300000  W: [[-0.86188376]
 [-1.7214214 ]
 [ 2.0763586 ]]  b: 0.6831091  loss: 0.42705578
accuracy 0.7932961
Iteration: 400000  W: [[-1.0039388]
 [-1.720195 ]
 [ 2.097874 ]]  b: 0.7248943  loss: 0.42613438
accuracy 0.79888266
Iteration: 500000  W: [[-1.0810578]
 [-1.7299536]
 [ 2.101669 ]]  b: 0.74159557  loss: 0.4257556
accuracy 0.79888266
Iteration: 600000  W: [[-1.1290525]
 [-1.7418746]
 [ 2.101669 ]]  b: 0.7645355  loss: 0.42557546
accuracy 0.79888266
Iteration: 700000  W: [[-1.1926229]
 [-1.7647964]
 [ 2.0999558]]  b: 0.7838194  loss: 0.42499492
accuracy 0.8044693
Iteration: 800000  W: [[-1.2442192]
 [-1.7923357]
 [ 2.09995