In [80]:
import numpy as np

# sigmoid function
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), evaluated element-wise.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1] with the same shape as ``x``.
    """
    # log(1 + exp(-x)) is computed by logaddexp without ever forming exp(-x)
    # on its own, so very negative inputs no longer overflow to inf (and no
    # RuntimeWarning is emitted); exp(-log(1 + exp(-x))) == 1 / (1 + exp(-x)).
    return np.exp(-np.logaddexp(0, -x))

# cost function
def cost(theta, X, y):
    """Return the mean binary cross-entropy of a logistic model.

    Parameters
    ----------
    theta : numpy.ndarray
        Weight vector; ``X @ theta`` gives one logit per row of ``X``.
    X : numpy.ndarray
        Design matrix, one row per sample.
    y : numpy.ndarray
        Labels, one per row of ``X``.

    Returns
    -------
    numpy.float64
        Mean of ``-y*log(h) - (1-y)*log(1-h)`` with ``h = sigmoid(X @ theta)``.
    """
    z = X @ theta
    # With h = 1 / (1 + exp(-z)):  -y*log(h) - (1-y)*log(1-h) == log(1 + exp(z)) - y*z.
    # Working on the logits avoids log(0) when h saturates to exactly 0 or 1,
    # which previously turned the cost into NaN (0 * -inf).
    return (np.logaddexp(0, z) - y * z).mean()

# gradient function
def gradient(theta, X, y):
    """Return the gradient of the mean cross-entropy cost with respect to theta.

    Computes ``X.T @ (sigmoid(X @ theta) - y) / y.size``.
    """
    n_samples = y.size
    # difference between predicted probabilities and the labels
    residual = sigmoid(X @ theta) - y
    return (X.T @ residual) / n_samples

# update theta
def update_theta(theta, X, y, alpha):
    """Take one gradient-descent step of size ``alpha`` and return the new theta."""
    step = alpha * gradient(theta, X, y)
    return theta - step

# logistic regression function
def logistic_regression(X, y, alpha, num_iters, print_every=1):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample, WITHOUT a bias column (a column
        of ones is prepended here).
    y : array-like
        Labels, one per row of ``X``. Column vectors of shape (n, 1) are
        flattened to (n,).
    alpha : float
        Learning rate.
    num_iters : int
        Number of gradient-descent updates to perform.
    print_every : int, optional
        Print the cost every ``print_every`` iterations. The default of 1
        prints after every update; 0 or None disables the printing.

    Returns
    -------
    numpy.ndarray
        Weight vector of length ``n_features + 1``; element 0 is the bias.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """
    X = np.asarray(X, dtype=float)
    # Flatten y: an (n, 1) column would broadcast against the (n,) prediction
    # vector into an (n, n) matrix and silently corrupt theta.
    y = np.asarray(y, dtype=float).ravel()
    if X.shape[0] != y.size:
        raise ValueError(
            "X has %d rows but y has %d labels" % (X.shape[0], y.size)
        )

    # add bias term
    X = np.c_[np.ones(X.shape[0]), X]

    # initialize theta with 0s
    theta = np.zeros(X.shape[1])

    # perform gradient descent
    for i in range(num_iters):
        theta = update_theta(theta, X, y, alpha)
        # the cost is only evaluated on iterations that are actually reported
        if print_every and i % print_every == 0:
            print(i, "Cost", cost(theta, X, y))
    return theta

def load_numeric_table(path):
    """Read a CSV file and return its contents as a float array.

    The same steps are applied to the training and the test file:
    read every field as a string, drop the 3rd column (index 2) and the
    first (header) row, encode 'male' as 1 and 'female' as 0 in what is then
    column index 2, and finally convert the whole table to float.

    Raises ValueError (from ``astype``) if any remaining field is not numeric.
    """
    table = np.loadtxt(path, delimiter=",", dtype=str)

    # remove the 3rd column and first row from data
    table = np.delete(table, 2, 1)[1:, :]

    # replace 'male' with 1 and 'female' with 0 in new column 3; string
    # literals keep every np.where operand a string (no int/str mixing)
    sex = table[:, 2]
    table[:, 2] = np.where(sex == 'male', '1', np.where(sex == 'female', '0', sex))

    return table.astype(float)


# read and clean the training data
data = load_numeric_table("train.csv")

# split data into features and output: column 0 is the label, columns 1..-2
# are the features.
# NOTE(review): the `1:-1` slice leaves the last column out of the features -
# confirm this is intended.
X = data[:, 1:-1]
y = data[:, 0]

alpha = 0.009
num_iters = 5000

# NOTE(review): features are passed in unscaled and the recorded cost goes up
# and down during the first iterations; standardising the features (or using a
# smaller alpha) may be worth trying.
theta = logistic_regression(X, y, alpha, num_iters)

# read and clean the test data exactly like the training data
test_data = load_numeric_table("test.csv")

# split data into features and output, adding the bias column that
# logistic_regression adds internally for the training features
X_test = test_data[:, 1:-1]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]
y_test = test_data[:, 0]

# Predict the response for test dataset: class 1 when probability >= 0.5
y_pred = (sigmoid(np.dot(X_test, theta)) >= 0.5).astype(int)

# Check the accuracy
score = int(np.count_nonzero(y_pred == y_test))
print("Accuracy:", score/len(y_test)*100, "%")

0 Cost 0.7058911020434595
1 Cost 0.704068110814906
2 Cost 0.7227273576544461
3 Cost 0.7136805727179959
4 Cost 0.7369474630305397
5 Cost 0.7195970211841851
6 Cost 0.7454978867813201
7 Cost 0.7222528206998761
8 Cost 0.7494115773956659
9 Cost 0.7231419009030967
10 Cost 0.7508966685523701
11 Cost 0.7232846311409461
12 Cost 0.7513679813457672
13 Cost 0.723146537420326
14 Cost 0.7514566169509109
15 Cost 0.7229085744219987
16 Cost 0.7514057542215666
17 Cost 0.722636618948297
18 Cost 0.7513036385711145
19 Cost 0.7223539515119292
20 Cost 0.7511814669662625
21 Cost 0.7220686558174745
22 Cost 0.7510501396353968
23 Cost 0.7217834933951177
24 Cost 0.7509134610263097
25 Cost 0.7214993841187991
26 Cost 0.7507727826686221
27 Cost 0.7212166167591046
28 Cost 0.7506286151530333
29 Cost 0.720935265798966
30 Cost 0.7504811822724484
31 Cost 0.7206553336982533
32 Cost 0.7503306098719041
33 Cost 0.720376799064402
34 Cost 0.7501769897081295
35 Cost 0.7200996328482768
36 Cost 0.7500204009036796
37 Cost 0.719823

1383 Cost 0.5874062071338905
1384 Cost 0.5956829405912141
1385 Cost 0.5872534736367472
1386 Cost 0.5955006680867386
1387 Cost 0.5871008430622695
1388 Cost 0.5953185416961825
1389 Cost 0.5869483153044227
1390 Cost 0.5951365613389756
1391 Cost 0.5867958902574746
1392 Cost 0.5949547269348882
1393 Cost 0.5866435678159944
1394 Cost 0.5947730384040312
1395 Cost 0.5864913478748557
1396 Cost 0.5945914956668598
1397 Cost 0.5863392303292362
1398 Cost 0.5944100986441732
1399 Cost 0.5861872150746189
1400 Cost 0.5942288472571181
1401 Cost 0.5860353020067938
1402 Cost 0.5940477414271892
1403 Cost 0.5858834910218579
1404 Cost 0.5938667810762315
1405 Cost 0.5857317820162176
1406 Cost 0.5936859661264428
1407 Cost 0.5855801748865875
1408 Cost 0.5935052965003736
1409 Cost 0.5854286695299935
1410 Cost 0.5933247721209308
1411 Cost 0.5852772658437732
1412 Cost 0.5931443929113791
1413 Cost 0.5851259637255766
1414 Cost 0.5929641587953421
1415 Cost 0.5849747630733674
1416 Cost 0.592784069696805
1417 Cost 0.584

2910 Cost 0.5175820953024601
2911 Cost 0.5175695004626504
2912 Cost 0.5175569119194241
2913 Cost 0.5175443296678405
2914 Cost 0.5175317537029636
2915 Cost 0.5175191840198617
2916 Cost 0.5175066206136081
2917 Cost 0.5174940634792803
2918 Cost 0.5174815126119602
2919 Cost 0.517468968006735
2920 Cost 0.5174564296586955
2921 Cost 0.5174438975629378
2922 Cost 0.5174313717145621
2923 Cost 0.5174188521086734
2924 Cost 0.5174063387403811
2925 Cost 0.5173938316047991
2926 Cost 0.5173813306970457
2927 Cost 0.5173688360122439
2928 Cost 0.5173563475455215
2929 Cost 0.51734386529201
2930 Cost 0.5173313892468463
2931 Cost 0.5173189194051709
2932 Cost 0.5173064557621297
2933 Cost 0.5172939983128725
2934 Cost 0.5172815470525536
2935 Cost 0.5172691019763319
2936 Cost 0.5172566630793709
2937 Cost 0.5172442303568384
2938 Cost 0.5172318038039068
2939 Cost 0.5172193834157526
2940 Cost 0.5172069691875572
2941 Cost 0.517194561114506
2942 Cost 0.5171821591917894
2943 Cost 0.517169763414602
2944 Cost 0.5171573

4311 Cost 0.5044160743188485
4312 Cost 0.5044089754850156
4313 Cost 0.5044018789807326
4314 Cost 0.5043947848045239
4315 Cost 0.5043876929549146
4316 Cost 0.5043806034304317
4317 Cost 0.5043735162296021
4318 Cost 0.5043664313509555
4319 Cost 0.5043593487930218
4320 Cost 0.504352268554332
4321 Cost 0.5043451906334191
4322 Cost 0.5043381150288163
4323 Cost 0.5043310417390588
4324 Cost 0.5043239707626826
4325 Cost 0.5043169020982248
4326 Cost 0.5043098357442239
4327 Cost 0.5043027716992194
4328 Cost 0.5042957099617524
4329 Cost 0.5042886505303644
4330 Cost 0.5042815934035988
4331 Cost 0.50427453858
4332 Cost 0.5042674860581134
4333 Cost 0.5042604358364855
4334 Cost 0.5042533879136645
4335 Cost 0.5042463422881991
4336 Cost 0.5042392989586396
4337 Cost 0.5042322579235373
4338 Cost 0.5042252191814449
4339 Cost 0.5042181827309159
4340 Cost 0.5042111485705054
4341 Cost 0.5042041166987692
4342 Cost 0.5041970871142645
4343 Cost 0.5041900598155499
4344 Cost 0.5041830348011848
4345 Cost 0.50417601

In [61]:
# Show the feature matrix followed by the label vector.
# NOTE(review): this cell's execution count is lower than the training cell's,
# and its recorded output has a leading column of ones that `X = data[:, 1:-1]`
# does not produce - re-run after "Restart & Run All" to refresh it.
print(X, y, sep="\n")

[[ 1.  3.  1. 22.  1.  0.]
 [ 1.  1.  0. 38.  1.  0.]
 [ 1.  3.  0. 26.  0.  0.]
 ...
 [ 1.  1.  0. 39.  1.  0.]
 [ 1.  3.  0. 18.  1.  0.]
 [ 1.  3.  1. 32.  0.  0.]]
[0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 1. 1.
 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0.
 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0.
 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0.
 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0.
 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1.
 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1.
 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0.
 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0.
 0. 1. 1. 1. 0. 1. 1. 0. 1. 1

In [47]:
# Show the cleaned training table (label, features) after conversion to float.
print(str(data))

[[ 0.      3.      1.     ...  1.      0.      7.25  ]
 [ 1.      1.      0.     ...  1.      0.     71.2833]
 [ 1.      3.      0.     ...  0.      0.      7.925 ]
 ...
 [ 1.      1.      0.     ...  1.      0.     55.9   ]
 [ 0.      3.      0.     ...  1.      0.     14.4583]
 [ 1.      3.      1.     ...  0.      0.      7.925 ]]


In [49]:
# Show the first row of the cleaned training table.
print(data[0, :])

[ 0.    3.    1.   22.    1.    0.    7.25]
