<a href="https://colab.research.google.com/github/alirezash97/Machine-Learning-Course/blob/main/Titanic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd 
df = pd.read_csv('/content/titanicdata.csv')
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age
0,0,3,2,22.0
1,1,1,1,38.0
2,1,3,1,26.0
3,1,1,1,35.0
4,0,3,2,35.0


In [2]:
X = df[["Pclass", "Sex", "Age"]].values
Y = df[["Survived"]]

In [6]:
import copy
import numpy as np



def normalize(X, column_numbers):

  normalized_X = copy.deepcopy(X)

  for column in column_numbers:

    selected_column = X[:, column]
    column_mean = np.mean(selected_column)
    column_std = np.std(selected_column)
    normalized_column = np.zeros(( selected_column.shape ))

    for  row_index, row in enumerate(selected_column):
      temp = ( row - column_mean) / column_std
      normalized_column[row_index] = temp


    normalized_column = normalized_column.reshape(normalized_column.shape[0])
    normalized_X[:, column] = normalized_column 

  
  return normalized_X



In [7]:

u = normalize(X, [0, 1, 2])
print(u[:5, :])

[[ 0.82737724  0.73769513 -0.5924806 ]
 [-1.56610693 -1.35557354  0.63878901]
 [ 0.82737724 -1.35557354 -0.2846632 ]
 [-1.56610693 -1.35557354  0.40792596]
 [ 0.82737724  0.73769513  0.40792596]]


In [8]:


def preprocess(X, column_numbers, Y):

  preprocessed_X = np.ones((X.shape[0], X.shape[1] + 1)) # + 1 for bias
  preprocessed_X[:, 1:] = normalize(X, column_numbers)

  preprocessed_Y = np.where(Y==0, -1, Y)

  
  return preprocessed_X, preprocessed_Y





In [9]:
X, Y = preprocess(X, [0, 1, 2], Y)
print(X[:5, :], X.shape)
print(Y[:5, :], Y.shape)

[[ 1.          0.82737724  0.73769513 -0.5924806 ]
 [ 1.         -1.56610693 -1.35557354  0.63878901]
 [ 1.          0.82737724 -1.35557354 -0.2846632 ]
 [ 1.         -1.56610693 -1.35557354  0.40792596]
 [ 1.          0.82737724  0.73769513  0.40792596]] (891, 4)
[[-1]
 [ 1]
 [ 1]
 [ 1]
 [-1]] (891, 1)


In [10]:
def sigmoid(s):
  
  return  1 / (1 + np.exp(-s)) 

In [11]:
def gradient_decent(X, Y, weights):
  
  # gradient_Ein = np.zeros((weight.shape))
  number_of_samples = X.shape[0]
  number_of_features = X.shape[1]
  full_shape_weight = np.zeros((X.shape))
  
  for i in range(number_of_samples):
    for j in range(number_of_features):
      full_shape_weight[i, j] = np.dot(Y[i], X[j, j]) / ( 1 + np.exp( np.dot( Y[i], np.matmul(X[i, :], weights.T) ) ) )

  gradient_Ein = ( (-1/number_of_samples) * np.sum(full_shape_weight, axis=0) ).reshape((1, number_of_features))


  return gradient_Ein

In [12]:
t = gradient_decent(X, Y, np.ones((1, 4)))
print(t.shape)
print(t)

(1, 4)
[[ 0.28376502 -0.44440636 -0.38466435  0.11575512]]


In [13]:
def evaluate(predicted_y, actual_y):
  
  
  number_of_samples = actual_y.shape[0]
  TP, TN, FP, FN = 0, 0, 0, 0
  for i in range(number_of_samples):
    
    if actual_y[i] == 1 and predicted_y[i] == 1:
      TP += 1
    elif actual_y[i] == 1 and predicted_y[i] == -1:
      FN += 1
    elif actual_y[i] == -1 and predicted_y[i] == 1:
      FP += 1
    elif actual_y[i] == -1 and predicted_y[i] == -1:
      TN += 1
    else:
      pass

  return ( (TP + TN) / (TP + TN + FP + FN) ) * 100





In [16]:


def LogisticRegression(X, Y, X_validation, Y_validation, Learning_Rate, epoch_num):


  initial_weights = np.ones((1, X.shape[1]))
  number_of_samples = X.shape[0]
  weights = copy.deepcopy(initial_weights)

  for i in range(epoch_num+1):
    
    # update
    weights -= (Learning_Rate * gradient_decent(X, Y, weights))
    
    # evaluate based on validation set
    temp = np.matmul(X, weights.T)
    # define vectorized sigmoid
    sigmoid_v = np.vectorize(sigmoid)
    predicted_probability = sigmoid_v(temp)
    
    prediction_temp = np.where(predicted_probability > 0.5, 1, predicted_probability)
    prediction = np.where(prediction_temp <= 0.5, -1, prediction_temp)

    

    accuracy = evaluate(prediction, Y)
    print(" Accuracy after ", i, " epochs : ", accuracy)

  return weights



In [17]:
p = LogisticRegression(X, Y, X, Y, 0.02, 1000)


 Accuracy after  0  epochs :  21.997755331088666
 Accuracy after  1  epochs :  21.997755331088666
 Accuracy after  2  epochs :  21.885521885521886
 Accuracy after  3  epochs :  21.548821548821547
 Accuracy after  4  epochs :  21.32435465768799
 Accuracy after  5  epochs :  21.32435465768799
 Accuracy after  6  epochs :  21.21212121212121
 Accuracy after  7  epochs :  20.875420875420875
 Accuracy after  8  epochs :  21.099887766554435
 Accuracy after  9  epochs :  21.099887766554435
 Accuracy after  10  epochs :  20.875420875420875
 Accuracy after  11  epochs :  20.763187429854096
 Accuracy after  12  epochs :  21.997755331088666
 Accuracy after  13  epochs :  21.997755331088666
 Accuracy after  14  epochs :  22.334455667789
 Accuracy after  15  epochs :  22.334455667789
 Accuracy after  16  epochs :  22.22222222222222
 Accuracy after  17  epochs :  22.334455667789
 Accuracy after  18  epochs :  22.22222222222222
 Accuracy after  19  epochs :  22.334455667789
 Accuracy after  20  epochs

In [None]:
print(p)

[[-0.55318403  3.43245226  3.10545517  0.36641592]]
