In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [55]:
# Read the Titanic passenger records from the CSV file (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer="titanic.csv")
# Preview the first five rows to check the columns and some sample values
data.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [56]:
# Discard the columns that are not used as model inputs;
# the frame keeps only what is left after these eight are removed.
data = data.drop(
    columns=[
        'PassengerId',
        'Name',
        'Ticket',
        'Fare',
        'SibSp',
        'Parch',
        'Cabin',
        'Embarked',
    ]
)
data.head(n=5)

Unnamed: 0,Survived,Pclass,Sex,Age
0,0,3,male,22.0
1,1,1,female,38.0
2,1,3,female,26.0
3,1,1,female,35.0
4,0,3,male,35.0


In [57]:
# One-hot encode the categorical columns 'Sex' and 'Pclass' (one indicator column per
# category), then drop every row that still contains a missing value.
# .dropna() is chained so a new frame is returned instead of mutating one in place.
df = pd.get_dummies(data, columns=['Sex', 'Pclass']).dropna()
df.head()

Unnamed: 0,Survived,Age,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3
0,0,22.0,False,True,False,False,True
1,1,38.0,True,False,True,False,False
2,1,26.0,True,False,False,False,True
3,1,35.0,True,False,True,False,False
4,0,35.0,False,True,False,False,True


In [58]:
# Target: the 'Survived' column as a column vector of shape (n_samples, 1)
y = np.array(df['Survived']).reshape(-1, 1)

# Features: every column except the target, standardised per column
# (subtract the column mean, divide by the column standard deviation),
# then converted to a plain NumPy array.
x = df.drop(columns='Survived')
x = np.array((x - x.mean()) / x.std())


In [60]:
# Adding a bias (intercept) term
m, n = x.shape  # m = number of rows, n = number of columns currently in x
# Prepend a column of ones as the bias term.
# The guard keeps this cell idempotent: because it overwrites x with a widened
# copy of itself, re-running it unguarded would stack one more column of ones
# each time. A leading all-ones column means the bias is already present (and
# a second one would be redundant anyway), so the stacking is skipped.
if m == 0 or n == 0 or not np.all(x[:, 0] == 1):
    x = np.hstack((np.ones((m, 1)), x))



In [67]:
#Logistic Regression

class LogisticRegression:
    """Binary classifier: sigmoid of a linear model, trained by batch gradient descent.

    The objective minimised here is the mean squared error between the labels
    and the sigmoid outputs (see ``compute_cost``), not the cross-entropy loss;
    the update in ``fit`` is the gradient of that squared-error objective.

    No intercept is added internally: callers that want a bias term must
    include a column of ones in ``x`` themselves.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        """Store the hyper-parameters.

        Args:
            learning_rate: Step size multiplied into each gradient update.
            epochs: Number of full-batch gradient descent iterations run by ``fit``.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        # Cost recorded once per epoch by the most recent call to fit().
        self.costs = []

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        Only exp(-|z|) is ever evaluated, so the exponential cannot overflow
        for large-magnitude inputs of either sign.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always in (0, 1]
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def compute_cost(self, y, y_hat):  # Mean squared error cost function
        """Return the sum of squared differences between y and y_hat divided by the number of rows of y."""
        m = y.shape[0]
        cost = (1 / m) * np.sum((y - y_hat) ** 2)
        return cost

    def fit(self, x, y):
        """Learn ``self.theta`` from the training data.

        Args:
            x: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples labels; a flat vector or a column
                vector are both accepted.

        Raises:
            ValueError: If x and y do not contain the same number of samples.
        """
        x = np.asarray(x, dtype=float)
        # Force a column vector: a flat y would otherwise broadcast against the
        # (n_samples, 1) predictions into an (n_samples, n_samples) matrix and
        # silently corrupt both the cost and the gradient.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                "x and y must contain the same number of samples: "
                f"got {x.shape[0]} and {y.shape[0]}"
            )

        n_samples = x.shape[0]
        self.theta = np.zeros((x.shape[1], 1))  # Initializing weights
        self.costs = []  # start a fresh history so repeated fits do not accumulate

        for _ in range(self.epochs):
            # Forward propagation
            y_hat = self.sigmoid(np.dot(x, self.theta))

            # Cost computation
            self.costs.append(self.compute_cost(y, y_hat))

            # Gradient of the squared-error cost w.r.t. theta; the
            # y_hat * (1 - y_hat) factor is the derivative of the sigmoid.
            gradient = (2 / n_samples) * np.dot(x.T, (y_hat - y) * y_hat * (1 - y_hat))
            self.theta -= self.learning_rate * gradient

    def predict(self, x):
        """Return 0/1 integer predictions of shape (n_samples, 1), thresholding the sigmoid output at 0.5."""
        y_hat = self.sigmoid(np.dot(np.asarray(x, dtype=float), self.theta))
        return (y_hat >= 0.5).astype(int)
                
    
        
                              
    
    
        

In [68]:
# Build the classifier with its hyper-parameters spelled out, then train it
# on the prepared feature matrix x and label vector y.
model = LogisticRegression(epochs=1000, learning_rate=0.01)
model.fit(x, y)