In [7]:
import numpy as np
import pandas as pd

# Read the passenger table and run the whole clean-up as a single chain:
#   1. discard the identifier / free-text columns the model does not use,
#   2. encode 'Sex' as 0 (male) / 1 (female),
#   3. impute missing 'Age' and 'Fare' values with each column's median,
#   4. cast the 'Survived' target to int.
titanic_data = (
    pd.read_csv("titanic.csv")
    .drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked'])
    .assign(
        Sex=lambda frame: frame['Sex'].map({'male': 0, 'female': 1}),
        Age=lambda frame: frame['Age'].fillna(frame['Age'].median()),
        Fare=lambda frame: frame['Fare'].fillna(frame['Fare'].median()),
        Survived=lambda frame: frame['Survived'].astype(int),
    )
)

# Feature scaling (optional for logistic regression, but recommended)
def feature_scaling(data, mean=None, std=None):
    """Standardize ``data`` to zero mean and unit standard deviation.

    Parameters
    ----------
    data : pandas.DataFrame, pandas.Series or numpy.ndarray
        Values to standardize. For a DataFrame the statistics are computed
        per column (whatever ``data.mean()`` / ``data.std()`` return).
    mean, std : optional
        Statistics to standardize with. When omitted they are computed from
        ``data`` itself, which is the original behavior. Pass the statistics
        of the training set here to put new observations on the same scale
        the model was trained on.

    Returns
    -------
    Same type as ``(data - mean) / std``.

    Notes
    -----
    A standard deviation of exactly zero (constant column) is replaced by 1
    so the column becomes all zeros instead of NaN/inf from a 0/0 division.
    """
    if mean is None:
        mean = data.mean()
    if std is None:
        std = data.std()

    # Guard against division by zero for constant columns.
    if np.ndim(std) == 0:
        if std == 0:
            std = 1.0
    elif hasattr(std, 'where'):
        # pandas objects: keep the index so column alignment still works.
        std = std.where(std != 0, 1.0)
    else:
        std = np.where(std == 0, 1.0, std)

    return (data - mean) / std

# Standardize the two continuous columns; every other column is left as is.
# The scaled frame is unpacked column-by-column into `assign`, which swaps
# 'Age' and 'Fare' for their standardized versions without changing column order.
titanic_data = titanic_data.assign(**feature_scaling(titanic_data[['Age', 'Fare']]))

# Logistic Regression model
class LogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    Parameters
    ----------
    lr : float
        Step size of each gradient-descent update.
    num_iter : int
        Number of gradient-descent iterations run by ``fit``.
    fit_intercept : bool
        When True, a column of ones is prepended to the features so the
        first entry of ``theta`` acts as the bias term.
    verbose : bool
        When truthy, ``fit`` prints the training loss every 10,000 iterations.

    Attributes
    ----------
    theta : numpy.ndarray
        Learned weights, created by ``fit`` (intercept first when
        ``fit_intercept`` is True).
    """

    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, verbose=False):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        self.verbose = verbose

    def __add_intercept(self, X):
        """Return ``X`` with a leading column of ones (the bias feature)."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

        Uses sigmoid(z) = exp(-log(1 + exp(-z))) with ``np.logaddexp`` so that
        large negative ``z`` no longer overflows inside ``np.exp``.
        """
        return np.exp(-np.logaddexp(0.0, -z))

    def __loss(self, h, y):
        """Mean binary cross-entropy between probabilities ``h`` and labels ``y``."""
        # Keep probabilities strictly inside (0, 1) so log() never sees 0,
        # which would otherwise produce inf/nan for saturated predictions.
        eps = 1e-15
        h = np.clip(h, eps, 1 - eps)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        """Learn ``theta`` from features ``X`` and binary labels ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric features (DataFrames are converted to a float array).
        y : array-like with n_samples entries
            Labels encoded as 0/1. An (n_samples, 1) column is flattened.

        Raises
        ------
        ValueError
            If the dataset is empty or ``X`` and ``y`` disagree on the
            number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so an (n, 1) column target cannot broadcast against the
        # (n,) prediction vector into an (n, n) residual matrix.
        y = np.asarray(y, dtype=float).ravel()

        if y.size == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if X.shape[0] != y.size:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.size} labels."
            )

        if self.fit_intercept:
            X = self.__add_intercept(X)

        self.theta = np.zeros(X.shape[1])

        for i in range(self.num_iter):
            h = self.__sigmoid(np.dot(X, self.theta))
            gradient = np.dot(X.T, (h - y)) / y.size
            self.theta -= self.lr * gradient

            if self.verbose and i % 10000 == 0:
                # Report the loss of the weights *after* this update.
                h = self.__sigmoid(np.dot(X, self.theta))
                print(f'Loss: {self.__loss(h, y)} \t')

    def predict_prob(self, X):
        """Return the predicted probability of the positive class for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = self.__add_intercept(X)

        return self.__sigmoid(np.dot(X, self.theta))

    def predict(self, X, threshold=0.5):
        """Return a boolean array: True where the predicted probability is >= ``threshold``."""
        return self.predict_prob(X) >= threshold

# Splitting data into features and target variable
X = titanic_data.drop('Survived', axis=1)
y = titanic_data['Survived']

# Training the logistic regression model
model = LogisticRegression()
model.fit(X, y)

# Predicting survival on random data points
# Generating random data points
np.random.seed(0)
n_points = 5
# 'Age' and 'Fare' are drawn uniformly over the range of the *training*
# columns, i.e. directly in the units the model was fitted on. Generating raw
# values and standardizing them with the mean/std of these few points (as was
# done before) puts them on a different scale from the training features, so
# the model would be scoring inputs unrelated to the nominal ages and fares.
random_data_points = pd.DataFrame({
    'Pclass': np.random.randint(1, 4, n_points),
    'Sex': np.random.randint(0, 2, n_points),
    'Age': np.random.uniform(X['Age'].min(), X['Age'].max(), n_points),
    'SibSp': np.random.randint(0, 5, n_points),
    'Parch': np.random.randint(0, 5, n_points),
    'Fare': np.random.uniform(X['Fare'].min(), X['Fare'].max(), n_points)
})

# The model multiplies features by position, not by name, so force the same
# column order as the training matrix (raises KeyError on a schema mismatch
# instead of silently mixing up features).
random_data_points = random_data_points[X.columns]

# Predicting survival on random data points
predictions = model.predict(random_data_points)
probabilities = model.predict_prob(random_data_points)

# Printing predictions and probabilities
print("Random Data Points:")
print(random_data_points)
print("\nPredictions (Survived=1, Not Survived=0):")
print(predictions.astype(int))
print("\nProbabilities of Survival:")
print(probabilities)


FileNotFoundError: [Errno 2] No such file or directory: 'titanic.csv'

In [1]:
# NOTE(review): stray scratch input. Neither `gl` nor `n` is defined in the
# visible cells, and the output recorded for this cell is a NameError, so this
# cell looks like a candidate for deletion — confirm with the author.
gl
n


NameError: name 'gl' is not defined