CELL 1 – Imports

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix


CELL 2-Load Data Set

In [None]:
# Titanic dataset CSV hosted in the datasciencedojo/datasets GitHub repository.
TITANIC_CSV_URL = (
    "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
)

# Fetch the CSV over HTTP into a DataFrame (requires network access).
df = pd.read_csv(TITANIC_CSV_URL)

# Preview the first rows as the cell output.
df.head()


CELL 3-Data Cleansing

In [None]:

# Drop columns that are not used in this notebook. Reassigning the result
# (instead of inplace=True) together with errors="ignore" keeps the cell safe
# to re-run: a second run no longer raises KeyError for already-removed columns.
df = df.drop(columns=["Cabin", "Ticket", "Name"], errors="ignore")

# Impute missing values by assigning the filled column back to the frame.
# `df[col].fillna(..., inplace=True)` acts on a temporary Series: pandas 2.x
# emits a FutureWarning for it, and under Copy-on-Write (the default from
# pandas 3.0) it leaves `df` unchanged.
df["Age"] = df["Age"].fillna(df["Age"].median())
df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])

# Remaining null count per column, shown as the cell output.
df.isnull().sum()


CELL 4-Encoding Categorical Data

In [None]:
# Use one encoder per column so each keeps its own `classes_` mapping.
# Sharing a single LabelEncoder would overwrite the "Sex" mapping as soon as
# the encoder is refit on "Embarked", making the codes impossible to invert.
sex_encoder = LabelEncoder()
embarked_encoder = LabelEncoder()

# Replace the string categories with integer codes.
df["Sex"] = sex_encoder.fit_transform(df["Sex"])
df["Embarked"] = embarked_encoder.fit_transform(df["Embarked"])

# Keep the original name bound; it previously ended up fitted on "Embarked".
le = embarked_encoder


CELL 5-Encoding & Scaling

In [None]:
# LabelEncoder and StandardScaler are already imported in the imports cell at
# the top of the notebook, so they are not re-imported here.

# Encode the categorical columns only if they are still non-numeric.
# Re-fitting an encoder on columns that were already converted to integer
# codes would replace its string classes with the codes themselves.
for column in ("Sex", "Embarked"):
    if not pd.api.types.is_numeric_dtype(df[column]):
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])

features = ["Age", "Sex", "Pclass", "SibSp", "Parch"]

X = df[features]

# Standardise the feature columns for a quick look at the scaled values.
# NOTE: this scaler is fitted on every row; the modelling cells below fit
# their scaler on the training split only.
scaler = StandardScaler()

# Rebuild a DataFrame so column names and the row index survive scaling.
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=features, index=X.index)

X_scaled.head()


CELL 6-Feature Visualization

In [None]:
# Histogram of the "Age" column with a kernel-density overlay.
fig, ax = plt.subplots()
sns.histplot(data=df, x="Age", kde=True, ax=ax)
ax.set_title("Age Distribution")
plt.show()

# Bar chart of how many rows fall into each "Survived" value.
fig, ax = plt.subplots()
sns.countplot(data=df, x="Survived", ax=ax)
ax.set_title("Survival Count")
plt.show()


CELL 7-Prepare Data & Train-Test Split

In [None]:
# Regression setup: predict "Fare" from the five columns listed below.
regression_features = ["Age", "Sex", "Pclass", "SibSp", "Parch"]
X = df.loc[:, regression_features]
y = df.loc[:, "Fare"]

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split


CELL 8-Normalization

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so the test rows do not influence the scaling.
scaler = StandardScaler().fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


CELL 9-Linear Regression Model

In [None]:
# Fit a linear regression on the scaled training features.
lr = LinearRegression().fit(X_train, y_train)

# Predict on the held-out rows and score the predictions.
y_pred = lr.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print("MSE:", mse)
print("R2 Score:", r2)


LOGISTIC REGRESSION

CELL 10-Prepare Data

In [None]:
# Classification setup: five predictor columns, target is the "Survived" column.
classification_features = ["Age", "Sex", "Pclass", "SibSp", "Parch"]
X = df.loc[:, classification_features]
y = df.loc[:, "Survived"]


CELL 11-Split & Scale

In [None]:
# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create a fresh scaler here instead of reusing the object left over from the
# regression section, so this cell does not depend on that earlier cell
# having been executed in the current kernel.
scaler = StandardScaler()

# Fit on the training rows only, then apply the same transform to the test rows.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


CELL 12-Logistic Regression Model

In [None]:
# Fit a logistic regression classifier on the scaled training features.
log_model = LogisticRegression().fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_pred = log_model.predict(X_test)

# Report each metric on its own line, in the order accuracy, precision, recall.
metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
)
for label, metric in metric_table:
    print(label, metric(y_test, y_pred))


CELL 13-Confusion Matrix

In [None]:
# Count the actual-vs-predicted label combinations on the held-out rows.
cm = confusion_matrix(y_test, y_pred)

# Draw the counts as an annotated heatmap (integer cell labels, blue palette).
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()
