In [None]:
# Cell 1
# Importing required libraries

import numpy as np
import pandas as pd


In [None]:
# Cell 2
# Importing visualization libraries

import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Cell 3
# Report the installed NumPy and pandas versions (one per line) so the
# environment used for this analysis is recorded next to the results.

print(np.__version__, pd.__version__, sep="\n")


In [None]:
# Cell 4
# Read the Titanic CSV (path is relative to the notebook's working directory)
# into the DataFrame that every later cell works on.

df = pd.read_csv(filepath_or_buffer="titanic.csv")


In [None]:
# Cell 5
# Preview the first five records to sanity-check the load.

df.head(n=5)


In [None]:
# Cell 6
# Preview the last five records (checks that the file end was read cleanly).

df.tail(n=5)


In [None]:
# Cell 7
# Size of the dataset as a (row count, column count) tuple.

(len(df.index), len(df.columns))


In [None]:
# Cell 8
# Print pandas' built-in summary of the frame (per-column non-null counts and
# dtypes). info() writes to stdout and returns None, so the cell shows only
# the printed text.

df.info()


In [None]:
# Cell 9
# Descriptive statistics with the quartiles spelled out explicitly
# (these are the same percentiles describe() reports by default).

df.describe(percentiles=[0.25, 0.5, 0.75])


In [None]:
# Cell 10
# Show the column labels of the loaded frame; later cells refer to columns by
# these exact names (e.g. "Age", "Embarked", "Cabin", "Survived").

df.columns


In [None]:
# Cell 11
# Count missing entries per column before any cleaning is applied.

df.isna().sum()


In [None]:
# Cell 12
# Fill missing Age values with the column mean.
#
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df["Age"]: the in-place form mutates an
# intermediate Series (chained assignment), which pandas warns about and which
# leaves df untouched once Copy-on-Write is active.

df["Age"] = df["Age"].fillna(df["Age"].mean())


In [None]:
# Cell 13
# Fill missing Embarked values with the most frequent value; mode() returns a
# Series, so [0] picks its first entry.
#
# Assigned back to the column rather than using fillna(..., inplace=True) on
# df["Embarked"], because the chained in-place form acts on an intermediate
# Series and does not reliably update df (it is a no-op under Copy-on-Write).

df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])


In [None]:
# Cell 14
# Remove the Cabin column.
#
# df is rebound to the result (no inplace=True) and errors="ignore" is set so
# the cell can be executed more than once: a second run is a no-op instead of
# raising KeyError because Cabin is already gone.

df = df.drop(columns="Cabin", errors="ignore")


In [None]:
# Cell 15
# Re-count missing entries per column to confirm the cleaning steps above.

df.isna().sum()


In [None]:
# Cell 16
# Tally how many rows carry each distinct value of the Survived column
# (the column later used as the model target).

df["Survived"].value_counts()


In [None]:
# Cell 17
# Tally how many rows carry each distinct value of the Sex column. At this
# point the column still holds the raw labels; it is only encoded to 0/1 in a
# later cell.

df["Sex"].value_counts()


In [None]:
# Cell 18
# Mean of Survived within each Sex group.
# NOTE(review): reads as a survival rate assuming Survived is coded 0/1 — confirm.

(
    df.groupby(by="Sex")["Survived"]
    .mean()
)


In [None]:
# Cell 19
# Mean of Survived within each passenger class (Pclass).
# NOTE(review): reads as a survival rate assuming Survived is coded 0/1 — confirm.

(
    df.groupby(by="Pclass")["Survived"]
    .mean()
)


In [None]:
# Cell 20
# Mean passenger age. Missing ages were already replaced by the mean earlier,
# so skipna (the default, made explicit here) does not change the result.

df["Age"].mean(skipna=True)


In [None]:
# Cell 21
# Age range as a (youngest, oldest) pair.

age_values = df["Age"]
(age_values.min(), age_values.max())


In [None]:
# Cell 22
# Bucket ages into five intervals and count the passengers in each.
# pd.cut uses right-closed intervals by default, i.e. (0, 12], (12, 20], ...,
# (60, 80]; an age of exactly 0 or above 80 would fall outside every bin (NaN).

age_bin_edges = [0, 12, 20, 40, 60, 80]
df["AgeGroup"] = pd.cut(df["Age"], bins=age_bin_edges)
df["AgeGroup"].value_counts()


In [None]:
# Cell 23
# Mean of Survived within each age bucket.
#
# AgeGroup is a categorical column (produced by pd.cut), and grouping on a
# categorical key depends on the `observed` setting, whose default differs
# between pandas versions. observed=False is pinned so every bucket is always
# listed (empty ones as NaN) and no deprecation warning is emitted.

df.groupby("AgeGroup", observed=False)["Survived"].mean()


In [None]:
# Cell 24
# Summary statistics for Fare, with the default quartiles written out.

df["Fare"].describe(percentiles=[0.25, 0.5, 0.75])


In [None]:
# Cell 25
# Pairwise correlation between the numeric columns.
#
# numeric_only=True is required here: df still contains non-numeric columns at
# this stage (Sex and Embarked are only encoded in later cells, and AgeGroup is
# categorical). pandas 2.0+ no longer drops such columns silently, so a bare
# df.corr() raises instead of returning the matrix.

df.corr(numeric_only=True)


In [None]:
# Cell 26
# Annotated heatmap of the correlation matrix.
#
# The matrix is computed with numeric_only=True because df still contains
# string/categorical columns here (Sex, Embarked, AgeGroup); without it
# df.corr() raises on pandas 2.0+ and nothing is drawn.

plt.figure()
sns.heatmap(df.corr(numeric_only=True), annot=True)
plt.show()


In [None]:
# Cell 27
# Bar chart: number of rows for each value of Survived.

sns.countplot(data=df, x="Survived")
plt.show()


In [None]:
# Cell 28
# Bar chart: Survived counts, split into one bar per Sex value.

sns.countplot(data=df, x="Survived", hue="Sex")
plt.show()


In [None]:
# Cell 29
# Bar chart: Survived counts, split into one bar per passenger class.

sns.countplot(data=df, x="Survived", hue="Pclass")
plt.show()


In [None]:
# Cell 30
# Histogram of Age using 30 bins.

sns.histplot(data=df, x="Age", bins=30)
plt.show()


In [None]:
# Cell 31
# Histogram of Fare using 30 bins.

sns.histplot(data=df, x="Fare", bins=30)
plt.show()


In [None]:
# Cell 32
# Horizontal box plot of Age (median, quartiles and outliers at a glance).

sns.boxplot(data=df, x="Age")
plt.show()


In [None]:
# Cell 33
# Horizontal box plot of Fare (median, quartiles and outliers at a glance).

sns.boxplot(data=df, x="Fare")
plt.show()


In [None]:
# Cell 34
# Replace the Sex labels with integer codes for modelling.
# Run this cell once only: map() turns any value that is not a key of the
# lookup into NaN, so re-running it on the already-encoded 0/1 column would
# blank the column out.

sex_codes = {"male": 0, "female": 1}
df["Sex"] = df["Sex"].map(sex_codes)


In [None]:
# Cell 35
# One-hot encode Embarked. drop_first=True omits the first category's
# indicator column, so it is represented by all remaining indicators being 0.

df = pd.get_dummies(
    data=df,
    columns=["Embarked"],
    drop_first=True,
)


In [None]:
# Cell 36
# First five rows of the frame after cleaning and encoding.

df.head(n=5)


In [None]:
# Cell 37
# Build the feature matrix X and the target vector y.
#
# Only numeric and boolean columns are kept as features. Dropping Survived
# alone would leave non-numeric columns in X (at minimum the interval-valued
# AgeGroup created earlier, plus any free-text columns in the CSV), and
# LogisticRegression.fit cannot convert those to floats.
# "bool" is listed explicitly because the "number" selector does not include
# boolean indicator columns such as those get_dummies can produce.
# NOTE(review): identifier-like numeric columns (e.g. a passenger id, if the
# file has one) are still kept as features — consider dropping them.

y = df["Survived"]
X = df.drop(columns="Survived").select_dtypes(include=["number", "bool"])


In [None]:
# Cell 38
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Cell 39
# Import Logistic Regression

from sklearn.linear_model import LogisticRegression


In [None]:
# Cell 40
# Fit a logistic-regression classifier on the training split.
# max_iter=1000 raises the solver's iteration cap above the library default.

model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)


In [None]:
# Cell 41
# Predicted class labels for the held-out test rows.

y_pred = model.predict(X=X_test)


In [None]:
# Cell 42
# Fraction of test rows whose predicted label matches the true label.

from sklearn.metrics import accuracy_score

accuracy_score(y_true=y_test, y_pred=y_pred)


In [None]:
# Cell 43
# Confusion matrix for the test split: rows are true classes, columns are
# predicted classes.

from sklearn.metrics import confusion_matrix

confusion_matrix(y_true=y_test, y_pred=y_pred)


In [None]:
# Cell 44
# Per-class precision, recall and F1 for the test split. The report is a plain
# string, so it is printed to keep its column alignment.

from sklearn.metrics import classification_report

print(classification_report(y_true=y_test, y_pred=y_pred))


In [None]:
# Cell 45
# Learned coefficients of the fitted logistic-regression model.
# NOTE(review): presumably one weight per column of X_train, in column order —
# confirm before interpreting individual values.

model.coef_


In [None]:
# Cell 46
# Learned intercept (bias term) of the fitted logistic-regression model.

model.intercept_


In [None]:
# Cell 47
# Closing marker: prints a fixed completion message. It performs no checks, so
# it only shows that execution reached this cell.

print("Titanic data analysis completed successfully")


In [None]:
# Cell 48
# Final cell: prints the author attribution for the assignment.

print("Assignment submitted by FarahNoor")
