In [None]:
import pandas as pd

# Relative path so the notebook runs on any machine; train.csv is expected
# to sit in the notebook's working directory.
DATA_PATH = "train.csv"

# Keep the untouched frame around so this cell can be re-run safely.
raw_df = pd.read_csv(DATA_PATH)

# Build the modelling frame as one non-mutating chain:
#   1. drop columns that don't help,
#   2. fill missing Age with the median and missing Embarked with the mode,
#   3. one-hot encode the categorical columns (first level dropped).
# Column-level `fillna(..., inplace=True)` is avoided on purpose: under pandas
# Copy-on-Write it acts on a temporary and leaves the frame unchanged.
df = (
    raw_df
    .drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
    .assign(
        Age=lambda d: d['Age'].fillna(d['Age'].median()),
        Embarked=lambda d: d['Embarked'].fillna(d['Embarked'].mode()[0]),
    )
    .pipe(pd.get_dummies, columns=['Sex', 'Embarked'], drop_first=True)
)

# Guard against the imputation silently not taking effect.
assert df['Age'].notna().all(), "Age still contains missing values"


In [None]:
from sklearn.model_selection import train_test_split

# Separate the target column from the predictor columns.
target_col = "Survived"
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Seed for the randomized estimators so that metrics, plots and the saved
# model are reproducible across kernel restarts.
MODEL_SEED = 42

# max_iter is raised to 1000 for the logistic regression solver.
lr = LogisticRegression(max_iter=1000)
dt = DecisionTreeClassifier(random_state=MODEL_SEED)
rf = RandomForestClassifier(random_state=MODEL_SEED)

# Fit all three candidates on the same training split.
lr.fit(X_train, y_train)
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot from the frame that is already in memory instead of re-reading the CSV:
# `df` still carries the `Survived` column and the cleaning steps only dropped
# columns (never rows), so the class counts are the same as in the raw file.
ax = sns.countplot(x='Survived', data=df)
ax.set_title("Survival Count")
ax.figure.savefig("survival_count.png", dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Histogram of passenger ages with a KDE overlay, saved to disk and displayed.
age_ax = sns.histplot(df['Age'], kde=True)
age_ax.set_title("Age Distribution")
age_ax.figure.savefig(
    "age_distribution.png",
    dpi=300,
    bbox_inches='tight',
)
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, RocCurveDisplay

# Each entry: (display class, figure title, output file).
# The confusion matrix is drawn first, then the ROC curve, both for the
# Random Forest evaluated on the held-out test split.
rf_plots = [
    (ConfusionMatrixDisplay, "Random Forest - Confusion Matrix", "rf_confusion_matrix.png"),
    (RocCurveDisplay, "Random Forest - ROC Curve", "rf_roc_curve.png"),
]

for display_cls, plot_title, out_file in rf_plots:
    display_cls.from_estimator(rf, X_test, y_test)
    plt.title(plot_title)
    plt.savefig(out_file, dpi=300, bbox_inches='tight')
    plt.show()


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fitted estimators to compare, keyed by the label shown in the table.
models = {"Logistic Regression": lr, "Decision Tree": dt, "Random Forest": rf}

# Column label -> scoring function; insertion order is the column order.
scorers = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1 Score": f1_score,
}

# One row per model, every score computed on the held-out test split.
results = []
for name, model in models.items():
    y_pred = model.predict(X_test)
    row = {"Model": name}
    for label, score_fn in scorers.items():
        row[label] = score_fn(y_test, y_pred)
    results.append(row)

pd.DataFrame(results)


In [None]:
import joblib

# Persist the fitted Random Forest to the working directory.
model_file = "random_forest_model.pkl"
joblib.dump(rf, model_file)
