# Titanic Survival Prediction
This notebook performs data preprocessing, model training, and evaluation on the Titanic dataset.

In [None]:
import os
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [None]:
# Directory holding train.csv and test.csv. Set the TITANIC_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original location is used, so existing runs behave exactly as before.
DATA_DIR = Path(os.environ.get(
    'TITANIC_DATA_DIR',
    'C:\\Users\\Hardik\\Desktop\\titanic-survivor-prediction',
))

train_df = pd.read_csv(DATA_DIR / 'train.csv')
test_df = pd.read_csv(DATA_DIR / 'test.csv')


In [None]:
# Stack train and test rows so both receive the same imputation and encoding;
# they are separated again by position at the end of this cell.
full_df = pd.concat([train_df, test_df], sort=False)

# Per-column replacements for missing values: median for the numeric columns,
# most frequent value for the embarkation port.
fill_values = {
    'Age': full_df['Age'].median(),
    'Fare': full_df['Fare'].median(),
    'Embarked': full_df['Embarked'].mode()[0],
}

# Fill through the DataFrame and assign the result back. Calling
# fillna(inplace=True) on a selected column is chained assignment: recent
# pandas 2.x releases warn about it, and with Copy-on-Write enabled it no
# longer modifies full_df at all, which would leave the NaNs in place.
full_df = (
    full_df
    .fillna(value=fill_values)
    .drop(columns=['Name', 'Ticket', 'Cabin'])
)

# One encoder per column so the fitted Sex mapping is not discarded when the
# Embarked column is fitted; both stay available for decoding or reuse.
sex_encoder = LabelEncoder()
embarked_encoder = LabelEncoder()
full_df['Sex'] = sex_encoder.fit_transform(full_df['Sex'])
full_df['Embarked'] = embarked_encoder.fit_transform(full_df['Embarked'])

# Backward-compatible alias: `le` previously ended up fitted on Embarked.
le = embarked_encoder

# Split back by position (the concatenated index contains repeated labels, so
# label-based selection would be ambiguous). Copies keep later edits to either
# part from touching full_df.
n_train = len(train_df)
train_clean = full_df.iloc[:n_train].copy()
test_clean = full_df.iloc[n_train:].copy()


In [None]:
# Target is the Survived column; features are every remaining cleaned column
# except the passenger identifier.
y = train_clean['Survived']
X = train_clean.drop(columns=['Survived', 'PassengerId'])

# Hold out 20% of the labelled rows for validation, with a fixed seed so the
# split is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest with 100 trees and a fixed seed, fitted on the training part.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and report overall accuracy plus per-class metrics.
y_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred)
print("Accuracy:", val_accuracy)
print(classification_report(y_val, y_pred))


In [None]:
# Persist the fitted classifier to model.pkl in the current working directory.
# Only the model object is written by this cell.
joblib.dump(value=model, filename='model.pkl')
print("Model saved as model.pkl")
