In [None]:
# Colab-only step: open the browser upload dialog so the dataset CSV can be
# sent to the runtime. google.colab exists only inside Colab, so the import is
# guarded; elsewhere the notebook continues and expects the CSV to already be
# on disk next to the notebook.
try:
    from google.colab import files
except ImportError:
    # Not running in Colab: nothing to upload, keep `uploads` defined.
    uploads={}
else:
    uploads=files.upload()

In [102]:
import pandas as pd
import numpy as np
# Load the raw passenger table from the CSV in the working directory.
DATA_PATH = 'Titanic-Dataset.csv'
df = pd.read_csv(DATA_PATH)

# Titanic Survival Prediction - Machine Learning Model
Building a classification model to predict passenger survival using Logistic Regression

## Data Preparation

In [None]:
# Keep only the target and the six model features. The explicit .copy() makes
# `df` an independent frame, so the column assignments below never write into
# a slice of the loaded data (this is what triggers SettingWithCopyWarning).
df=df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
# Fill missing numeric values with the column median.
# NOTE(review): the medians are computed on the full dataset, i.e. before the
# train/test split, so test rows influence the imputed values.
df['Age']=df['Age'].fillna(df['Age'].median())
df['Fare']=df['Fare'].fillna(df['Fare'].median())
# Encode Sex as male=1 / female=0. The identity entries (1->1, 0->0) keep the
# cell re-runnable: `df` is overwritten in place, and without them a second
# run would map the already-encoded values to NaN.
df['Sex']=df['Sex'].map({'male':1,'female':0,1:1,0:0})
# Sanity check: every count should be 0 after imputation and encoding.
print(df.isna().sum())
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix (six predictors) and target vector from the prepared frame.
X = df.loc[:, ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]
y = df.loc[:, 'Survived']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the size of each partition and the shapes used for training.
print(
    f"Training data: {X_train.shape[0]} rows",
    f"Testing data: {X_test.shape[0]} rows",
    f"\nFeatures shape{X_train.shape}",
    f"Target shape {y_train.shape}",
    sep="\n",
)

## Train/Test Split
Splitting data: 80% training, 20% testing

In [None]:
from sklearn.linear_model import LogisticRegression

# Construct the classifier and fit it on the training split in one expression;
# fit() hands back the fitted estimator, which is bound to `model`.
# max_iter=500 caps the solver's iterations, random_state fixes the seed.
model = LogisticRegression(max_iter=500, random_state=42).fit(X_train, y_train)
print("Model trained!")

## Model Training
Training Logistic Regression model on training data

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Compute every metric on the held-out rows first, then report them in order.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Headline accuracy, as a raw fraction and as a percentage.
print(f"model accuracy: {accuracy}")
print(f"That means {accuracy*100:.1f}% of predictions were correct!\n")

# Per-class breakdown.
print("Classification report")
print(
    classification_report(
        y_test,
        y_pred,
        target_names=['Did not survive', 'Survived'],
    )
)

# Confusion matrix: first index is the actual class, second is the predicted
# class (0 = died, 1 = survived), as the labels below spell out.
print("\nConfusion Matrix")
print(cm)
print("\nInterpretation:")
print(
    f"True negatives(correctly predicted died): {cm[0,0]}",
    f"True positives(correctly predicted survived): {cm[1,1]}",
    f"False negatives(predicted died,actually survived): {cm[1,0]}",
    f"False positives(predicted survived,actually died): {cm[0,1]}",
    sep="\n",
)

## Model Evaluation
Testing model accuracy on unseen data

In [None]:
# Take the feature names from the frame the model was fitted on, so every name
# lines up with its coefficient even if the feature list is edited upstream
# (a separately hard-coded list can silently drift out of sync).
feature_names=list(X_train.columns)
# coef_ has one row per decision function; binary classification has one row.
coefficients=model.coef_[0]
# One row per feature, largest (most positive) coefficient first.
importance_df=pd.DataFrame({
    'Feature':feature_names,
    'Coefficient':coefficients
}).sort_values('Coefficient',ascending=False)
print("Feature Importance For Survival Prediction:")
print(importance_df)
# NOTE(review): no feature scaling is applied in the cells above, so the
# coefficient magnitudes are per raw unit of each feature and are not directly
# comparable with each other; only the signs are interpreted below.
print("\nInterpretation:")
for feature,coefficient in zip(importance_df['Feature'],importance_df['Coefficient']):
    direction="INCREASES" if coefficient > 0 else "DECREASES"
    print(f"{feature}: {direction} survival chance")

## Feature Importance
Understanding which features drive survival predictions
- Fare: Increases survival (richer = better cabins)
- Age: Younger passengers more likely to survive
- Sex: Females much more likely to survive (coefficient ≈ -2 on `Sex`, which is encoded male=1, female=0)
- Pclass: Lower class number (1st class) more likely to survive

## Making Predictions
Using the trained model to predict survival for new passengers

In [None]:
# predict for new passengers
def predict_passenger(features):
    """Return ``(predicted_class, class_probabilities)`` for one passenger.

    ``features`` is a nested list holding a single row of values in the same
    column order as the training data (Pclass, Sex, Age, SibSp, Parch, Fare;
    Sex is 1 for male and 0 for female). The row is wrapped in a DataFrame
    that carries the training column names, so the model receives input in
    the same form it was fitted on; a bare list makes scikit-learn warn that
    X does not have valid feature names.
    """
    passenger_df=pd.DataFrame(features,columns=X_train.columns)
    return model.predict(passenger_df)[0],model.predict_proba(passenger_df)[0]


def report_passenger(header,prediction,probability):
    """Print the header line, the predicted outcome and the survival probability.

    ``probability`` is the per-class probability row; index 1 is the
    probability of the "survived" class.
    """
    print(header)
    print(f"prediction: {'survived' if prediction==1 else 'died'}")
    print(f"probability: {probability[1]:.1%} chances of survival")


# example 1: young female ,first class ,high fare
passenger1=[[1,0,25,0,0,500]]
prediction1,probability1=predict_passenger(passenger1)
report_passenger("passenger 1: young female, first class, expensive ticket",prediction1,probability1)
# example 2: old male, 3rd class, fare of 50
passenger2=[[3,1,60,0,0,50]]
prediction2,probability2=predict_passenger(passenger2)
report_passenger("\nPassenger 2: old  male, 3rd class, cheap fare",prediction2,probability2)

## Prediction Examples

### Example 1: Young Female, 1st Class
- Pclass: 1 (1st class)
- Sex: Female
- Age: 25
- Fare: $500 (expensive)
- **Model Prediction: 98.6% survival chance**

### Example 2: Old Male, 3rd Class  
- Pclass: 3 (3rd class)
- Sex: Male
- Age: 60
- Fare: $50 (cheap)
- **Model Prediction: 5.3% survival chance**

Both predictions are consistent with the feature effects listed above: the young first-class woman is predicted to survive, while the older third-class man is predicted not to.