<a href="https://colab.research.google.com/github/ittee/Titanic-Classification-/blob/main/Task_1_Titanic_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Load the Titanic dataset through seaborn's dataset loader
data = sns.load_dataset('titanic')

# Check the first few rows of the dataset
print(data.head())

# Handle missing values.
# The filled column is assigned back to the frame rather than calling
# fillna(..., inplace=True) on data[col]: that chained form operates on an
# intermediate object, raises a pandas FutureWarning, and will no longer
# modify `data` from pandas 3.0 on (leaving NaNs in the model input).
# NOTE(review): the mean/mode are computed over the full dataset, i.e. before
# the train/test split below, so test rows influence the imputed values.
data['age'] = data['age'].fillna(data['age'].mean())  # Fill missing age values with mean
data['embarked'] = data['embarked'].fillna(data['embarked'].mode()[0])  # Fill missing embarked values with mode
data.dropna(subset=['sex'], inplace=True)  # Drop rows where 'sex' column has missing values

# Encode categorical features: 'sex', 'embarked'
label_encoder = LabelEncoder()
# LabelEncoder numbers the classes in sorted order, so female = 0, male = 1
data['sex'] = label_encoder.fit_transform(data['sex'])
data['embarked'] = data['embarked'].map({'C': 0, 'Q': 1, 'S': 2})  # C = 0, Q = 1, S = 2

# Feature selection: predictor columns fed to the model, and the target column
feature_columns = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
X = data[feature_columns]  # Independent variables
y = data['survived']  # Dependent variable

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a logistic regression classifier on the training rows
# (fit() returns the estimator itself, so the call can be chained)
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predict survival for the held-out test rows
y_pred = model.predict(X_test)

# Fraction of test rows predicted correctly, printed as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix of true labels vs. predicted labels for the test rows
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Show the first 5 predictions; the printed number is the position within
# the test set, not a passenger identifier
for i in range(5):
    outcome = 'Survived' if y_pred[i] == 1 else 'Did not survive'
    print(f"Passenger {i+1}: {outcome}")

   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  
Accuracy: 81.01%
Confusion Matrix:
[[90 15]
 [19 55]]
Passenger 1: Did not survive
Passenger 2: Did not survive
Passenger 3: Did not survive
Passenger 4: Survived
Passenger 5: Survived


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['age'].fillna(data['age'].mean(), inplace=True)  # Fill missing age values with mean
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['embarked'].fillna(data['embarked'].mode()[0], inplace=True)  # Fill missing embarked values with mode
