<a href="https://colab.research.google.com/github/abhijitguinkatwa/machine-learning-projects/blob/main/Titanic_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Step 1: Read the dataset and take a first look at it
# The CSV is assumed to provide the columns
# 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare'
titanic_data = pd.read_csv('titanic_data.csv')

# Preview the top five rows as a quick sanity check of the load
print(titanic_data.head(n=5))

# Step 2: Data Preprocessing
# Encode the categorical column, split into train/test sets, then impute and
# scale using statistics learned from the TRAINING rows only. Fitting the
# imputer/scaler on the full dataset would leak test-set information into the
# features and make the reported test metrics optimistically biased.

# 2.1: Convert 'Sex' column to numerical values.
# This is a fixed, stateless mapping, so applying it before the split is safe.
titanic_data['Sex'] = titanic_data['Sex'].map({'male': 0, 'female': 1})

# Step 3: Train-Test Split
# X / y hold the encoded (but not yet imputed or scaled) features and target.
X = titanic_data.drop('Survived', axis=1)
y = titanic_data['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below modify independent
# frames rather than slices derived from X.
X_train = X_train.copy()
X_test = X_test.copy()

# 2.2: Handle missing values in 'Age' and 'Fare' columns.
# The column means are computed on the training rows and reused for the test rows.
imputer = SimpleImputer(strategy='mean')
imputed_features = ['Age', 'Fare']
X_train[imputed_features] = imputer.fit_transform(X_train[imputed_features])
X_test[imputed_features] = imputer.transform(X_test[imputed_features])

# 2.3: Scale numerical features using StandardScaler.
# Mean and variance are estimated on the training rows only; the test rows are
# transformed with those same parameters.
scaler = StandardScaler()
numerical_features = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

# Step 4: Build and Train the Classifier
# A 100-tree Random Forest with a fixed seed; fit() returns the estimator
# itself, so construction and training are chained into one expression.
classifier = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
).fit(X_train, y_train)

# Step 5: Make Predictions
# Survival labels predicted for the held-out test rows
y_pred = classifier.predict(X_test)

# Step 6: Evaluate the Model
# Report overall accuracy first, then the per-class report and the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Each remaining metric is printed as a heading line followed by the metric output,
# with a blank line before each section.
for title, metric in (
    ("Classification Report:", classification_report),
    ("Confusion Matrix:", confusion_matrix),
):
    print()
    print(title)
    print(metric(y_test, y_pred))
