### **Deployed Model Link** : https://task-3.streamlit.app/

### **Dependencies** 
- pandas 
- numpy 
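- Normalization (`StandardScaler` from scikit-learn) 
- Models (`RandomForestClassifier` from scikit-learn, plus `train_test_split` and `accuracy_score`) 
- pickle 
- warnings (all warnings are suppressed with `warnings.filterwarnings('ignore')`)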

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle 
import warnings 
warnings.filterwarnings('ignore')

### **Preprocessing** 
- Constructor 
- Load Data 
- Preprocess Data 

In [8]:
# Class for loading and preprocessing the data
class TitanicData:
    """Load the Titanic CSV and turn it into model-ready features.

    Attributes:
        file_path: Path passed to ``pd.read_csv``.
        data: Raw frame after ``load_data``; rebound to the cleaned, one-hot
            encoded frame after ``preprocess_data``.
        X: Feature frame (every encoded column except ``Survived``), or
            ``None`` before preprocessing.
        y: ``Survived`` column, or ``None`` before preprocessing.
    """

    # Columns removed before modelling.
    _DROPPED_COLUMNS = ['Cabin', 'Ticket', 'Name', 'PassengerId']
    # Columns one-hot encoded (first level dropped).
    _CATEGORICAL_COLUMNS = ['Sex', 'Embarked']

    def __init__(self, file_path):
        self.file_path = file_path
        self.data = None
        self.X = None
        self.y = None

    def load_data(self):
        """Read ``file_path`` as CSV, store the frame on ``self.data`` and return it."""
        self.data = pd.read_csv(self.file_path)
        return self.data

    def preprocess_data(self):
        """Impute missing values, drop unused columns and encode categoricals.

        The frame currently held in ``self.data`` is NOT modified; a new frame
        is built and ``self.data`` is rebound to it. If ``load_data`` has not
        been called yet, the CSV is loaded first.

        Because ``self.data`` is rebound to the encoded frame, calling this
        method a second time without reloading raises ``KeyError`` (the raw
        columns are gone).

        Returns:
            tuple: ``(X, y)`` where ``X`` is the encoded feature frame and
            ``y`` is the ``Survived`` column.
        """
        if self.data is None:
            self.load_data()
        raw = self.data

        # One DataFrame-level fillna instead of chained
        # ``frame[col].fillna(..., inplace=True)``: the chained form operates on
        # a temporary Series and is a silent no-op under pandas Copy-on-Write.
        fill_values = {
            'Age': raw['Age'].mean(),
            'Embarked': raw['Embarked'].mode()[0],
            'Fare': raw['Fare'].mean(),
        }
        cleaned = raw.fillna(value=fill_values).drop(columns=self._DROPPED_COLUMNS)

        # Encoding categorical variables
        encoded = pd.get_dummies(cleaned, columns=self._CATEGORICAL_COLUMNS, drop_first=True)

        # Defining features (X) and target (y)
        self.data = encoded
        self.X = encoded.drop('Survived', axis=1)
        self.y = encoded['Survived']

        return self.X, self.y

### **Model Training** 
- Constructor 
- Splitting X and y into train and test sets 
- Train the model 
- Evaluate the model

In [9]:
# Class for model training and evaluation
class TitanicModel:
    """Random-forest classifier bundled with the scaler applied to its inputs.

    ``scaler`` is a ``StandardScaler`` created at construction time and fitted
    in ``train_model``; ``model`` is ``None`` until ``train_model`` has run.
    """

    def __init__(self):
        self.scaler = StandardScaler()
        self.model = None

    def split_data(self, X, y, test_size=0.2, random_state=42):
        """Split features and target with ``train_test_split``.

        Returns:
            tuple: ``(X_train, X_test, y_train, y_test)``.
        """
        parts = train_test_split(X, y, test_size=test_size, random_state=random_state)
        return tuple(parts)

    def train_model(self, X_train, y_train):
        """Fit the scaler on ``X_train``, then fit a random forest on the scaled data."""
        # The scaler learns its statistics from the training features only.
        scaled_features = self.scaler.fit_transform(X_train)
        forest = RandomForestClassifier(random_state=42)
        self.model = forest
        forest.fit(scaled_features, y_train)

    def evaluate_model(self, X_test, y_test):
        """Return the accuracy of the trained model on ``(X_test, y_test)``.

        ``X_test`` is transformed with the already-fitted scaler before prediction.
        """
        scaled_features = self.scaler.transform(X_test)
        predictions = self.model.predict(scaled_features)
        return accuracy_score(y_test, predictions)


### **Model Saving** 
- Constructor that bundles the model, scaler, and feature names 
- `save_model()` function that pickles each of them to its own file 


In [10]:
# Class for saving the model using pickle
class ModelSaver:
    """Holds a model, a scaler and a list of feature names, and pickles them to disk."""

    def __init__(self, model, scaler, feature_names):
        self.model, self.scaler, self.feature_names = model, scaler, feature_names

    def save_model(self, model_path, scaler_path, feature_names_path):
        """Pickle the three stored objects, each to its own file.

        Files are written in the order model, scaler, feature names; each is
        opened in binary write mode, so an existing file is overwritten.
        """
        targets = (
            (model_path, self.model),
            (scaler_path, self.scaler),
            (feature_names_path, self.feature_names),
        )
        for destination, artefact in targets:
            with open(destination, 'wb') as sink:
                pickle.dump(artefact, sink)


### **Main Workflow** 
- Load and preprocess the data 
- Train and evaluate the model 
- Save the model 
- Accuracy: 80.45% (can likely be improved with hyperparameter tuning)

In [11]:
# Entry point: runs the whole workflow (load -> preprocess -> train -> evaluate -> save).
# This is an `if` block, not a function, so every name assigned below
# (titanic_data, X, y, titanic_model, accuracy, ...) stays at module level.
if __name__ == "__main__":
    # Step 1: Read 'titanic.csv' (relative path) and build the feature frame X
    # and the 'Survived' target y.
    titanic_data = TitanicData('titanic.csv')
    titanic_data.load_data()
    X, y = titanic_data.preprocess_data()

    # Step 2: Split with split_data's defaults (test_size=0.2, random_state=42),
    # fit on the training part, and score on the held-out part.
    titanic_model = TitanicModel()
    X_train, X_test, y_train, y_test = titanic_model.split_data(X, y)
    titanic_model.train_model(X_train, y_train)
    accuracy = titanic_model.evaluate_model(X_test, y_test)
    # accuracy is multiplied by 100 only for display as a percentage.
    print(f"Model Accuracy: {accuracy * 100:.2f}%")

    # Step 3: Pickle the fitted model, the fitted scaler, and the ordered list
    # of feature column names to three separate files.
    # NOTE(review): presumably the deployed app reloads these to rebuild inputs
    # in the same column order - confirm against the serving code.
    model_saver = ModelSaver(titanic_model.model, titanic_model.scaler, X.columns.tolist())
    model_saver.save_model('titanic_model.pkl', 'scaler.pkl', 'feature_names.pkl')


Model Accuracy: 80.45%
