In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Load the raw Titanic passenger records.
df = pd.read_csv('Titanic-Dataset.csv')

# Handle missing values.
# The filled column is assigned back rather than using fillna(inplace=True) on
# the `df['Age']` selection: that chained in-place form is deprecated in
# pandas 2.x and leaves `df` unchanged when copy-on-write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].mean())
df.drop('Cabin', axis=1, inplace=True)
# Drop any row that still contains a missing value in the remaining columns.
df.dropna(inplace=True)

# Prepare data for training: remove the identifier-like columns.
test = df.drop(['PassengerId', 'Name', 'Ticket'], axis=1)
print("Dataset before Label encoding\n")
print(test)

# Label encode categorical features.
# NOTE: the same encoder object is refitted for each column, so after this
# cell `label_encoder` only remembers the 'Embarked' classes.
label_encoder = LabelEncoder()
test['Sex'] = label_encoder.fit_transform(test['Sex'])
test['Embarked'] = label_encoder.fit_transform(test['Embarked'])
print("\nDataset after Label encoding\n")
print(test)

# Split data into features and target (80% train / 20% test, seeded).
x = test.drop('Survived', axis=1)
y = test['Survived']
X_train, x_test, Y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=11)

# Initialize and train Random Forest Classifier.
# A fixed random_state makes the fitted forest, and therefore the accuracies
# printed below, reproducible across "Restart & Run All".
model_random = RandomForestClassifier(random_state=11)
model_random.fit(X_train, Y_train)

# Print training and test accuracy
print(f"Training Accuracy : {model_random.score(X_train, Y_train)}")
print(f"Test Accuracy : {model_random.score(x_test, y_test)}")


Dataset before Label encoding

     Pclass     Sex        Age  SibSp  Parch     Fare Embarked  Survived
0         3    male  22.000000      1      0   7.2500        S         0
1         1  female  38.000000      1      0  71.2833        C         1
2         3  female  26.000000      0      0   7.9250        S         1
3         1  female  35.000000      1      0  53.1000        S         1
4         3    male  35.000000      0      0   8.0500        S         0
..      ...     ...        ...    ...    ...      ...      ...       ...
886       2    male  27.000000      0      0  13.0000        S         0
887       1  female  19.000000      0      0  30.0000        S         1
888       3  female  29.699118      1      2  23.4500        S         0
889       1    male  26.000000      0      0  30.0000        C         1
890       3    male  32.000000      0      0   7.7500        Q         0

[889 rows x 8 columns]

Dataset after Label encoding

     Pclass  Sex        Age  SibSp  Pa

In [4]:
def predict_survival(row, model=None, training_df=None):
    """
    Predict the survival of a passenger given their details.

    The row is preprocessed the same way as the training data: numeric fields
    are converted to numbers, a missing/blank 'Age' is replaced by the mean
    training age, and 'Sex' / 'Embarked' are mapped to integer codes.

    The codes are derived from the sorted unique values of the training
    column, which is the ordering LabelEncoder uses. Re-fitting an encoder on
    the single input row (as done previously) would encode every value as 0.

    Parameters:
    row (pd.Series): Passenger details with the keys 'Pclass', 'Sex', 'Age',
        'SibSp', 'Parch', 'Fare' and 'Embarked'. Values may be strings.
    model: Fitted estimator exposing ``predict``. Defaults to the
        notebook-level ``model_random``.
    training_df (pd.DataFrame): Frame holding the un-encoded training columns
        'Age', 'Sex' and 'Embarked'. Defaults to the notebook-level ``df``.

    Returns:
    int: Predicted survival (1 for survived, 0 for did not survive).

    Raises:
    ValueError: If 'Sex' or 'Embarked' holds a category that does not occur
        in the training data, or a numeric field cannot be parsed.
    """
    # Fall back to the objects created by the training cell.
    if model is None:
        model = model_random
    if training_df is None:
        training_df = df

    # Convert to a one-row DataFrame
    data_df = pd.DataFrame([row])

    # Fields typed by the user arrive as strings; make them numeric.
    for col in ('Pclass', 'SibSp', 'Parch', 'Fare'):
        data_df[col] = pd.to_numeric(data_df[col])

    # A blank Age counts as missing and is imputed with the training mean.
    # Assign back instead of a chained fillna(inplace=True), which is
    # deprecated in pandas 2.x and ineffective under copy-on-write.
    age = data_df['Age'].astype(object).map(
        lambda v: float('nan') if isinstance(v, str) and not v.strip() else v
    )
    data_df['Age'] = pd.to_numeric(age).fillna(training_df['Age'].mean())

    # Encode categoricals with the training-time category -> code mapping.
    for col in ('Sex', 'Embarked'):
        classes = sorted(training_df[col].dropna().unique())
        codes = {label: code for code, label in enumerate(classes)}
        raw = data_df[col].iloc[0]
        key = raw.strip() if isinstance(raw, str) else raw
        if key not in codes:
            raise ValueError(
                f"Unknown value {raw!r} for '{col}'; expected one of {classes}"
            )
        data_df[col] = codes[key]

    # Present the columns in the order the model was trained on, when known.
    feature_order = getattr(model, 'feature_names_in_', None)
    if feature_order is not None:
        data_df = data_df[list(feature_order)]

    # Predict
    prediction = model.predict(data_df)
    return prediction[0]

# Read one raw passenger record from the user and split it into its fields.
input_str = input('Enter the new data for prediction (tab-separated values): ')
input_values = input_str.split('\t')

# The record is expected in the original dataset column order:
# PassengerId, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked
if len(input_values) == 11:
    # Keep only the model features, looked up by their position in the record.
    input_dict = {
        feature: input_values[position]
        for feature, position in (
            ('Pclass', 1),
            ('Sex', 3),
            ('Age', 4),
            ('SibSp', 5),
            ('Parch', 6),
            ('Fare', 8),
            ('Embarked', 10),
        )
    }

    # Wrap the features in a Series, the input type predict_survival expects.
    input_series = pd.Series(input_dict)

    # Predict the survival
    prediction = predict_survival(input_series)

    # Both outcomes print a blank line and the class; only the last line differs.
    print()
    print(f"Predicted Class : {prediction}")
    if prediction == 1:
        print(f'The passenger survived')
    else:
        print(f'The passenger did not survive')
else:
    # Wrong field count: show what was parsed, then report the problem.
    print(len(input_values))
    print(input_values)
    print("Error: Expected 11 values, but got a different number of values.")


Enter the new data for prediction (tab-separated values):  2	1	Cumings, Mrs. John Bradley (Florence Briggs Thayer)	female	38	1	0	PC 17599	71.2833	C85	C



Predicted Class : 1
The passenger survived
