<a href="https://colab.research.google.com/github/agbaike/titanic-machine-learning-python-project/blob/main/Favour_Iruoghene_Agbaike_Titanic_Machine_learning_Survival_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from google.colab import files

# Titanic survival pipeline: load the spreadsheet, impute missing values,
# encode categoricals, fit a logistic regression, and export the original
# columns plus model predictions as a CSV for Tableau.

# Load the provided Excel file
file_path = '/content/titanic31.xlsx'  # Replace with your file path in Colab
data = pd.read_excel(file_path)

# Ensure 'age' is numeric BEFORE imputing: coercion turns unparseable entries
# into NaN, and those must be present when the median fill below runs,
# otherwise they would survive into the model features.
data['age'] = pd.to_numeric(data['age'], errors='coerce')

# Handling missing values
# Assign the filled Series back to the column instead of calling
# fillna(inplace=True) on a column selection: that chained in-place form
# warns on pandas 2.x and does not modify `data` under Copy-on-Write.
data['age'] = data['age'].fillna(data['age'].median())
data['fare'] = data['fare'].fillna(data['fare'].median())
data['embarked'] = data['embarked'].fillna(data['embarked'].mode()[0])

# Verify 'age' is now numeric
print(data['age'].dtype)  # Should be float64 or int64

# Creating age bins
# right=False makes each interval closed on the left: [0, 12), [12, 18), ...
# so an age of exactly 12 is labelled 'Teenager', not 'Child'.
bins = [0, 12, 18, 35, 60, 100]
labels = ['Child', 'Teenager', 'Young Adult', 'Adult', 'Senior']
data['age_bin'] = pd.cut(data['age'], bins=bins, labels=labels, right=False)

# Encoding categorical variables
# Separate encoders are kept so each column can be decoded again later.
sex_encoder = LabelEncoder()
data['sex_encoded'] = sex_encoder.fit_transform(data['sex'])
embarked_encoder = LabelEncoder()
data['embarked_encoded'] = embarked_encoder.fit_transform(data['embarked'])

# Selecting features
features = ['pclass', 'sex_encoded', 'age', 'sibsp', 'parch', 'fare', 'embarked_encoded']
X = data[features]
y = data['survived']

# Splitting the data into training and testing sets
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Generating predictions for the entire dataset
# NOTE: this includes the rows the model was trained on, so comparing
# 'predictions' with 'survived' in the export is not a held-out evaluation;
# use X_test / y_test for that.
data['predictions'] = model.predict(X)

# Convert the encoded columns back to original labels for final output
data['sex'] = sex_encoder.inverse_transform(data['sex_encoded'])
data['embarked'] = embarked_encoder.inverse_transform(data['embarked_encoded'])

# Selecting relevant columns for Tableau
tableau_columns = [
    'pclass', 'sex', 'age', 'age_bin', 'sibsp', 'parch', 'fare',
    'embarked', 'survived', 'predictions',
]
tableau_data = data[tableau_columns]

# Verify the data types of the final DataFrame
print(tableau_data.dtypes)

# Save the data to a CSV file
tableau_data.to_csv('titanic_predictions.csv', index=False)

# Download the CSV file
files.download('titanic_predictions.csv')


float64
pclass            int64
sex              object
age             float64
age_bin        category
sibsp             int64
parch             int64
fare            float64
embarked         object
survived          int64
predictions       int64
dtype: object


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>