In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
# Page heading rendered at the top of the Streamlit app
app_title = "Titanic Survival Prediction"
st.title(app_title)


2025-05-12 09:21:29.422 
  command:

    streamlit run C:\Users\akash\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [3]:
# Read the Titanic train and test CSV files into DataFrames
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "C:\\Ankit_Singh\\Data Science\\Sample Data\\Titanic_train.csv",
        "C:\\Ankit_Singh\\Data Science\\Sample Data\\Titanic_test.csv",
    )
)


In [4]:
# Preprocessing function
def preprocess_data(df):
    """Return a cleaned, numerically encoded copy of a Titanic passenger frame.

    Steps, applied to a new DataFrame (the input is never modified):

    * drop the ``Name``, ``Ticket`` and ``Cabin`` columns when present;
    * fill missing ``Age`` and ``Fare`` values with that column's median;
    * fill missing ``Embarked`` values with that column's most frequent value;
    * encode ``Sex`` as male=1 / female=0 and ``Embarked`` as C=0 / Q=1 / S=2.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Age``, ``Fare``, ``Embarked`` and ``Sex`` columns.

    Returns
    -------
    pandas.DataFrame
        The processed copy.

    Raises
    ------
    KeyError
        If one of the required columns is missing.

    Notes
    -----
    Calling the function on its own output is a no-op: columns that are
    already numeric are not pushed through the string-to-code mapping again,
    which would otherwise turn every value into NaN.
    """
    # drop() returns a new frame, so the caller's DataFrame stays untouched.
    df = df.drop(columns=["Name", "Ticket", "Cabin"], errors="ignore")

    df["Age"] = df["Age"].fillna(df["Age"].median())
    df["Fare"] = df["Fare"].fillna(df["Fare"].median())

    # mode() is empty when the column holds no non-null value; indexing it
    # would raise, so only impute when there is something to impute with.
    embarked_modes = df["Embarked"].mode()
    if not embarked_modes.empty:
        df["Embarked"] = df["Embarked"].fillna(embarked_modes.iloc[0])

    encodings = (
        ("Sex", {"male": 1, "female": 0}),
        ("Embarked", {"C": 0, "Q": 1, "S": 2}),
    )
    for column, codes in encodings:
        # An already-numeric column is treated as already encoded; mapping it
        # through the string keys would replace every value with NaN.
        if not pd.api.types.is_numeric_dtype(df[column]):
            df[column] = df[column].map(codes)
    return df


In [5]:
# Run both splits through preprocess_data.
# Note: the raw frames are rebound to their processed versions here, so
# re-running this cell on its own feeds already-processed data back in.
train_data, test_data = map(preprocess_data, (train_data, test_data))


In [6]:
# Split the training frame into the target column and the feature matrix
non_feature_cols = ["Survived", "PassengerId"]
y_train = train_data["Survived"]
X_train = train_data.drop(columns=non_feature_cols, errors='ignore')


In [7]:
# Fit the scaler on the training features, then scale those same features
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)


In [8]:
# Fit a logistic regression classifier on the scaled training features
log_reg_model = LogisticRegression().fit(X_train_scaled, y_train)
log_reg_model  # fit() returns the estimator itself; keep it as the cell's displayed value


In [9]:
# Predict on the test split, reusing the scaler and model fitted on the training data
label_and_id_cols = ["Survived", "PassengerId"]
X_test = test_data.drop(columns=label_and_id_cols, errors='ignore')
X_test_scaled = scaler.transform(X_test)
y_pred_test = log_reg_model.predict(X_test_scaled)


In [10]:
# Report on the test split: score the predictions when ground-truth labels are
# present in the test frame, otherwise just show the raw predictions.
labels_available = "Survived" in test_data.columns
if not labels_available:
    st.subheader("Test Predictions")
    st.write(y_pred_test)
else:
    y_true_test = test_data["Survived"]
    st.subheader("Test Data Evaluation")
    test_accuracy = accuracy_score(y_true_test, y_pred_test)
    st.write(f"Accuracy: {test_accuracy:.2f}")
    st.write("Confusion Matrix:")
    st.write(confusion_matrix(y_true_test, y_pred_test))
    st.write("Classification Report:")
    st.text(classification_report(y_true_test, y_pred_test))




In [11]:
# --- Interactive inputs: one widget per model feature ---
st.subheader("Try it Yourself!")
pclass = st.selectbox(
    "Pclass (Passenger Class)",
    options=[1, 2, 3],
)
sex = st.selectbox(
    "Sex",
    options=['male', 'female'],
)
age = st.number_input(
    "Age",
    min_value=0,
    max_value=100,
    value=30,
)
sibsp = st.number_input(
    "Siblings/Spouses Aboard",
    min_value=0,
    max_value=10,
    value=0,
)
parch = st.number_input(
    "Parents/Children Aboard",
    min_value=0,
    max_value=10,
    value=0,
)
fare = st.number_input(
    "Fare",
    min_value=0.0,
    value=50.0,
)
embarked = st.selectbox(
    "Embarked",
    options=['C', 'Q', 'S'],
)



2025-05-12 09:25:11.666 Session state does not function when running a script without `streamlit run`


In [12]:
# Assemble the widget values into a single-row frame, using the same numeric
# codes for Sex (male=1, otherwise 0) and Embarked (C=0, Q=1, otherwise 2)
# that preprocess_data assigns.
sex_code = int(sex == 'male')
embarked_code = {'C': 0, 'Q': 1}.get(embarked, 2)
user_data = pd.DataFrame({
    'Pclass': [pclass],
    'Sex': [sex_code],
    'Age': [age],
    'SibSp': [sibsp],
    'Parch': [parch],
    'Fare': [fare],
    'Embarked': [embarked_code],
})


In [13]:
# Put the user's columns in the same order as the training feature matrix
user_data = user_data.loc[:, X_train.columns]


In [14]:
# Scale the user's row with the fitted scaler, classify it, and show the outcome
user_data_scaled = scaler.transform(user_data)
prediction = log_reg_model.predict(user_data_scaled)
st.subheader("Your Prediction:")
if prediction[0] == 1:
    outcome_label = "Survived"
else:
    outcome_label = "Not Survived"
st.write(outcome_label)


