<a href="https://colab.research.google.com/github/ahmedebeed555/titanic/blob/main/27Jul2025Titanic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# ----- Read the Kaggle CSVs -----
train = pd.read_csv('/content/train.csv')
test = pd.read_csv('/content/test.csv')

# Columns fed to the model; 'Survived' in the training file is the label.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
sex_codes = {'male': 0, 'female': 1}
median_filled = ('Age', 'Fare')

# ----- Build the training matrix -----
X = train[features].copy()
X['Sex'] = X['Sex'].map(sex_codes)
for column in median_filled:
    # Missing entries take the column's own median.
    X[column] = X[column].fillna(X[column].median())
y = train['Survived']

# ----- Hold out 20% of the rows for validation (fixed seed) -----
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ----- Fit the classifier (fit() returns the estimator itself) -----
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# ----- Report accuracy on the held-out rows -----
val_preds = model.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, val_preds))

# ----- Build the test matrix -----
X_test = test[features].copy()
X_test['Sex'] = X_test['Sex'].map(sex_codes)
for column in median_filled:
    # Test gaps reuse the median of the (already filled) training column,
    # not a statistic computed from the test file.
    X_test[column] = X_test[column].fillna(X[column].median())

# ----- Predict and write the submission -----
test_preds = model.predict(X_test)

submission = test[['PassengerId']].assign(Survived=test_preds)
submission.to_csv('/content/submission.csv', index=False)
print("Submission file saved as submission.csv")


Validation Accuracy: 0.8100558659217877
Submission file saved as submission.csv


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load data
train = pd.read_csv('/content/train.csv')
test = pd.read_csv('/content/test.csv')

# ===== Feature Engineering =====

# Lookup tables shared by both frames (defined once, outside the loops).
rare_titles = ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major',
               'Rev', 'Sir', 'Jonkheer', 'Dona']
title_map = {'Mr': 1, 'Miss': 2, 'Mrs': 3, 'Master': 4, 'Rare': 5}
sex_map = {'male': 0, 'female': 1}
embarked_map = {'S': 0, 'C': 1, 'Q': 2}

# --- Row-wise features: these need no statistics fitted on the data ---
for df in [train, test]:
    # Title: the first run of letters in Name that follows a space and is
    # terminated by a period. The pattern is a raw string because '\.' in a
    # plain string literal is an invalid escape sequence.
    df['Title'] = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    df['Title'] = df['Title'].replace(rare_titles, 'Rare')
    df['Title'] = df['Title'].replace(['Mlle', 'Ms'], 'Miss')
    df['Title'] = df['Title'].replace('Mme', 'Mrs')
    # Titles missing from title_map (or names with no match) are coded 0.
    df['Title'] = df['Title'].map(title_map).fillna(0)

    # Family Size & IsAlone: the passenger counts as one family member.
    df['FamilySize'] = df['SibSp'] + df['Parch'] + 1
    df['IsAlone'] = (df['FamilySize'] == 1).astype(int)

    # Map Sex to numeric
    df['Sex'] = df['Sex'].map(sex_map)

# --- Imputation values: computed from the training frame only, then applied
# --- to both frames, so the test file never supplies its own fill values.
age_by_group = train.groupby(['Sex', 'Pclass'])['Age'].median()
age_overall = train['Age'].median()
fare_median = train['Fare'].median()
embarked_mode = train['Embarked'].mode()[0]

for df in [train, test]:
    # Look up each row's (Sex, Pclass) median age; join() keeps df's index.
    group_age = df[['Sex', 'Pclass']].join(age_by_group, on=['Sex', 'Pclass'])['Age']
    # Fall back to the overall training median when a row's group has no
    # median available (group absent from train, or all ages missing there).
    df['Age'] = df['Age'].fillna(group_age).fillna(age_overall)

    # Fill missing Fare (a no-op for a frame without gaps in Fare).
    df['Fare'] = df['Fare'].fillna(fare_median)

    # Fill missing Embarked with the training mode, then encode.
    # astype(int) raises if a port code is not present in embarked_map.
    df['Embarked'] = df['Embarked'].fillna(embarked_mode).map(embarked_map).astype(int)

# ===== Feature Selection =====
features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Title', 'FamilySize', 'IsAlone']
X = train[features]
y = train['Survived']
X_test = test[features]

# ===== Model Training =====
# Hold out 20% of the training rows for validation; the seed fixes the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# ===== Evaluation =====
val_preds = model.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, val_preds))

# ===== Prediction & Submission =====
test_preds = model.predict(X_test)
submission = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': test_preds})
submission.to_csv('/content/submission.csv', index=False)
print("✅ Submission file saved as 'submission.csv'")


Validation Accuracy: 0.8379888268156425
✅ Submission file saved as 'submission.csv'


In [None]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load Data
train = pd.read_csv('/content/train.csv')
test = pd.read_csv('/content/test.csv')

# ===== Feature Engineering =====
# Step 1: features derived row by row, identical for both frames.
for df in [train, test]:
    # Title from Name: the first run of letters that follows a space and is
    # terminated by a period. Raw string, because '\.' in a plain string
    # literal is an invalid escape sequence.
    df['Title'] = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    df['Title'] = df['Title'].replace(
        ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'],
        'Rare')
    df['Title'] = df['Title'].replace(['Mlle', 'Ms'], 'Miss')
    df['Title'] = df['Title'].replace('Mme', 'Mrs')
    # Any title not in the mapping (or a name with no match) is coded 0.
    df['Title'] = df['Title'].map({'Mr': 1, 'Miss': 2, 'Mrs': 3, 'Master': 4, 'Rare': 5}).fillna(0)

    # Family Features: the passenger counts as one family member.
    df['FamilySize'] = df['SibSp'] + df['Parch'] + 1
    df['IsAlone'] = (df['FamilySize'] == 1).astype(int)

    # Encode Sex
    df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

# Step 2: imputation values are computed from the training frame only and
# applied to both frames, so the test file never supplies its own fill values.
age_by_group = train.groupby(['Sex', 'Pclass'])['Age'].median()
age_overall = train['Age'].median()
fare_median = train['Fare'].median()

for df in [train, test]:
    # Fill Age by the (Sex, Pclass) group median; join() keeps df's index.
    group_age = df[['Sex', 'Pclass']].join(age_by_group, on=['Sex', 'Pclass'])['Age']
    # Fall back to the overall training median when a row's group has no
    # median available (group absent from train, or all ages missing there).
    df['Age'] = df['Age'].fillna(group_age).fillna(age_overall)

    # Fill Fare
    df['Fare'] = df['Fare'].fillna(fare_median)

    # Fill Embarked with the constant 'S', then encode.
    # astype(int) raises if a port code is not one of S / C / Q.
    df['Embarked'] = df['Embarked'].fillna('S')
    df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)

# ===== Select Features =====
features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Title', 'FamilySize', 'IsAlone']
X = train[features]
y = train['Survived']
X_test = test[features]

# ===== Train/Test Split =====
# Hold out 20% of the training rows for validation; the seed fixes the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# ===== XGBoost Model =====
# `use_label_encoder` is not passed: the installed XGBoost reports it as an
# unused parameter and emits a warning when it is supplied.
model = XGBClassifier(eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)

# ===== Validation Accuracy =====
val_preds = model.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, val_preds))

# ===== Predict on Test Data =====
test_preds = model.predict(X_test)

# ===== Submission File =====
submission = pd.DataFrame({
    'PassengerId': test['PassengerId'],
    'Survived': test_preds
})
submission.to_csv('/content/submission.csv', index=False)
print("✅ Submission saved as '/content/submission.csv'")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Validation Accuracy: 0.8491620111731844
✅ Submission saved as '/content/submission.csv'
