In [2]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# 1. Load dataset
# Titanic passenger CSV fetched over HTTP, so this step needs network access.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
data = pd.read_csv(url)

# 2. Select features and target
# .copy() yields independent objects, so the assignments below never write
# through to `data` and never trigger chained-assignment warnings.
X = data[['Pclass', 'Age', 'Fare', 'Sex']].copy()
y = data['Survived'].copy()

# Encode 'Sex' as 0 (male) / 1 (female).
# Whole-column assignment replaces the column, so it takes the integer dtype of
# the mapped values. `X.loc[:, 'Sex'] = ...` instead writes into the existing
# string column in place on pandas >= 2.0 and leaves it as dtype `object`.
sex_codes = X['Sex'].map({'male': 0, 'female': 1})
if sex_codes.isna().any():
    # Unmapped labels become NaN; fail fast with a clear message instead of
    # passing NaN codes on to the scaler and the model.
    raise ValueError("Column 'Sex' contains values other than 'male'/'female'")
X['Sex'] = sex_codes.astype('int64')

# 3. Split data
# Split BEFORE computing any statistic so the held-out rows cannot influence
# preprocessing (imputation below, scaling in step 5).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Fill missing ages with the median of the TRAINING rows only
# .assign() returns new frames, so no SettingWithCopyWarning can be raised on
# the frames handed back by train_test_split.
age_median = X_train['Age'].median()
X_train = X_train.assign(Age=X_train['Age'].fillna(age_median))
X_test = X_test.assign(Age=X_test['Age'].fillna(age_median))
# Keep the full frame NaN-free as well; below it is only used for its column names.
X['Age'] = X['Age'].fillna(age_median)

# 5. Feature scaling
# The scaler is fit on the training split only and then reused unchanged for
# the test split and for the new sample.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 6. Train model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# 7. Predictions
y_pred = model.predict(X_test_scaled)

# 8. Evaluation
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# 9. Predict new sample
# Values follow the column order of X: Pclass=2, Age=30, Fare=50, Sex=0 (male).
# Building a DataFrame with X's column names keeps the feature names aligned
# with what the scaler was fitted on.
new_passenger = pd.DataFrame([[2, 30, 50, 0]], columns=X.columns)
new_passenger_scaled = scaler.transform(new_passenger)
print("\nPrediction:", model.predict(new_passenger_scaled)[0])


Accuracy: 0.7932960893854749

Confusion Matrix:
 [[89 16]
 [21 53]]

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.85      0.83       105
           1       0.77      0.72      0.74        74

    accuracy                           0.79       179
   macro avg       0.79      0.78      0.78       179
weighted avg       0.79      0.79      0.79       179


Prediction: 0
