In [3]:
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

titanic = sns.load_dataset('titanic')

# Clean & encode (lowercase 'sex')
titanic['age'].fillna(titanic['age'].median(), inplace=True)
le_sex = LabelEncoder()
le_emb = LabelEncoder()
titanic['sex_n'] = le_sex.fit_transform(titanic['sex'])
titanic['embarked_n'] = le_emb.fit_transform(titanic['embarked'])

X = titanic[['pclass', 'sex_n', 'age', 'sibsp', 'parch', 'fare', 'embarked_n']]
y = titanic['survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)
acc = model.score(X_test, y_test)
print(f"Accuracy: {acc:.2%}")  # 78-82%

Accuracy: 81.01%


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  titanic['age'].fillna(titanic['age'].median(), inplace=True)
