In [7]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score

In [2]:
# Load seaborn's bundled "titanic" example dataset as the raw input frame.
df = sns.load_dataset(name="titanic")

In [3]:
# Keep only the target and the three model features, drop incomplete rows,
# and encode `sex` numerically (male -> 0, female -> 1).
# NOTE: this cell overwrites `df`. Re-running it on the already-encoded frame
# maps the 0/1 codes through the string-keyed dict and yields NaN, so re-run
# the load cell first.
df = (
    df[["survived", "sex", "age", "fare"]]
    .dropna()
    .assign(sex=lambda frame: frame["sex"].map({"male": 0, "female": 1}))
)

In [4]:
# Survival counts per sex, written to survival_by_sex.png.
#
# The hue column is mapped to its display labels *before* plotting so that
# seaborn builds the legend itself. Calling plt.legend(labels=[...]) without
# handles pairs the labels with the first artists found on the axes, which are
# not guaranteed to be one bar per hue level, so the legend colours can end up
# not matching the bars.
survival_labels = {0: "Nein", 1: "Ja"}
# The column name doubles as the legend title that seaborn derives from `hue`.
plot_df = df.assign(**{"Überlebt": df["survived"].map(survival_labels)})

fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(
    data=plot_df,
    x="sex",
    order=[0, 1],  # pin category positions so the tick labels below line up
    hue="Überlebt",
    hue_order=["Nein", "Ja"],
    ax=ax,
)
# `sex` was encoded as male -> 0, female -> 1 in the cleaning cell.
ax.set_xticks([0, 1])
ax.set_xticklabels(["male", "female"])
ax.set_xlabel("Geschlecht")
ax.set_ylabel("Anzahl")
ax.set_title("Überlebensrate nach Geschlecht")
fig.tight_layout()
fig.savefig("survival_by_sex.png", dpi=300)
# Close the figure: the PNG on disk is the product, not the inline output.
plt.close(fig)

In [5]:
# Feature matrix (age, fare, encoded sex) and binary survival target.
X = df.loc[:, ["age", "fare", "sex"]]
y = df.loc[:, "survived"]

# Hold out 20 % of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

In [6]:
# Confusion matrix of the held-out predictions, written to confusion_matrix.png.
#
# The class order is pinned explicitly: without `labels`, confusion_matrix
# infers the classes from the values present in y_test / y_pred, so a missing
# class would change the matrix shape and no longer match the two hard-coded
# display labels below.
class_order = [0, 1]  # matches display_labels: "Nicht überlebt", "Überlebt"
cm = confusion_matrix(y_test, y_pred, labels=class_order)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Nicht überlebt", "Überlebt"])
disp.plot(cmap="Blues")
# Address the display's own axes/figure instead of pyplot's "current" state.
disp.ax_.set_title("Konfusionsmatrix: Logistische Regression")
disp.figure_.tight_layout()
disp.figure_.savefig("confusion_matrix.png", dpi=300)
plt.close(disp.figure_)

In [8]:
# Share of test rows whose predicted survival label matches the true label,
# reported as a percentage with two decimals.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {acc:.2%}")

Accuracy: 74.83%
