## Prepare data

In [106]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv('titanic.csv')
df.head()

In [None]:
df.tail()

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [None]:
df.isnull().sum()/df.shape[0]*100

In [None]:
#check duplicates
df.duplicated().sum() 

In [None]:
# Löschen der Spalte 'Cabin' aus dem DataFrame weil 77% missing values
df.drop('Cabin', axis=1, inplace=True)
#Age abrunden
df['Age'] = np.floor(df['Age'])
#missing values treatment
df['Age'].fillna(df['Age'].median(), inplace=True) # ist's sinvoll?
df['Age'] = df['Age'].astype(int)
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])


In [None]:
#check missing values
df.isnull().sum()/df.shape[0]*100

In [115]:
#extrahiere new column 'Titel',das könnte wichtig für datenanalyse sein?
df["Title"] = df["Name"].apply(lambda x: x.split(",")[1].split(".")[0].strip())
df["Name"] = df["Name"].apply(lambda x: ' '.join([word for word in x.split() if not word.endswith('.')]))


In [None]:
df.describe().T

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df

## Version 1 (not using)

In [None]:
# Import libraries
import base64
import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, cross_validate, train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, make_scorer, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import plotly.graph_objects as go
import plotly.express as px
from dash import Dash, dcc, html

# 1. DATA PREPARATION
df = pd.read_csv('titanic.csv')

# Clean the data
# These columns (PassengerId, Name, Ticket, Cabin) are not useful for predicting survival. They either contain unique identifiers or non-informative text data that do not contribute to the model's predictive power.
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], errors='ignore')
# The 'Age' column has missing values. Using the median to fill these gaps is a common practice because it is robust to outliers and provides a central tendency measure.
df['Age'] = np.floor(df['Age'])
df['Age'].fillna(df['Age'].median(), inplace=True)
# The 'Embarked' column has missing values. Filling them with the most frequent value.
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
df = pd.get_dummies(df, columns=['Sex', 'Embarked'], drop_first=True)
# Features and target: To prepare the data for modeling, we need to separate the features (input variables) from the target (output variable).
X = df.drop(columns=['Survived'])
y = df['Survived']

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


# Models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN (k=3)": KNeighborsClassifier(n_neighbors=3)
}

# Cross-validation and Bootstrap methods
def cross_validate_method(model, X, y):
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    scoring = {
        'accuracy': 'accuracy',
        'precision': make_scorer(precision_score, average='weighted'),
        'recall': make_scorer(recall_score, average='weighted'),
        'f1': make_scorer(f1_score, average='weighted')
    }
    scores = cross_validate(model, X, y, cv=cv, scoring=scoring, return_estimator=True)
    
    confusion_matrices = []
    for estimator, (train_index, test_index) in zip(scores['estimator'], cv.split(X, y)):
        y_pred = estimator.predict(X[test_index])
        cm = confusion_matrix(y[test_index], y_pred)
        confusion_matrices.append(cm)
    
    mean_confusion_matrix = np.mean(confusion_matrices, axis=0)
    
    return {
        'accuracy': scores['test_accuracy'].mean(),
        'precision': scores['test_precision'].mean(),
        'recall': scores['test_recall'].mean(),
        'f1': scores['test_f1'].mean(),
        'confusion_matrix': mean_confusion_matrix
    }

def bootstrap_632_method(model, X, y):
    """Perform Bootstrap .632 method with extended metrics."""
    train_metrics, test_metrics = [], []
    confusion_matrices = []

    for _ in range(100):  # Number of bootstrap iterations
        # Bootstrap sampling
        bootstrap_indices = np.random.choice(np.arange(len(X)), size=len(X), replace=True)
        oob_mask = ~np.isin(np.arange(len(X)), bootstrap_indices)
        
        X_boot, y_boot = X[bootstrap_indices], y[bootstrap_indices]
        X_oob, y_oob = X[oob_mask], y[oob_mask]

        # Train the model on in-bag data
        model.fit(X_boot, y_boot)
        y_pred_train = model.predict(X_boot)

        # Collect train metrics
        train_metrics.append({
            'accuracy': accuracy_score(y_boot, y_pred_train),
            'precision': precision_score(y_boot, y_pred_train, average='weighted'),
            'recall': recall_score(y_boot, y_pred_train, average='weighted'),
            'f1': f1_score(y_boot, y_pred_train, average='weighted'),
        })

        # Collect test metrics if there are out-of-bag samples
        if len(y_oob) > 0:
            y_pred_oob = model.predict(X_oob)
            test_metrics.append({
                'accuracy': accuracy_score(y_oob, y_pred_oob),
                'precision': precision_score(y_oob, y_pred_oob, average='weighted'),
                'recall': recall_score(y_oob, y_pred_oob, average='weighted'),
                'f1': f1_score(y_oob, y_pred_oob, average='weighted'),
            })
            confusion_matrices.append(confusion_matrix(y_oob, y_pred_oob))

    # Aggregate train and test metrics
    train_metrics_mean = {metric: np.mean([m[metric] for m in train_metrics]) for metric in train_metrics[0]}
    test_metrics_mean = {metric: np.mean([m[metric] for m in test_metrics]) for metric in test_metrics[0]}

    # Combine metrics using the .632 formula
    combined_metrics = {metric: 0.368 * train_metrics_mean[metric] + 0.632 * test_metrics_mean[metric] for metric in train_metrics_mean}
    mean_confusion_matrix = np.mean(confusion_matrices, axis=0) if confusion_matrices else None

    combined_metrics['confusion_matrix'] = mean_confusion_matrix
    return combined_metrics

# Evaluate models
metrics_results = {}
confusion_matrices = {}
crossval_results = {}
bootstrap_results = {}


for model_name, model in models.items():
    # Cross-validation
    crossval_metrics = cross_validate_method(model, X, y)
    
    # Bootstrap
    bootstrap_metrics = bootstrap_632_method(model, X, y)
    
    # Save metrics
    metrics_results[model_name] = {
        'CrossVal Accuracy': crossval_metrics['accuracy'],
        'CrossVal Precision': crossval_metrics['precision'],
        'CrossVal Recall': crossval_metrics['recall'],
        'CrossVal F1-Score': crossval_metrics['f1'],
        'Bootstrap Accuracy': bootstrap_metrics['accuracy'],
        'Bootstrap Precision': bootstrap_metrics['precision'],
        'Bootstrap Recall': bootstrap_metrics['recall'],
        'Bootstrap F1-Score': bootstrap_metrics['f1']
    }
    
    # Save confusion matrices
    crossval_results[model_name] = crossval_metrics['confusion_matrix']
    bootstrap_results[model_name] = bootstrap_metrics['confusion_matrix']

# 3. Build Dash App
app = Dash(__name__)

app.layout = html.Div([
    html.H1("Titanic Classification Models", style={'textAlign': 'center'}),

    # Performance Metrics
    html.H3("Performance Metrics"),
    dcc.Graph(figure=px.bar(
        pd.DataFrame(metrics_results).reset_index(),
        x='index', y=list(models.keys()),
        barmode='group',
        title="Performance Metrics for All Models",
        labels={'index': 'Metrics', 'value': 'Score'}
    )),

    html.H3("Confusion Matrices"),
    html.Div([
        dcc.Graph(figure=px.imshow(confusion_matrices[model], text_auto=True, title=f"{model}", width=300, height=300))
        for model in models.keys()
    ], style={'display': 'flex', 'flexDirection': 'row'}),

    # Cross-validation
    html.H3("Cross-Validation Results"),
    dcc.Graph(figure=px.line(
        x=list(range(1, 11)),
        y=[crossval_results[model] for model in models.keys()],
        title="Cross-Validation Accuracy (10-Fold)",
        labels={'x': 'Fold', 'y': 'Accuracy', 'color': 'Models'}
    )),

    # Bootstrap .632 Results
    html.H3("Bootstrap .632 Results"),
    dcc.Graph(figure=px.bar(
        x=list(models.keys()),
        y=[bootstrap_results[model] for model in models.keys()],
        title="Bootstrap .632 Accuracy",
        labels={'x': 'Model', 'y': 'Bootstrap Accuracy', 'color': 'Models'},
        color=list(models.keys())
    )),
])

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)

## Version 2: This one works but it retrain dataset for performance, so actually it's wrong.

In [None]:
# Import libraries
import base64
import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import plotly.graph_objects as go
import plotly.express as px
from dash import Dash, dcc, html

# 1. DATA PREPARATION
df = pd.read_csv('titanic.csv')

# Data Cleaning
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], errors='ignore')
df['Age'] = np.floor(df['Age'])
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
df = pd.get_dummies(df, columns=['Sex', 'Embarked'], drop_first=True)

# Features and target
X = df.drop(columns=['Survived'])
y = df['Survived']

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN (k=3)": KNeighborsClassifier(n_neighbors=3)
}

# Cross-validation and Bootstrap methods
def cross_validate_method(model, X, y):
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    return cross_val_score(model, X, y, cv=cv, scoring='accuracy')

def bootstrap_632_method(model, X, y):
    train_acc, test_acc = [], []
    for _ in range(100):
        X_boot, y_boot = resample(X, y, replace=True)
        X_oob = X[~np.in1d(np.arange(X.shape[0]), X_boot.index)]
        y_oob = y[~np.in1d(np.arange(y.shape[0]), X_boot.index)]
        model.fit(X_boot, y_boot)
        acc_train = accuracy_score(y_boot, model.predict(X_boot))
        acc_test = accuracy_score(y_oob, model.predict(X_oob)) if len(y_oob) > 0 else 0
        train_acc.append(acc_train)
        test_acc.append(acc_test)
    return np.mean(0.368 * np.array(train_acc) + 0.632 * np.array(test_acc))

# Evaluate models
metrics_results = {}
confusion_matrices = {}
crossval_results = {}
bootstrap_results = {}

for model_name, model in models.items():
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    # Save metrics
    metrics_results[model_name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred)
    }
    confusion_matrices[model_name] = confusion_matrix(y_test, y_pred)
    crossval_results[model_name] = cross_validate_method(model, X, y)
    bootstrap_results[model_name] = bootstrap_632_method(model, X, y)

# 3. Build Dash App
app = Dash(__name__)

app.layout = html.Div([
    html.H1("Titanic Classification Models", style={'textAlign': 'center'}),

    # Performance Metrics
    html.H3("Performance Metrics"),
    dcc.Graph(figure=px.bar(
        pd.DataFrame(metrics_results).reset_index(),
        x='index', y=list(models.keys()),
        barmode='group',
        title="Performance Metrics for All Models",
        labels={'index': 'Metrics', 'value': 'Score'}
    )),

    # Confusion Matrices
    html.H3("Confusion Matrices"),
    html.Div([
        dcc.Graph(figure=px.imshow(confusion_matrices[model], text_auto=True, title=f"Confusion Matrix: {model}", width=300, height=300))
        for model in models.keys()
    ], style={'display': 'flex', 'flexDirection': 'row', 'flexWrap': 'wrap'}),
    
    # Cross-validation
    html.H3("Cross-Validation Results"),
    dcc.Graph(figure=px.line(
        x=list(range(1, 11)),
        y=[crossval_results[model] for model in models.keys()],
        title="Cross-Validation Accuracy (10-Fold)",
        labels={'x': 'Fold', 'y': 'Accuracy', 'color': 'Models'}
    )),

    # Bootstrap .632 Results
    html.H3("Bootstrap .632 Results"),
    dcc.Graph(figure=px.bar(
        x=list(models.keys()),
        y=[bootstrap_results[model] for model in models.keys()],
        title="Bootstrap .632 Accuracy",
        labels={'x': 'Model', 'y': 'Bootstrap Accuracy'},
        color=list(models.keys()),
        # color_discrete_sequence=px.colors.qualitative.Set1
    )),


])

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)

## Version 3 (huo)

In [2]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, make_scorer
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import plotly.express as px
from dash import Dash, dcc, html
import matplotlib.pyplot as plt
import io
import base64

# 1. DATA PREPARATION
df = pd.read_csv('titanic.csv')

# Clean the data
# These columns (PassengerId, Name, Ticket, Cabin) are not useful for predicting survival. They either contain unique identifiers or non-informative text data that do not contribute to the model's predictive power.
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], errors='ignore')
# The 'Age' column has missing values. Using the median to fill these gaps is a common practice because it is robust to outliers and provides a central tendency measure.
df['Age'] = np.floor(df['Age'])
df['Age'].fillna(df['Age'].median(), inplace=True)
# The 'Embarked' column has missing values. Filling them with the most frequent value.
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
df = pd.get_dummies(df, columns=['Sex', 'Embarked'], drop_first=True)
# Features and target: To prepare the data for modeling, we need to separate the features (input variables) from the target (output variable).
X = df.drop(columns=['Survived'])
y = df['Survived']

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN (k=3)": KNeighborsClassifier(n_neighbors=3)
}

def train_model(model, data_train):
    """Trains the model on the training data."""
    for data in data_train:
        X_train, y_train = data[:2]  # Only use the first two values
        model.fit(X_train, y_train)
    return model

def cross_validation_data(X, y):
    """Generates train-test splits for cross-validation."""
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    data_train = []
    data_test = []

    for train_idx, test_idx in cv.split(X, y):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        data_train.append((X_train, y_train))
        data_test.append((X_test, y_test))

    return data_train, data_test

# Cross-validation method
def cross_validate_evaluation(trained_model, data_test):
    """Perform cross-validation and return metrics and confusion matrix."""
    metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []} # Store metrics for each fold
    confusion_matrices = [] # Store confusion matrices for each fold

    for X_test, y_test in data_test:
        # predict the data
        y_pred = trained_model.predict(X_test)
        metrics['accuracy'].append(accuracy_score(y_test, y_pred))
        metrics['precision'].append(precision_score(y_test, y_pred, average='weighted', zero_division=0))
        metrics['recall'].append(recall_score(y_test, y_pred, average='weighted', zero_division=0))
        metrics['f1'].append(f1_score(y_test, y_pred, average='weighted', zero_division=0))
        confusion_matrices.append(confusion_matrix(y_test, y_pred))

    avg_confusion_matrix = np.mean(confusion_matrices, axis=0)
    avg_metrics = {key: np.mean(value) for key, value in metrics.items()}
    
    return { # return a dictionary containing the average metrics and the average confusion matrix.
        **avg_metrics, # Unpack metrics
        'confusion_matrix': avg_confusion_matrix
        # 'decision_tree': dt_image
    }

def bootstrap_632_data(X, y):
    """Generates train-test splits for the Bootstrap .632 method."""
    data = []
    for _ in range(100):  # Number of bootstrap iterations
        X_boot, y_boot = resample(X, y, replace=True, n_samples=len(X), random_state=42)
        
        oob_indices = np.setdiff1d(np.arange(len(X)), np.unique(X_boot, return_index=True)[1])
        X_oob, y_oob = X[oob_indices], y[oob_indices]
        data.append((X_boot, y_boot, X_oob, y_oob, oob_indices))

    return data

def bootstrap_632_evaluation(trained_model, data):
    train_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    test_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    confusion_matrices = []

    for X_boot, y_boot, X_oob, y_oob, oob_indices in data:

        y_pred_train = trained_model.predict(X_boot) 

        # Collect train metrics
        train_metrics['accuracy'].append(accuracy_score(y_boot, y_pred_train))
        train_metrics['precision'].append(precision_score(y_boot, y_pred_train, average='weighted', zero_division=0))
        train_metrics['recall'].append(recall_score(y_boot, y_pred_train, average='weighted', zero_division=0))
        train_metrics['f1'].append(f1_score(y_boot, y_pred_train, average='weighted', zero_division=0))

        # Collect test (OOB) metrics if there are out-of-bag samples
        if len(oob_indices) > 0:
            y_pred_oob = trained_model.predict(X_oob)
            test_metrics['accuracy'].append(accuracy_score(y_oob, y_pred_oob))
            test_metrics['precision'].append(precision_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            test_metrics['recall'].append(recall_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            test_metrics['f1'].append(f1_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            confusion_matrices.append(confusion_matrix(y_oob, y_pred_oob))

    # Aggregate train and test metrics
    train_metrics_mean = {metric: np.mean(train_metrics[metric]) for metric in train_metrics}
    test_metrics_mean = {metric: np.mean(test_metrics[metric]) for metric in test_metrics}

    # Combine metrics using the .632 formula
    combined_metrics = {metric: 0.368 * train_metrics_mean[metric] + 0.632 * test_metrics_mean[metric] for metric in train_metrics}
    avg_confusion_matrix = np.mean(confusion_matrices, axis=0) if confusion_matrices else None

    combined_metrics['confusion_matrix'] = avg_confusion_matrix
    return combined_metrics

    
# Bootstrap method
def bootstrap_632_method(model, X, y):
    """Perform Bootstrap .632 method with extended metrics."""
    train_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    test_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    confusion_matrices = []

    for _ in range(100):  # Number of bootstrap iterations
        # Bootstrap sampling using resample
        X_boot, y_boot = resample(X, y, replace=True, n_samples=len(X), random_state=42)
        oob_indices = np.setdiff1d(np.arange(len(X)), np.unique(X_boot, return_index=True)[1])

        X_oob, y_oob = X[oob_indices], y[oob_indices]

        # Train the model on in-bag data
        model.fit(X_boot, y_boot)
        y_pred_train = model.predict(X_boot)

        # Collect train metrics
        train_metrics['accuracy'].append(accuracy_score(y_boot, y_pred_train))
        train_metrics['precision'].append(precision_score(y_boot, y_pred_train, average='weighted', zero_division=0))
        train_metrics['recall'].append(recall_score(y_boot, y_pred_train, average='weighted', zero_division=0))
        train_metrics['f1'].append(f1_score(y_boot, y_pred_train, average='weighted', zero_division=0))

        # Collect test (OOB) metrics if there are out-of-bag samples
        if len(oob_indices) > 0:
            y_pred_oob = model.predict(X_oob)
            test_metrics['accuracy'].append(accuracy_score(y_oob, y_pred_oob))
            test_metrics['precision'].append(precision_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            test_metrics['recall'].append(recall_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            test_metrics['f1'].append(f1_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            confusion_matrices.append(confusion_matrix(y_oob, y_pred_oob))

    # Aggregate train and test metrics
    train_metrics_mean = {metric: np.mean(train_metrics[metric]) for metric in train_metrics}
    test_metrics_mean = {metric: np.mean(test_metrics[metric]) for metric in test_metrics}

    # Combine metrics using the .632 formula
    combined_metrics = {metric: 0.368 * train_metrics_mean[metric] + 0.632 * test_metrics_mean[metric] for metric in train_metrics}
    avg_confusion_matrix = np.mean(confusion_matrices, axis=0) if confusion_matrices else None

    combined_metrics['confusion_matrix'] = avg_confusion_matrix
    return combined_metrics

# decision tree visualisieren
def decision_tree_visualization_cv(trained_model, X):
    
    buf = io.BytesIO()
    plt.figure(figsize=(100, 90))
    plot_tree(trained_model, feature_names=X.columns, class_names=["Not Survived", "Survived"], filled=True)
    plt.savefig(buf, format="png")
    plt.close()
    return base64.b64encode(buf.getbuffer()).decode("utf8")

def decision_tree_visualization_bs(trained_model, X):
    
    buf = io.BytesIO()
    plt.figure(figsize=(100, 90))
    plot_tree(trained_model, feature_names=X.columns, class_names=["Not Survived", "Survived"], filled=True)
    plt.savefig(buf, format="png")
    plt.close()
    return base64.b64encode(buf.getbuffer()).decode("utf8")

# Evaluate models
crossval_results = {}
bootstrap_results = {}
decision_tree_cv = {}
decision_tree_bs = {}

for model_name, model in models.items():
    # Cross-validation
    data_train_cv, data_test_cv = cross_validation_data(X_scaled, y)
    trained_model_cv = train_model(model, data_train_cv)
    crossval_results[model_name] = cross_validate_evaluation(trained_model_cv, data_test_cv)
    
    # Bootstrap
    data_bs = bootstrap_632_data(X_scaled, y)
    trained_model_bs = train_model(model, data_bs)
    bootstrap_results[model_name] = bootstrap_632_evaluation(trained_model_bs, data_bs)

    # Store decision tree visualization if the model is DecisionTreeClassifier
    if isinstance(model, DecisionTreeClassifier):
        decision_tree_cv = decision_tree_visualization_cv(trained_model_cv, X)
        decision_tree_bs = decision_tree_visualization_bs(trained_model_bs, X)


# 3. Build Dash App
app = Dash(__name__)

app.layout = html.Div([
    html.H1("Titanic Classification Models Evaluation", style={'textAlign': 'center'}),

    # Metrics Comparison
    html.H3("Cross-Validation Metrics Comparison"),
    dcc.Graph(figure=px.bar(
        pd.DataFrame([
            {'Metric': metric, 'Model': model, 'Score': crossval_results[model][metric]}
            for model in models.keys()
            for metric in ['accuracy', 'precision', 'recall', 'f1']
        ]),
        x='Metric', y='Score', color='Model', barmode='group',
        title="Cross-Validation Metrics Comparison",
        labels={'Metric': 'Metrics', 'Score': 'Score'}
    )),

    html.H3("Bootstrap Metrics Comparison"),
    dcc.Graph(figure=px.bar(
        pd.DataFrame([
            {'Metric': metric, 'Model': model, 'Score': bootstrap_results[model][metric]}
            for model in models.keys()
            for metric in ['accuracy', 'precision', 'recall', 'f1']
        ]),
        x='Metric', y='Score', color='Model', barmode='group',
        title="Bootstrap Metrics Comparison",
        labels={'Metric': 'Metrics', 'Score': 'Score'}
    )),

    # Confusion Matrices
    html.H3("Confusion Matrices: Cross-Validation"),
    html.Div([
        html.Div([
            dcc.Graph(figure=px.imshow(crossval_results[model]['confusion_matrix'], text_auto=True,
                                        title=f"{model}",
                                        color_continuous_scale='Blues').update_layout(
                                            autosize=False,
                                            width=300,
                                            height=300
                                        ))
        ], style={'margin': '10px'})
        for model in models.keys()
    ], style={'display': 'flex', 'flexDirection': 'row', 'flexWrap': 'wrap'}),
    
    html.H3("Confusion Matrices: Bootstrap"),
    html.Div([
        html.Div([
            dcc.Graph(figure=px.imshow(bootstrap_results[model]['confusion_matrix'], text_auto=True,
                                        title=f"{model}",
                                        color_continuous_scale='Reds').update_layout(
                                            autosize=False,
                                            width=300,
                                            height=300
                                        ))
        ], style={'margin': '10px'})
        for model in models.keys()
    ], style={'display': 'flex', 'flexDirection': 'row', 'flexWrap': 'wrap'}),

    # Decision Tree visualisierung
    html.H3("Decision Tree(Crossvalidation)"),
    html.Img(src="data:image/png;base64,{}".format(decision_tree_cv), style={'width': '100%', 'height': 'auto'}),
    html.H3("Decision Tree(Bootstrap)"),
    html.Img(src="data:image/png;base64,{}".format(decision_tree_bs), style={'width': '100%', 'height': 'auto'}),

])

if __name__ == '__main__':
    app.run_server(debug=True)


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





## Version4


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
import plotly.graph_objects as go
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, make_scorer
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.tree import DecisionTreeClassifier, plot_tree
import plotly.express as px
from dash import Dash, dcc, html
import io
import base64
import matplotlib.pyplot as plt


# 1. DATA PREPARATION
df = pd.read_csv('titanic.csv')

# Clean the data
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], errors='ignore')
df['Age'] = np.floor(df['Age'])
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
df = pd.get_dummies(df, columns=['Sex', 'Embarked'], drop_first=True)
X = df.drop(columns=['Survived'])
y = df['Survived']

# Feature Scaling
#scaler = StandardScaler()
#X_scaled = scaler.fit_transform(X)

# Models array (2D array for Cross-Validation and Bootstrap)
models = {
    "Logistic Regression": [LogisticRegression(max_iter=1000), LogisticRegression(max_iter=1000)],
    "Decision Tree": [DecisionTreeClassifier(random_state=42), DecisionTreeClassifier(random_state=42)],
    "KNN (k=3)": [KNeighborsClassifier(n_neighbors=3), KNeighborsClassifier(n_neighbors=3)],
}

# Cross-validation method
def cross_validate_method(model, X, y):
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    confusion_matrices = []

    for train_idx, test_idx in cv.split(X, y):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx] if isinstance(X, pd.DataFrame) else (X[train_idx], X[test_idx])
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx] if isinstance(y, pd.Series) else (y[train_idx], y[test_idx])

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        metrics['accuracy'].append(accuracy_score(y[test_idx], y_pred))
        metrics['precision'].append(precision_score(y[test_idx], y_pred, average='weighted', zero_division=0))
        metrics['recall'].append(recall_score(y[test_idx], y_pred, average='weighted', zero_division=0))
        metrics['f1'].append(f1_score(y[test_idx], y_pred, average='weighted', zero_division=0))
        confusion_matrices.append(confusion_matrix(y[test_idx], y_pred))

    avg_confusion_matrix = np.mean(confusion_matrices, axis=0)
    avg_metrics = {key: np.mean(value) for key, value in metrics.items()}

    return { 
        **avg_metrics,
        'confusion_matrix': avg_confusion_matrix
    }

# Bootstrap method
def bootstrap_632_method(model, X, y):
    train_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    test_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
    confusion_matrices = []

    for _ in range(100):  
        # Resample for bootstrap
        X_boot, y_boot = resample(X, y, replace=True, n_samples=len(X), random_state=42)

        # Berechne die Out-of-Bag (OOB)-Indizes
        oob_indices = np.setdiff1d(np.arange(len(X)), np.unique(X_boot, return_index=True)[1])

        # Verwende .iloc[] für pandas DataFrame/Series 
        X_oob = X.iloc[oob_indices] if isinstance(X, pd.DataFrame) else X[oob_indices]
        y_oob = y.iloc[oob_indices] if isinstance(y, pd.Series) else y[oob_indices]

        # Trainiere das Modell mit den Bootstrap-Daten
        model.fit(X_boot, y_boot)
        y_pred_train = model.predict(X_boot)

        train_metrics['accuracy'].append(accuracy_score(y_boot, y_pred_train))
        train_metrics['precision'].append(precision_score(y_boot, y_pred_train, average='weighted', zero_division=0))
        train_metrics['recall'].append(recall_score(y_boot, y_pred_train, average='weighted', zero_division=0))
        train_metrics['f1'].append(f1_score(y_boot, y_pred_train, average='weighted', zero_division=0))

        if len(oob_indices) > 0:
            y_pred_oob = model.predict(X_oob)
            test_metrics['accuracy'].append(accuracy_score(y_oob, y_pred_oob))
            test_metrics['precision'].append(precision_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            test_metrics['recall'].append(recall_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            test_metrics['f1'].append(f1_score(y_oob, y_pred_oob, average='weighted', zero_division=0))
            confusion_matrices.append(confusion_matrix(y_oob, y_pred_oob))
            
    train_metrics_mean = {metric: np.mean(train_metrics[metric]) for metric in train_metrics}
    test_metrics_mean = {metric: np.mean(test_metrics[metric]) for metric in test_metrics}

    combined_metrics = {metric: 0.368 * train_metrics_mean[metric] + 0.632 * test_metrics_mean[metric] for metric in train_metrics}
    avg_confusion_matrix = np.mean(confusion_matrices, axis=0) if confusion_matrices else None

    combined_metrics['confusion_matrix'] = avg_confusion_matrix
    return combined_metrics

# Visualize Decision Tree (cross-validation and bootstrap)
def decision_tree_visualization(model, X, y):
    # Visualize the Decision Tree
    buf = io.BytesIO()
    plt.figure(figsize=(100, 90))
    plot_tree(model, feature_names=X.columns, class_names=["Not Survived", "Survived"], filled=True)
    plt.savefig(buf, format="png")
    plt.close()
    return base64.b64encode(buf.getbuffer()).decode("utf8")

# Evaluate models
crossval_results = {}
bootstrap_results = {}

for model_name, model_pair in models.items():
    crossval_results[model_name] = cross_validate_method(model_pair[0], X, y)
    bootstrap_results[model_name] = bootstrap_632_method(model_pair[1], X, y)


# For decision tree visualization (cross-validation and bootstrap)
decision_tree_cv = decision_tree_visualization(models["Decision Tree"][0], X, y)
decision_tree_bootstrap = decision_tree_visualization(models["Decision Tree"][1], X, y)

# 3. Build Dash App
app = Dash(__name__)

app.layout = html.Div([
    html.H1("Titanic Classification Models Evaluation", style={'textAlign': 'center'}),

    # Metrics Comparison
    html.H3("Cross-Validation Metrics Comparison"),
    dcc.Graph(figure=go.Figure(
        data=[go.Bar(
            x=['accuracy', 'precision', 'recall', 'f1'],
            y=[crossval_results[model][metric] for metric in ['accuracy', 'precision', 'recall', 'f1']],
            name=model
        ) for model in models.keys()],
        layout=go.Layout(
            barmode='group',
            title="Cross-Validation Metrics Comparison",
            xaxis=dict(title="Metric"),
            yaxis=dict(title="Score")
        )
    )),
    
    html.H3("Bootstrap Metrics Comparison"),
    dcc.Graph(figure=go.Figure(
        data=[go.Bar(
            x=['accuracy', 'precision', 'recall', 'f1'],
            y=[bootstrap_results[model][metric] for metric in ['accuracy', 'precision', 'recall', 'f1']],
            name=model
        ) for model in models.keys()],
        layout=go.Layout(
            barmode='group',
            title="Bootstrap Metrics Comparison",
            xaxis=dict(title="Metric"),
            yaxis=dict(title="Score")
        )
    )),
    html.H3("Confusion Matrices: Cross-Validation"),
    html.Div([
        html.Div([
            dcc.Graph(figure=px.imshow(crossval_results[model]['confusion_matrix'], text_auto=True,
                                        title=f"{model}",
                                        color_continuous_scale='Blues').update_layout(
                                            autosize=False,
                                            width=300,
                                            height=300
                                        ))
        ], style={'margin': '10px'})
        for model in models.keys()
    ], style={'display': 'flex', 'flexDirection': 'row', 'flexWrap': 'wrap'}),

    html.H3("Confusion Matrices: Bootstrap"),
    html.Div([
        html.Div([
            dcc.Graph(figure=px.imshow(bootstrap_results[model]['confusion_matrix'], text_auto=True,
                                        title=f"{model}",
                                        color_continuous_scale='Reds').update_layout(
                                            autosize=False,
                                            width=300,
                                            height=300
                                        ))
        ], style={'margin': '10px'})
        for model in models.keys()
    ], style={'display': 'flex', 'flexDirection': 'row', 'flexWrap': 'wrap'}),

    

    # Decision Tree visualizations
    html.H3("Decision Tree (Cross-Validation)"),
    html.Img(src="data:image/png;base64,{}".format(decision_tree_cv), style={'width': '100%', 'height': 'auto'}),

    html.H3("Decision Tree (Bootstrap)"),
    html.Img(src="data:image/png;base64,{}".format(decision_tree_bootstrap), style={'width': '100%', 'height': 'auto'})
])

if __name__ == '__main__':
    app.run_server(debug=True)
