In [None]:
# src/load_data.py
# Load the Iris data and print a preview.

import pandas as pd
from sklearn.datasets import load_iris

def load_iris_data():
    """Load the Iris dataset into a pandas DataFrame.

    Returns:
        A DataFrame with one column per entry of the dataset's
        ``feature_names`` plus a ``target`` column holding the dataset's
        ``target`` array.
    """
    bunch = load_iris()
    features = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    # assign() appends the class codes as the last column of a new frame.
    return features.assign(target=bunch.target)

if __name__ == "__main__":
    # Quick manual check: show the first rows and the distinct class codes.
    df = load_iris_data()
    print("Datenvorschau:", df.head(), sep="\n")
    print("\nZielklassen:", df['target'].unique())




In [None]:
# ...
# src/utils.py

# Maps the integer codes stored in the ``target`` column to species names;
# each name's position in the tuple is its code (0, 1, 2).
TARGET_LABELS = dict(enumerate(("setosa", "versicolor", "virginica")))

def add_label_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a ``label`` column of species names.

    The names are looked up in ``TARGET_LABELS`` from the values of the
    ``target`` column. The input frame is left unmodified.
    """
    species = df["target"].map(TARGET_LABELS)
    # assign() works on a copy, so the caller's frame is not touched.
    return df.assign(label=species)

In [None]:
# src/visualize.py

import seaborn as sns
import matplotlib.pyplot as plt

def plot_pairplot(df: pd.DataFrame) -> None:
    """Draw and show a seaborn pair plot of *df*.

    Points are coloured by the ``label`` column and the diagonal panels
    use KDE plots.
    """
    pairplot_options = {"hue": "label", "palette": "Set2", "diag_kind": "kde"}

    sns.set(style="whitegrid")
    sns.pairplot(df, **pairplot_options)
    # y > 1 places the title above the top row of panels.
    plt.suptitle("Iris-Datensatz – Pairplot", y=1.02)
    plt.show()

def plot_feature_distributions(df: pd.DataFrame) -> None:
    """Show one histogram per feature column, coloured by ``label``.

    Every column of *df* except ``target`` and ``label`` is treated as a
    feature. Each feature gets its own figure with a 20-bin histogram and
    a KDE overlay, and each figure is displayed as soon as it is drawn.

    Args:
        df: Frame containing the feature columns and a ``label`` column
            (used for the hue).
    """
    sns.set(style="whitegrid")
    # Exclude both class columns by name. Slicing off only the last column
    # removed "label" but still plotted the integer "target" as a feature.
    class_columns = {"target", "label"}
    features = [column for column in df.columns if column not in class_columns]
    for feature in features:
        plt.figure(figsize=(6, 4))
        sns.histplot(data=df, x=feature, hue="label", kde=True, palette="Set2", bins=20)
        plt.title(f"Verteilung von: {feature}")
        plt.tight_layout()
        plt.show()

if __name__ == "__main__":
    # Load the data, attach species names, then render both plot types.
    df = add_label_column(load_iris_data())
    plot_pairplot(df)
    plot_feature_distributions(df)


In [None]:
# Train the model.
# src/model.py

import joblib
import os
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

def save_model(model, path: str = "models/iris_model.pkl") -> None:
    """Serialize *model* to *path* with joblib and print a confirmation.

    Args:
        model: The object to persist.
        path: Destination file. Its parent directory is created when it
            does not exist yet.
    """
    parent_dir = os.path.dirname(path)
    # A bare filename such as "model.pkl" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create real parents.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    joblib.dump(model, path)
    print(f"Modell gespeichert unter: {path}")

def load_model(path: str = "models/iris_model.pkl") -> LogisticRegression:
    """Read a model from *path* with joblib, print a confirmation, return it.

    The confirmation is printed only after loading succeeded.
    """
    loaded = joblib.load(path)
    print(f"Modell geladen von: {path}")
    return loaded

def train_model(df: pd.DataFrame) -> LogisticRegression:
    """Fit a logistic regression on *df* and print its hold-out metrics.

    The ``target`` column is the prediction target; every column other
    than ``target`` and ``label`` is used as a feature. 20 % of the rows
    are held out (``random_state=42``) and used for the printed accuracy
    and classification report.

    Args:
        df: Frame with feature columns, a ``target`` column and a
            ``label`` column holding the display name of each target.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    X = df.drop(columns=["target", "label"])
    y = df["target"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression(max_iter=200)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    print("Model Accuracy:", round(acc, 3))
    print("\nClassification Report:")
    # classification_report matches target_names positionally against the
    # sorted labels found in y_test/y_pred. df["label"].unique() is in order
    # of first appearance and covers all of df, so it could mislabel rows or
    # have the wrong length. Build the names from the reported labels instead.
    name_by_target = dict(zip(df["target"], df["label"]))
    report_labels = sorted(set(y_test) | set(y_pred))
    report_names = [name_by_target[code] for code in report_labels]
    print(classification_report(y_test, y_pred, labels=report_labels, target_names=report_names))

    return model

if __name__ == "__main__":
    # Train on the labelled Iris frame and persist the fitted model.
    df = add_label_column(load_iris_data())
    model = train_model(df)
    save_model(model)

In [None]:
# src/evaluate.py

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

def evaluate_model():
    """Load the saved model and print accuracy and a classification report.

    Rebuilds the labelled data frame, repeats the 80/20 split with
    ``random_state=42`` (the same arguments ``train_model`` passes) and
    scores the model returned by ``load_model()`` on the test part.
    """
    df = load_iris_data()
    df = add_label_column(df)

    X = df.drop(columns=["target", "label"])
    y = df["target"]

    # Only the test part of the split is needed here.
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = load_model()
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    print("Accuracy:", round(acc, 3))

    print("\nClassification Report:")
    # The report has one row per label occurring in y_test OR y_pred. Taking
    # names from y_test alone yields too few names when the model predicts a
    # class that is absent from the test split, so use the union and pass
    # the labels explicitly to keep rows and names aligned.
    labels = sorted(set(y_test) | set(y_pred))
    target_names = [TARGET_LABELS[label] for label in labels]
    print(classification_report(y_test, y_pred, labels=labels, target_names=target_names))

if __name__ == "__main__":
    # Run the evaluation when this code is executed as a script.
    evaluate_model()


In [None]:
# src/predict.py

import numpy as np

# Column names given to the one-row input frame built in predict_flower;
# the order defines how a plain list of measurements is interpreted.
FEATURE_NAMES: list[str] = [
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
]

def predict_flower(features: list[float]) -> str:
    """Predict the species name for one set of four measurements.

    Args:
        features: Four values, interpreted in the order of
            ``FEATURE_NAMES``.

    Returns:
        The ``TARGET_LABELS`` entry for the class predicted by the model
        that ``load_model()`` returns.

    Raises:
        ValueError: If *features* does not contain exactly four values.
    """
    if len(features) != 4:
        raise ValueError("Genau vier numerische Eingabewerte werden benötigt.")

    classifier = load_model()

    # Wrap the values in a one-row frame whose columns are FEATURE_NAMES.
    sample = pd.DataFrame([features], columns=FEATURE_NAMES)
    predicted_class = classifier.predict(sample)[0]
    return TARGET_LABELS[predicted_class]

if __name__ == "__main__":
    # Example: Setosa
    test_input = [5.1, 3.5, 1.4, 0.2]
    result = predict_flower(test_input)
    print("Vorhersage für Eingabe", test_input, "→", result)