# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# 1. Load the data
# The CSV is read from the current working directory.
csv_path = 'diabetes.csv'
df = pd.read_csv(csv_path)
print("Dataset shape:", df.shape)

# 2. Clean and prepare the data
# Columns that cannot have 0 as a valid value; a 0 there is treated as a
# missing-value marker and converted to NaN.
cols_to_clean = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
df[cols_to_clean] = df[cols_to_clean].replace(0, np.nan)

# 3. Separate the features from the target variable
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# 4. Split into training and test data (80/20, stratified on the target so
# both partitions keep the same class proportions).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Impute AFTER splitting, using medians computed from the training rows only.
# Computing the medians on the full dataset would let test-set statistics leak
# into the training features and make the evaluation optimistic.
# NOTE: `df` and `X` intentionally keep NaN for the missing values; only the
# train/test partitions are imputed.
train_medians = X_train[cols_to_clean].median()
# fillna with a Series fills per column label; columns not in the Series are
# left untouched. Rebinding (instead of assigning in place) avoids
# chained-assignment warnings on the split frames.
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# 5. Train a decision-tree model
# Depth is capped at 4 and the seed is fixed so repeated runs build the same
# tree. `fit` returns the fitted estimator, so construction and training can
# be chained into a single expression.
model = DecisionTreeClassifier(max_depth=4, random_state=42).fit(X_train, y_train)

# 6. Prediction and evaluation
# Predict on the test partition, compute every metric up front, then print
# them in one place.
y_pred = model.predict(X_test)

accuracy = round(accuracy_score(y_test, y_pred), 3)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("\n=== Modellutvärdering ===")
print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)
print("Confusion Matrix:\n", matrix)

# === 7. Visualize the decision tree (disabled; uncomment the lines below to render the plot) ===
# plt.figure(figsize=(15, 8))
# plot_tree(model, feature_names=X.columns, class_names=["No Diabetes", "Diabetes"], filled=True)
# plt.title("Beslutsträd för Diabetesprediktion")
# plt.show()
