In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raisin workbook into a DataFrame.
# NOTE(review): the path is anchored at the filesystem root — confirm this is
# where the file lives in the target environment.
df = pd.read_excel('/Raisin_Dataset.xlsx')

print(df.head())

print("\nMissing Values:")
print(df.isnull().sum())

# Encode the two class labels as integers (Kecimen -> 0, Besni -> 1).
# Series.map turns any label not in the mapping into NaN without complaint, so
# check the result explicitly and fail here with a readable message rather than
# deep inside the splitter or the classifier.
class_codes = {'Kecimen': 0, 'Besni': 1}
encoded_class = df['Class'].map(class_codes)
if encoded_class.isna().any():
    bad_labels = df.loc[encoded_class.isna(), 'Class'].unique().tolist()
    raise ValueError(
        f"Unexpected or missing values in 'Class': {bad_labels}; "
        f"expected only {list(class_codes)}"
    )
df['Class'] = encoded_class

# Features are every column except the target.
X = df.drop('Class', axis=1)
y = df['Class']

# k is the number of cross-validation folds; the splitter shuffles with a fixed
# seed so the fold assignment is the same on every run.
k = 5
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold scores collected by the cross-validation loop.
accuracies = []
precisions = []
recalls = []

In [None]:
# Start from empty score lists every time this cell runs. Without this, running
# the cell a second time appends another set of scores to the previous ones,
# which skews the averages and breaks the per-fold plot (x/y length mismatch).
accuracies, precisions, recalls = [], [], []

for fold, (train_index, test_index) in enumerate(skf.split(X, y), 1):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Seed the tree so the per-fold scores are reproducible between runs; an
    # unseeded tree can break ties between equally good splits differently.
    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # precision/recall use the scorers' default positive label.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)

    accuracies.append(acc)
    precisions.append(prec)
    recalls.append(rec)

    print(f"\nFold {fold}:")
    print(f"Accuracy: {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall: {rec:.2f}")

# Step 5: Average results
# Report the number of folds actually evaluated instead of a hard-coded 5.
print(f"\nAverage Metrics across {len(accuracies)} folds:")
print(f"Avg Accuracy: {np.mean(accuracies):.2f}")
print(f"Avg Precision: {np.mean(precisions):.2f}")
print(f"Avg Recall: {np.mean(recalls):.2f}")

In [None]:
# Plot each metric's score per fold so fold-to-fold variation is visible.
# Both the x-axis labels and the title are derived from k, so they stay correct
# if the number of folds is changed.
folds = [f"Fold {i}" for i in range(1, k + 1)]

plt.figure(figsize=(10,6))
plt.plot(folds, accuracies, marker='o', label='Accuracy', color='skyblue')
plt.plot(folds, precisions, marker='o', label='Precision', color='lightgreen')
plt.plot(folds, recalls, marker='o', label='Recall', color='salmon')
plt.title(f'Performance Metrics per Fold ({k}-Fold CV)')
plt.ylabel('Score')
# Upper limit slightly above 1 so markers at a perfect score are not clipped.
plt.ylim(0, 1.1)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Hold out 20% of the rows for a final evaluation. Stratifying on y keeps the
# class proportions the same in the train and test parts, consistent with the
# stratified cross-validation used earlier; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Build a seeded decision tree and train it on the hold-out training split.
# The fit call is left as the cell's final expression so the fitted estimator
# is still displayed as the cell output.
model = DecisionTreeClassifier(random_state=42)
model.fit(X=X_train, y=y_train)

In [None]:
# Score the fitted tree on the held-out rows.
y_pred = model.predict(X_test)

conf_matrix = confusion_matrix(y_test, y_pred)
# Same three scorers, applied in the same order, unpacked into the names the
# plotting cell reads afterwards.
acc, prec, rec = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score)
)

# One print per group; sep="\n" reproduces the line-per-item layout.
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print(
    f"\nAccuracy: {acc:.2f}",
    f"Precision: {prec:.2f}",
    f"Recall: {rec:.2f}",
    sep="\n",
)

In [None]:
# Bar chart of the three hold-out scores, drawn through an explicit Axes.
metrics = {'Accuracy': acc, 'Precision': prec, 'Recall': rec}

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=list(metrics.keys()), y=list(metrics.values()), ax=ax)
ax.set_ylim(0, 1)
ax.set_title("Decision Tree Evaluation Metrics")
ax.set_ylabel("Score")
ax.grid(True)
fig.tight_layout()
plt.show()
