In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Veri Temizleme
nullValues = 0.000000
def cleanData(dataFrame):
    for index in dataFrame.columns:
        if nullValues in dataFrame[index].values:
            dataFrame[index] = dataFrame[index].mean()
    return dataFrame

# Sayısal Veriye Dönüştürme
def changeToNumeric(dataFrame):
    for col in dataFrame.columns:
        if dataFrame[col].dtype not in ['float64', 'int64']:
            dataFrame[col] = dataFrame[col].astype('category').cat.codes  # Kategorik verileri sayısallaştırma
    return dataFrame

# Özellik Seçimi ve Modelleme
def selectFeatures(dataFrame):
    X = dataFrame.drop('GrainYield', axis=1)  # Hedef değişkeni çıkarma
    y = dataFrame['GrainYield']  # Hedef değişken

    # Veriyi eğitim ve test olarak ayırma
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = DecisionTreeClassifier(random_state=42)
    
    # RFE ile özellik seçimi
    selector = RFE(model, n_features_to_select=2)
    selector = selector.fit(X_train, y_train)
    
    selected_features = X.columns[selector.support_]
    print("Seçilen Özellikler:", selected_features)
    
    # Seçilen özellikler ile eğitim verisi hazırlama
    X_train_selected = X_train[selected_features]
    X_test_selected = X_test[selected_features]
    
    # Modeli eğitme
    model.fit(X_train_selected, y_train)
    
    # Tahmin yapma
    y_pred = model.predict(X_test_selected)
    accuracy = accuracy_score(y_test, y_pred)
    print("Model Doğruluğu:", accuracy)
    
    # Sınıflandırma raporu
    print(classification_report(y_test, y_pred))
    
    # Konfüzyon matrisi
    conf_matrix = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(7, 5))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="Blues", xticklabels=np.unique(y), yticklabels=np.unique(y))
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

# Veriyi temizleme ve sayısallaştırma
cleanedData = cleanData(dFrame)
dFrame = changeToNumeric(cleanedData)

# Özellik seçimi ve modelleme
selectFeatures(dFrame)


ValueError: Unknown label type: continuous. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.