In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
from sklearn.feature_selection import RFE
from sklearn.feature_selection import chi2
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", None)
dFrame = pd.read_excel("Data_processed.xlsx")


def cleanData(dataFrame):
    # Null değerleri veya 0.0'ları NaN ile değiştir
    dataFrame.replace(0.0, np.nan, inplace=True)
    
    # Her sütun için veri tipi kontrolü yaparak doldurma işlemi
    for col in dataFrame.columns:
        if dataFrame[col].isnull().any():  # Sütunda NaN varsa işlem yap
            if dataFrame[col].dtype in ['float64', 'int64']:  # Sayısal sütun
                dataFrame[col].fillna(dataFrame[col].mean(), inplace=True)
            else:  # Kategorik sütun
                dataFrame[col].fillna(dataFrame[col].mode()[0], inplace=True)

    
    return dataFrame



def changeToNumeric(dataFrame):
    for col in dataFrame.columns:
        if dataFrame[col].dtype not in ['float64', 'int64']:
            dataFrame[col] = dataFrame[col].astype('category').cat.codes  # Kategorik verileri sayısallaştırma
    return dataFrame


def selectFeatures(dataFrame):
    X = dataFrame.drop('GrainYield', axis=1)
    y = dataFrame['GrainYield']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = DecisionTreeClassifier(random_state=42)
    
    selector = RFE(model, n_features_to_select=2)
    selector = selector.fit(X_train, y_train)
    
    selected_features = X.columns[selector.support_]
    print("Seçilen Özellikler:", selected_features)
    
    X_train_selected = X_train[selected_features]
    X_test_selected = X_test[selected_features]
    
    model.fit(X_train_selected, y_train)
    
    y_pred = model.predict(X_test_selected)
    accuracy = accuracy_score(y_test, y_pred)
    print("Model Doğruluğu:", accuracy)
    
    print(classification_report(y_test, y_pred))
    
    
    conf_matrix = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(7, 5))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="Blues", xticklabels=np.unique(y), yticklabels=np.unique(y))
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

 



dFrame = changeToNumeric(dFrame)
cleanedData = cleanData(dFrame)
selectFeatures(cleanedData)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataFrame[col].fillna(dataFrame[col].mean(), inplace=True)


ValueError: Unknown label type: continuous. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.