In [16]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
import numpy as np
from scipy.io import arff

In [17]:

# Load the three datasets used in the comparison below.

# Wine quality (red): the file is read with ';' as the field separator.
df1 = pd.read_csv(r"datasets\wine+quality\winequality-red.csv", sep=";")

# NOTE(review): this is an absolute, machine-specific path, while the other two
# datasets are loaded relative to the working directory. Confirm the layout and
# make it relative so the notebook runs on other machines.
data, meta = arff.loadarff(r'D:\ML\datasets\DryBeanDataset\Dry_Bean_Dataset.arff')
df2 = pd.DataFrame(data)

# Cast the class column to str, then encode it as integer labels.
df2['Class'] = df2['Class'].astype(str)
le = LabelEncoder()
df2['Class'] = le.fit_transform(df2['Class'])

# Keep a 500-row random subset. The draw is seeded so that Restart & Run All
# selects the same rows (and therefore reports the same scores) every time.
df2 = df2.sample(500, random_state=42)

df3 = pd.read_csv(r"datasets\Raisin_Dataset\Raisin_Dataset.csv")

In [18]:

# Display labels for the datasets; the order matches the lists stored in `datasets`.
datasets_name = ['WINE', 'DRYBEAN', 'RAISIN']

# Each frame paired with the name of its target column.
_frame_and_target = [(df1, 'quality'), (df2, 'Class'), (df3, 'Class')]

# Parallel lists: the full frame, the feature matrix (target column dropped)
# and the target series for every dataset.
datasets = {
    'datasets': [frame for frame, _ in _frame_and_target],
    'X': [frame.drop(columns=target) for frame, target in _frame_and_target],
    'y': [frame[target] for frame, target in _frame_and_target],
}

In [19]:
# Hyper-parameter sets evaluated for RandomForestClassifier on every dataset.
# Each dict is unpacked directly into the estimator's constructor, so every key
# must be an exact keyword name (the second entry previously contained
# 'min _samples_leaf' with a stray space).
combinations = [
    {'n_estimators': 50, 'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 10},
    {'n_estimators': 100, 'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt'},
    {'n_estimators': 200, 'criterion': 'log_loss', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 8, 'max_features': 'log2'}
]

In [20]:
# One shuffled 5-fold splitter, reused for every dataset / parameter set.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Hyper-parameters copied into each result row, in column order.
reported_params = (
    'n_estimators',
    'criterion',
    'max_depth',
    'min_samples_split',
    'min_samples_leaf',
    'max_features',
)

results = []

for i, dataset_name in enumerate(datasets_name):
    X, y = datasets['X'][i], datasets['y'][i]
    print(f"\nDataset: {dataset_name}")

    for params in combinations:
        model = RandomForestClassifier(random_state=42, **params)
        fold_accuracies = cross_val_score(model, X, y, scoring='accuracy', cv=kf)

        # One row per (dataset, parameter set): label, parameters, mean accuracy.
        row = {'Dataset': dataset_name}
        for key in reported_params:
            row[key] = params[key]
        row['Mean_CV_Accuracy'] = fold_accuracies.mean()
        results.append(row)

pd.DataFrame(results)


Dataset: WINE

Dataset: DRYBEAN

Dataset: RAISIN


Unnamed: 0,Dataset,n_estimators,criterion,max_depth,min_samples_split,min_samples_leaf,max_features,Mean_CV_Accuracy
0,WINE,50,gini,1,2,1,10,0.544751
1,WINE,100,entropy,5,5,4,sqrt,0.612929
2,WINE,200,log_loss,10,10,8,log2,0.639197
3,DRYBEAN,50,gini,1,2,1,10,0.588
4,DRYBEAN,100,entropy,5,5,4,sqrt,0.888
5,DRYBEAN,200,log_loss,10,10,8,log2,0.888
6,RAISIN,50,gini,1,2,1,10,0.857778
7,RAISIN,100,entropy,5,5,4,sqrt,0.862222
8,RAISIN,200,log_loss,10,10,8,log2,0.856667


In [21]:
# Split the flat `results` list into one list of rows per dataset, ordered like
# `datasets_name`. Grouping on the 'Dataset' key (instead of fixed 0:3 / 3:6 /
# 6:9 slices) stays correct if the number of parameter combinations changes.
a = [[row for row in results if row['Dataset'] == name] for name in datasets_name]

# Keep every row: DataFrame.head() would cut the table to its first five rows,
# and the best-configuration lookup further down would then ignore the rest.
wine = pd.DataFrame(a[0])
wine

Unnamed: 0,Dataset,n_estimators,criterion,max_depth,min_samples_split,min_samples_leaf,max_features,Mean_CV_Accuracy
0,WINE,50,gini,1,2,1,10,0.544751
1,WINE,100,entropy,5,5,4,sqrt,0.612929
2,WINE,200,log_loss,10,10,8,log2,0.639197


In [22]:
# Result rows for the second dataset. No .head(): it would cut the table to its
# first five rows, hiding later configurations from the best-row lookup below.
drybean = pd.DataFrame(a[1])
drybean

Unnamed: 0,Dataset,n_estimators,criterion,max_depth,min_samples_split,min_samples_leaf,max_features,Mean_CV_Accuracy
0,DRYBEAN,50,gini,1,2,1,10,0.588
1,DRYBEAN,100,entropy,5,5,4,sqrt,0.888
2,DRYBEAN,200,log_loss,10,10,8,log2,0.888


In [23]:
# Result rows for the third dataset. No .head(): it would cut the table to its
# first five rows, hiding later configurations from the best-row lookup below.
raisin = pd.DataFrame(a[2])
raisin

Unnamed: 0,Dataset,n_estimators,criterion,max_depth,min_samples_split,min_samples_leaf,max_features,Mean_CV_Accuracy
0,RAISIN,50,gini,1,2,1,10,0.857778
1,RAISIN,100,entropy,5,5,4,sqrt,0.862222
2,RAISIN,200,log_loss,10,10,8,log2,0.856667


In [24]:
# Row(s) with the highest mean CV accuracy (ties are all kept).
# Series.max() skips NaN scores; the builtin max() can return NaN when a NaN
# comes first, in which case the equality mask would select nothing.
wine.loc[wine['Mean_CV_Accuracy'] == wine['Mean_CV_Accuracy'].max()]

Unnamed: 0,Dataset,n_estimators,criterion,max_depth,min_samples_split,min_samples_leaf,max_features,Mean_CV_Accuracy
2,WINE,200,log_loss,10,10,8,log2,0.639197


In [25]:
# Row(s) with the highest mean CV accuracy (ties are all kept).
# Series.max() skips NaN scores; the builtin max() can return NaN when a NaN
# comes first, in which case the equality mask would select nothing.
drybean.loc[drybean['Mean_CV_Accuracy'] == drybean['Mean_CV_Accuracy'].max()]

Unnamed: 0,Dataset,n_estimators,criterion,max_depth,min_samples_split,min_samples_leaf,max_features,Mean_CV_Accuracy
1,DRYBEAN,100,entropy,5,5,4,sqrt,0.888


In [26]:
# Row(s) with the highest mean CV accuracy (ties are all kept).
# Series.max() skips NaN scores; the builtin max() can return NaN when a NaN
# comes first, in which case the equality mask would select nothing.
raisin.loc[raisin['Mean_CV_Accuracy'] == raisin['Mean_CV_Accuracy'].max()]

Unnamed: 0,Dataset,n_estimators,criterion,max_depth,min_samples_split,min_samples_leaf,max_features,Mean_CV_Accuracy
1,RAISIN,100,entropy,5,5,4,sqrt,0.862222
