### Importing libraries:

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

### Loading the data:

In [2]:
file_path = r'/kaggle/input/iris-flower-dataset/IRIS.csv'
data = pd.read_csv(file_path)

In [3]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
data.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [5]:
x = data.drop('species', axis=1)
y = data['species']

### Splitting the data into training and test sets:

In [6]:
X_train,X_test,Y_train,Y_test= train_test_split(x,y,test_size=0.2,random_state=42)

### Defining the pipeline and grid search for RandomForestClassifier:

In [7]:
pipeline_rf = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', RandomForestClassifier(random_state=42))
])

In [8]:
param_grid_rf = {
    'clf__n_estimators': [100, 200, 300],  # Number of trees in the forest
    'clf__max_depth': [None, 10, 20, 30],   # Maximum depth of the tree
    'clf__min_samples_split': [2, 5, 10],   # Minimum number of samples required to split a node
    'clf__min_samples_leaf': [1, 2, 4]      # Minimum number of samples required at each leaf node
}

### Searching for the best parameters with GridSearchCV:

In [9]:
grid_search = GridSearchCV(pipeline_rf , param_grid_rf, cv=5, scoring='accuracy')
grid_search.fit(X_train,Y_train)


### Evaluating the model and displaying results:

In [10]:
best_model = grid_search.best_estimator_

In [11]:
y_pred = best_model.predict(X_test)

In [12]:
accuracy = accuracy_score(Y_test, y_pred)
report = classification_report(Y_test, y_pred)

In [13]:

print(f'Best Parameters: {grid_search.best_params_}')
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)


Best Parameters: {'clf__max_depth': None, 'clf__min_samples_leaf': 1, 'clf__min_samples_split': 2, 'clf__n_estimators': 200}
Accuracy: 1.0
Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



### Cross-validation score:

In [14]:
cv_scores = cross_val_score(best_model, x, y, cv=5, scoring='accuracy')
print(f'Cross-validation Accuracy: {cv_scores.mean()}')

Cross-validation Accuracy: 0.9666666666666668
