In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

#import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Problem Statement : 

* The Iris flower dataset consists of three species: setosa, versicolor,
and virginica. These species can be distinguished based on their
measurements. Now, imagine that you have the measurements
of Iris flowers categorized by their respective species. Your
objective is to train a machine learning model that can learn from
these measurements and accurately classify the Iris flowers into
their respective species.

* Use the Iris dataset to develop a model that can classify iris
flowers into different species based on their sepal and petal
measurements. This dataset is widely used for introductory
classification tasks.

# Importing important Libraries

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

# Loading the *iris-flower-dataset* (IRIS.csv)

In [None]:
data = pd.read_csv('/kaggle/input/iris-flower-dataset/IRIS.csv')
data

# Separate features (X) and target labels (y)

In [None]:
X = data.drop('species', axis=1)
y = data['species']
X,y

# Split the data into training and testing sets using *train_test_split*.

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features 
**Standardize the features using StandardScaler to scale them to a standard normal distribution.**


In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#  Visualize feature importances
**Visualizing the feature importances using a bar plot.**

In [None]:
feature_importances = clf.feature_importances_
plt.barh(X.columns, feature_importances)
plt.xlabel('Feature Importance')
plt.ylabel('Features')
plt.title('Feature Importance Plot')
plt.show()

# Choosing a Model
**Initialize a RandomForestClassifier and train it on the scaled training data.**

In [None]:
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train_scaled, y_train)

# Predict on the test set
**Training the chosen model using the training data. Adjusting hyperparameters and considering techniques like cross-validation for optimal performance.**

In [None]:
y_pred = clf.predict(X_test_scaled)

# Evaluate the Model
**Evaluate the Model's performance on the testing set. Common metrics for classification include accuracy, precision, recall, F1-score, and confusion matrix.**

In [None]:
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", classification_rep)

# Discussion of Results

The classification model built for the Iris flower dataset exhibits promising results. With an **accuracy score of 1.0** indicating correct classification of species, the model seems to effectively classify between iris types. 
The classification report offers insights into precision, recall, and F1-score for each species, facilitating a more nuanced evaluation. Balanced metrics across classes suggest a well-rounded performance. The feature importance plot unveils the varying impact of attributes on predictions. Sepal and petal dimensions are particularly influential, aligning with botanical understanding.


# Conclusion

In this project, a Random Forest classification model was employed to categorize Iris flower species. The model demonstrated high accuracy, effectively distinguishing species based on sepal and petal attributes. The feature importance analysis underscored the significance of these attributes in classification. The project's success highlights the utility of machine learning in botanical research and practical applications. By achieving accurate species identification, the model can contribute to fields like horticulture and species conservation. This project exemplifies the potential of AI-driven classification, fostering advancements in diverse domains through accurate and efficient data-driven decision-making.