In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from catboost import CatBoostClassifier
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Location of the Iris CSV inside the Kaggle input directory.
iris_csv_path = '/kaggle/input/iris-flower-dataset/IRIS.csv'
iris_df = pd.read_csv(iris_csv_path)

In [None]:
# Column names, dtypes and non-null counts: a quick check for missing values.
iris_df.info()

In [None]:
# Summary statistics (count, mean, std, min/max, quartiles) of the numeric columns.
iris_df.describe()

In [None]:
# Preview the first rows to confirm the file parsed as expected.
iris_df.head()

In [None]:
# Number of rows per species, to see whether the classes are balanced.
iris_df['species'].value_counts()

In [None]:
# pairplot is a figure-level function: it builds its own figure and returns a
# PairGrid. No plt.figure() call is made beforehand, because it would not size
# the grid and would only add an empty extra figure to the cell output.
pair_grid = sns.pairplot(iris_df, hue='species')
# Title the grid's own figure; y > 1 places the title just above the top row of panels.
pair_grid.figure.suptitle('Pairplot of Iris Dataset', y=1.02)
plt.show()

In [None]:
# Pairwise correlations between the feature columns (the 'species' label column is excluded).
corr = iris_df.drop(columns='species').corr()

# Draw the annotated matrix on an explicitly created axes.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title('Feature Correlation Heatmap')
plt.show()

In [None]:
# Separate the target column from the feature columns.
y = iris_df['species']
X = iris_df.drop(columns='species')

In [None]:
# Map the species names to integer class ids. The fitted encoder is kept so its
# classes_ can translate ids back to names later.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

In [None]:
# Hold out 20% of the rows for evaluation. stratify keeps the class proportions
# of y_encoded the same in both partitions, so no species is over- or
# under-represented in the small test set; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

In [None]:
# Fit a 100-tree random forest on the training split; the fixed seed makes the fit repeatable.
clf_model = RandomForestClassifier(random_state=42, n_estimators=100)
clf_model.fit(X=X_train, y=y_train)

In [None]:
# Predicted class ids for the held-out rows.
clf_y_pred = clf_model.predict(X_test)

In [None]:
# Overall accuracy plus per-class metrics, labelled with the original species names.
print("Random Forest Performance:")
rf_accuracy = accuracy_score(y_test, clf_y_pred)
print(f"Accuracy: {rf_accuracy:.2f}")
rf_report = classification_report(
    y_test,
    clf_y_pred,
    target_names=label_encoder.classes_,
)
print(rf_report)

In [None]:
# CatBoost settings: 100 boosting iterations, depth-6 trees, learning rate 0.1;
# verbose=0 turns off the per-iteration training log.
catboost_params = {
    'iterations': 100,
    'depth': 6,
    'learning_rate': 0.1,
    'verbose': 0,
}
catboost_model = CatBoostClassifier(**catboost_params)

In [None]:
# Train on the same split as the random forest so both models are compared on identical data.
catboost_model.fit(X_train, y_train)

In [None]:
# For multiclass problems CatBoost returns the predicted labels as a column
# vector of shape (n_samples, 1) rather than a flat array. ravel() flattens it
# so it has the same 1-D shape as y_test and clf_y_pred (and is a no-op if the
# result is already 1-D), which keeps element-wise comparisons from broadcasting.
catboost_y_pred = catboost_model.predict(X_test).ravel()

In [None]:
# Overall accuracy plus per-class metrics, labelled with the original species names.
print("CatBoost Performance:")
catboost_accuracy = accuracy_score(y_test, catboost_y_pred)
print(f"Accuracy: {catboost_accuracy:.2f}")
catboost_report = classification_report(
    y_test,
    catboost_y_pred,
    target_names=label_encoder.classes_,
)
print(catboost_report)