<a href="https://colab.research.google.com/github/aabelatilaw/-CodeAlpha_ProjectName-/blob/main/iris_flower_classification_with_machine_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Iris Flower Classification with Machine Learning**

Step 1: Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.datasets import load_iris
import seaborn as sns
import matplotlib.pyplot as plt

Step 2: Load and Explore the Data

In [None]:
# Load the data from 'Iris.csv' (path is relative to the notebook's working directory)
df = pd.read_csv('Iris.csv')

# Explore the data: first rows, column info, summary statistics, class counts
print(df.head())
print("\nData Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the report.
df.info()
print("\nSummary Statistics:")
print(df.describe())
print("\nSpecies Count:")
print(df['Species'].value_counts())

Step 3: Data Preprocessing

In [None]:
# Drop the Id column as it's not needed
df = df.drop(columns='Id')

# Visualize the data BEFORE encoding the target, so the hue legend shows the
# original species labels instead of the integer codes produced below.
sns.pairplot(df, hue='Species')
plt.show()

# Encode the species labels as integers; label_encoder is kept so later cells
# can map the integer codes back to the original labels.
label_encoder = LabelEncoder()
df['Species'] = label_encoder.fit_transform(df['Species'])

Step 4: Prepare Training and Test Data

In [None]:
# Split into features (every column except 'Species') and target
X = df.drop(columns='Species')
y = df['Species']

# Split into training and test sets (80% train, 20% test).
# stratify=y keeps the class proportions of y the same in both splits, so the
# small test set is not skewed toward or away from any class by chance.
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

Step 5: Train the Model

In [None]:
# Build a 100-tree random forest with a fixed seed and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Echo the fitted estimator as the cell output
model

Step 6: Evaluate the Model

In [None]:
# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Integer codes for every class known to the encoder, in the same order as
# label_encoder.classes_. Passing them explicitly keeps the report rows and the
# matrix axes aligned with the class names even if some class happens to be
# absent from y_test / y_pred (otherwise the inferred label set would be shorter
# than the list of names).
class_codes = np.arange(len(label_encoder.classes_))

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred,
                            labels=class_codes,
                            target_names=label_encoder.classes_))

# Confusion matrix (rows = actual class, columns = predicted class)
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_codes)
plt.figure(figsize=(8,6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

Step 7: Feature Importance Analysis

In [None]:
# Pair each input column with the importance score from the fitted model
features = X.columns
importances = model.feature_importances_

# Tabulate the scores, highest first
feature_importance = (
    pd.DataFrame({'Feature': features, 'Importance': importances})
    .sort_values('Importance', ascending=False)
)

# Bar chart of the ranked importances, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importance, ax=ax)
ax.set_title('Feature Importance')
plt.show()

Step 8: Make Predictions on New Data

In [None]:
# Example new data (sepal length, sepal width, petal length, petal width)
new_data = [[5.1, 3.5, 1.4, 0.2],  # Likely setosa
            [6.0, 2.7, 5.1, 1.6],  # Likely versicolor
            [6.3, 3.3, 6.0, 2.5]]  # Likely virginica

# The model was fitted on a DataFrame, so give the new samples the same column
# names. This makes the value-to-feature mapping explicit and avoids the
# feature-name mismatch warning that recent scikit-learn versions emit when a
# model fitted with named columns is asked to predict on a bare list/array.
new_samples = pd.DataFrame(new_data, columns=X.columns)

# Make predictions
predictions = model.predict(new_samples)

# Convert numeric predictions back to species names
predicted_species = label_encoder.inverse_transform(predictions)

print("\nPredictions for new data:")
for sample_no, pred in enumerate(predicted_species, start=1):
    print(f"Sample {sample_no}: Predicted Species = {pred}")