# Exploratory Data Analysis on the Iris Dataset

In this notebook, we perform exploratory data analysis (EDA) on the Iris dataset. We load the dataset, visualize the distribution of each feature, and examine how the features relate to one another and vary across the Iris species.

In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' theme to every plot drawn after this cell.
# set_theme() is the preferred spelling; sns.set() is kept only as a
# backwards-compatible alias for it.
sns.set_theme(style='whitegrid')

In [2]:
# Read the Iris CSV (path is relative to the notebook's working directory)
iris_csv_path = '../data/iris.csv'
df = pd.read_csv(iris_csv_path)

# Preview the top rows; as the cell's last expression the table is rendered
df.head()

In [3]:
# Summary statistics of the dataset.
# With default arguments, pandas' describe() summarizes only the numeric columns
# of a mixed-dtype frame, so a non-numeric label column (presumably 'species',
# used as the hue/grouping column below) will not appear in this table.
df.describe()

In [4]:
# Visualize the distribution of each numeric feature (histogram + KDE).
# Features are picked by dtype instead of "every column except the last", so the
# cell does not depend on where the label column sits in the CSV, and the grid is
# sized from the number of features instead of being fixed at 2x2 (a fifth column
# would make plt.subplot(2, 2, 5) raise).
feature_columns = df.select_dtypes(include='number').columns
n_cols = 2
# Ceiling division; at least one row so the figure height is never zero
n_rows = max(1, (len(feature_columns) + n_cols - 1) // n_cols)

# 4 inches of height per row keeps the original 12x8 figure for four features
plt.figure(figsize=(12, 4 * n_rows))
for i, column in enumerate(feature_columns):
    plt.subplot(n_rows, n_cols, i + 1)
    sns.histplot(df[column], kde=True, bins=15)
    plt.title(f'Distribution of {column}')
plt.tight_layout()
plt.show()

In [5]:
# Scatter-matrix of every feature pair, with points coloured by the 'species' column
sns.pairplot(data=df, hue='species')
plt.show()

In [6]:
# Box plot of sepal length, one box per species.
# Only the 'sepal_length' column is drawn here; the other features are not plotted.
plt.figure(figsize=(12, 6))
sns.boxplot(x='species', y='sepal_length', data=df)
plt.title('Sepal Length by Species')
plt.show()

In [7]:
# Correlation heatmap of the numeric features.
# df still contains the 'species' label column (presumably strings — it is used as
# the hue/grouping column in earlier cells). Since pandas 2.0, DataFrame.corr()
# no longer silently drops non-numeric columns and raises on them instead, so the
# frame is restricted to numeric columns before computing the correlations.
plt.figure(figsize=(8, 6))
correlation = df.select_dtypes(include='number').corr()
sns.heatmap(correlation, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap')
plt.show()