# Iris Dataset Exploratory Data Analysis (EDA)

### Author: Christopher Braun

Purpose: Exploratory Data Analysis of the Iris dataset using pandas and visualization tools

Date: September 2025

### **1. Imports**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Global plotting defaults: seaborn "whitegrid" theme and a 10x6 default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (10, 6)})

### **2. Load Data**

In [None]:
# Fetch the Iris sample dataset through seaborn and keep it as a DataFrame
iris_df: pd.DataFrame = sns.load_dataset('iris')

# Show the column labels (label, list, and blank spacer, one item per line)
column_names = iris_df.columns.tolist()
print("Column names:", column_names, "\n", sep="\n")

# Preview the top of the DataFrame; this must remain the cell's final
# expression so that the notebook renders the table
print("First 5 rows:")
iris_df.head(n=5)

### **3. Initial Data Inspection**

In [None]:
# Specify the number of rows to display.
# The frame is printed explicitly: a bare `iris_df.head(10)` expression is only
# rendered by a notebook when it is the LAST statement of the cell, so in the
# middle of the cell its result was silently discarded.
print("First 10 rows:")
print(iris_df.head(10))

# Inspect the shape of the DataFrame with the shape attribute (rows, columns)
n_rows, n_cols = iris_df.shape
print(f"DataFrame shape: {iris_df.shape}")
print(f"Number of rows: {n_rows}")
print(f"Number of columns: {n_cols}")

# Inspect the data types of the columns with the dtypes attribute
print("Data types:")
print(iris_df.dtypes)

# Inspect column dtypes, non-null counts and memory usage with the info()
# method (info() writes its report to stdout itself and returns None)
iris_df.info()

### **4. Initial Descriptive Statistics**

In [None]:
# Compute summary statistics with describe(), then echo the result as the
# cell's final expression so the notebook renders it
summary_stats = iris_df.describe()
summary_stats

### **5. Initial Data Distribution for Numerical Columns**

In [None]:
# Histogram of a single numerical column, labelled through the Axes object
# that pandas returns
sepal_ax = iris_df['sepal_length'].hist()
sepal_ax.set_title('Distribution of Sepal Length')
sepal_ax.set_xlabel('Sepal Length (cm)')
sepal_ax.set_ylabel('Frequency')
plt.show()

# Histograms for every numerical column; DataFrame.hist() draws the grid of
# subplots, then the title and layout are applied to the current figure
iris_df.hist()
all_features_fig = plt.gcf()
all_features_fig.suptitle('Distribution of All Numerical Features', y=1.02)
all_features_fig.tight_layout()
plt.show()

### **6. Initial Data Distribution for Categorical Columns**

In [None]:
# Tally how many rows belong to each species
species_counts = iris_df['species'].value_counts()
print("Species value counts:")
print(species_counts)
print("\n")

# Bar chart of the per-species row counts, labelled through the returned Axes
species_ax = sns.countplot(data=iris_df, x='species')
species_ax.set_title('Distribution of Iris Species')
species_ax.set_xlabel('Species')
species_ax.set_ylabel('Count')
plt.show()