In [1]:
# Import numpy for numerical operations and array handling
import numpy as np
# Import pandas for data manipulation and analysis
import pandas as pd
# Import array specifically from numpy for direct array creation
from numpy import array

In [2]:
# Load the Iris flower measurements (sepal/petal dimensions plus species)
# from the CSV file into a pandas DataFrame used by every later cell.
IRIS_CSV = "Iris (1).csv"
df = pd.read_csv(IRIS_CSV)

In [None]:
# Preview the top five rows to check column names and sample values.
df.head(n=5)

In [None]:
# Count the columns of the DataFrame via its column index.
column = len(df.columns)
# A bare expression on the last line makes the notebook display the count.
column

In [None]:
# Print a concise summary of the DataFrame:
# - index range and number of columns
# - each column's name, non-null count, and dtype
# - approximate memory usage
# Comparing the non-null counts with the row count reveals missing values.
df.info()

In [None]:
# List the distinct class labels found in the "Species" column.
# np.unique returns the unique values in sorted order.
# Keep this call as the cell's last expression: a notebook cell only renders
# its final expression, so the displayed labels are computed from the data
# rather than coming from a hard-coded array literal.
np.unique(df["Species"])

In [None]:
# Summarise every numeric column with descriptive statistics:
# - count: number of non-null values
# - mean:  arithmetic average
# - std:   standard deviation
# - min / max: smallest and largest value
# - 25% / 50% / 75%: first quartile, median, and third quartile
# Useful for a quick look at the range and spread of each feature.
df.describe()

In [8]:
# Import seaborn for statistical data visualization
# Seaborn provides a high-level interface for drawing attractive and informative statistical graphics
import seaborn as sns

# Import matplotlib for basic plotting functionality
# Matplotlib is a comprehensive library for creating static, animated, and interactive visualizations
import matplotlib

# Import pyplot from matplotlib for MATLAB-like plotting interface
# Pyplot provides a convenient interface to the matplotlib object-oriented plotting library
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
# Draw one histogram per measurement column on a 2x2 grid of axes
# (figure size 16x8 inches).
fig, axes = plt.subplots(2, 2, figsize=(16, 8))

# Columns in plotting order; axes.flat walks the grid row by row, so the
# panels appear as [0,0], [0,1], [1,0], [1,1].
hist_columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]

for hist_ax, col in zip(axes.flat, hist_columns):
    hist_ax.hist(df[col])
    # Title and label each panel with the plotted column so the figure can be
    # read on its own, without referring back to the code.
    hist_ax.set_title(f"Distribution of {col}")
    hist_ax.set_xlabel(col)
    hist_ax.set_ylabel("Frequency")

# Prevent the titles and axis labels of neighbouring panels from overlapping.
fig.tight_layout()


In [10]:
# Use seaborn's "whitegrid" theme for the plots drawn from here on.
sns.set_style("whitegrid")

# Collect the four measurement columns as a list of pandas Series for the
# box plot, ordered: sepal length, sepal width, petal length, petal width.
feature_names = ("SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm")
data_to_plot = [df[name] for name in feature_names]

In [11]:
# Create (or activate) figure number 1, requesting a 12x8 inch canvas.
fig = plt.figure(1, figsize=(12, 8))

# Add a single axes that fills the figure.
ax = fig.add_subplot(111)

# Draw one box per Series in data_to_plot; bp holds the dictionary of
# artists returned by boxplot.
bp = ax.boxplot(data_to_plot)

# boxplot labels the boxes 1..N by default; replace those ticks with each
# Series' column name so the boxes are identifiable.
ax.set_xticklabels([series.name for series in data_to_plot])
ax.set_title("Distribution of Iris Measurements")
# Unit taken from the "Cm" suffix of the plotted column names.
ax.set_ylabel("Measurement (cm)");
