# Iris Dataset Analysis
This notebook covers data loading, exploration, basic analysis, and visualizations using the Iris dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# QUESTION 1

# Task 1: Load and explore the Iris dataset.
#
# Only the load itself is guarded. Each handler prints a readable message and
# then re-raises, so a failed load stops the notebook here instead of leaving
# `df` undefined and surfacing later as an unrelated NameError.
try:
    iris_raw = sns.load_dataset("iris")
except FileNotFoundError:
    print("Error: The dataset file was not found.")
    raise
except pd.errors.EmptyDataError:
    print("Error: The dataset file is empty.")
    raise
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    raise

# Display the first few rows of the dataset
print("First 5 rows of the dataset:")
print(iris_raw.head())

# Explore the structure: column dtypes and non-null counts.
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
print("\nData types and missing values:")
iris_raw.info()

# Check for missing values (count of nulls per column)
print("\nMissing values:")
print(iris_raw.isnull().sum())

# Clean the dataset: drop every row that has at least one missing value.
# The raw frame is kept under its own name so this cell can be re-run safely;
# `df` is the cleaned frame that the following cells use.
df = iris_raw.dropna()

In [None]:
# QUESTION TWO
# Descriptive statistics for the columns of the cleaned frame
print("\nBasic statistical description:")
summary_stats = df.describe()
print(summary_stats)

# Split the rows by species, then average each remaining column per group
print("\nMean values grouped by species:")
species_groups = df.groupby("species")
grouped_means = species_groups.mean()
print(grouped_means)

# Observation (hand-written interpretation of the table above, not computed)
print(
    "\nObservation:",
    "Setosa tends to have smaller petal length and width compared to Versicolor and Virginica.",
    sep="\n",
)

In [None]:
# QUESTION 3
# Data visualisation
sns.set(style="whitegrid")

# 1. Line Chart: petal length of every sample, ordered from smallest to largest
plt.figure(figsize=(10, 5))
# sort_values() keeps the original row labels, so after sorting the index is in
# shuffled order. Plotting against it would make the line jump back and forth
# across the x-axis. reset_index(drop=True) replaces the labels with 0..n-1 in
# sorted order, so x is the sample's position in the sorted sequence.
df_sorted = df.sort_values(by="petal_length").reset_index(drop=True)
plt.plot(df_sorted.index, df_sorted["petal_length"], label='Petal Length')
plt.title("Line Chart - Petal Length Trend")
plt.xlabel("Index")
plt.ylabel("Petal Length")
plt.legend()
plt.show()

In [None]:
# 2. Bar Chart: Average petal length per species
# One bar per species, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=df, x="species", y="petal_length", ax=ax)
ax.set_title("Bar Chart - Average Petal Length per Species")
ax.set_xlabel("Species")
ax.set_ylabel("Average Petal Length")
plt.show()

In [None]:
# 3. Histogram: Distribution of Sepal Length
# Sepal length values are split into 15 bins; bar edges are outlined in black.
fig, ax = plt.subplots(figsize=(8, 5))
sepal_lengths = df["sepal_length"]
ax.hist(sepal_lengths, bins=15, color='skyblue', edgecolor='black')
ax.set_title("Histogram - Sepal Length Distribution")
ax.set_xlabel("Sepal Length")
ax.set_ylabel("Frequency")
plt.show()

In [None]:

# 4. Scatter Plot: Sepal Length vs Petal Length
# One point per sample, coloured by species.
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x="sepal_length", y="petal_length", hue="species")
plt.title("Scatter Plot - Sepal Length vs Petal Length")
plt.xlabel("Sepal Length")
plt.ylabel("Petal Length")
# seaborn already draws a legend for the hue variable. A bare plt.legend()
# would replace it with an untitled one, so the title is passed explicitly
# to keep the legend self-explanatory.
plt.legend(title="Species")
plt.show()