In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Task 1: Load and Explore the Dataset

print("=== Loading Dataset ===")

try:
    # Load the Iris dataset bundled with scikit-learn as pandas objects.
    iris = load_iris(as_frame=True)
    df = iris.frame  # Pandas DataFrame
    # Translate each integer class code in `target` into its species name.
    df['species'] = iris.target_names[iris.target]
except Exception as e:
    # Every later step needs `df`. Report the failure and stop here instead
    # of carrying on into an unrelated NameError further down.
    print(f"Error loading dataset: {e}")
    raise
else:
    print("Dataset successfully loaded!\n")

# Display first few rows
print("First 5 rows of the dataset:")
print(df.head(), "\n")

# Check dataset info
print("Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" after the report.
df.info()
print()

# Check for missing values (count of null entries per column)
print("Missing Values:")
print(df.isnull().sum(), "\n")

# No missing values in Iris dataset, but if there were:
# df = df.dropna()  # or forward-fill instead: df = df.ffill()

# Task 2: Basic Data Analysis

print("=== Basic Data Analysis ===")

# Summary statistics over the whole frame, and the per-species mean of every
# remaining column.
summary_stats = df.describe()
grouped_means = df.groupby("species").mean()

# Print each table under its heading, followed by a blank line.
for heading, table in (
    ("\nDescriptive Statistics:", summary_stats),
    ("Average values per species:", grouped_means),
):
    print(heading)
    print(table, "\n")

# Example of a written observation drawn from the per-species means.
print("Observation: Setosa flowers tend to have smaller petal length and width compared to Versicolor and Virginica.\n")

# Task 3: Data Visualization

print("=== Data Visualization ===")

# Apply seaborn's "whitegrid" style to the figures drawn from here on.
sns.set(style="whitegrid")

# 1. Line chart: sepal length plotted against the row index, with the index
#    standing in for a time-like variable to simulate a trend.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.index, df["sepal length (cm)"], label="Sepal Length", color="blue")
ax.set_title("Line Chart: Sepal Length over Index")
ax.set_xlabel("Index")
ax.set_ylabel("Sepal Length (cm)")
ax.legend()
plt.show()

# 2. Bar chart: average petal length per species. seaborn aggregates the rows
#    of each category itself; ci=None suppresses the error bars.
# NOTE(review): newer seaborn releases deprecate `ci=` (in favor of
# `errorbar=`) and `palette=` without `hue=` -- confirm against the installed
# version before changing these arguments.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=df,
    x="species",
    y="petal length (cm)",
    ci=None,
    palette="Set2",
    ax=ax,
)
ax.set_title("Bar Chart: Average Petal Length by Species")
ax.set_xlabel("Species")
ax.set_ylabel("Average Petal Length (cm)")
plt.show()

# 3. Histogram: distribution of sepal width across all rows, in 20 bins.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(
    df["sepal width (cm)"],
    bins=20,
    color="purple",
    edgecolor="black",
)
ax.set_title("Histogram: Distribution of Sepal Width")
ax.set_xlabel("Sepal Width (cm)")
ax.set_ylabel("Frequency")
plt.show()

# 4. Scatter plot: sepal length against petal length, one colour per species.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="petal length (cm)",
    hue="species",
    palette="Set1",
    ax=ax,
)
ax.set_title("Scatter Plot: Sepal Length vs Petal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
ax.legend(title="Species")  # redraw the legend with a capitalised title
plt.show()

# Final status line once every section above has run.
print("\nAll tasks completed successfully ✅")
