# 📁 Task 1: Load & Clean the Dataset

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Pull the Iris sample data through seaborn's dataset loader.
df = sns.load_dataset(name="iris")

In [None]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

In [None]:
# Report each column's dtype and its count of missing values.
# sep="\n" places every report on its own line below its heading. Embedding
# the newline in the heading instead would let print() insert its default
# single-space separator in front of the report's first row, leaving that row
# indented one character and misaligned with the rows beneath it.
print("Data Types:", df.dtypes, sep="\n")
print("\nMissing Values:", df.isnull().sum(), sep="\n")

In [None]:
# The Iris data has no gaps, so this changes nothing here; it shows how
# missing numeric entries would be imputed with their column's mean.
# Only numeric columns get a fill value; other columns are left as they are.
column_means = df.mean(numeric_only=True)
df = df.fillna(value=column_means)

# 📊 Task 2: Basic Data Analysis

In [None]:
# Summary statistics for the numeric columns, with the default quartiles
# spelled out explicitly.
df.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Mean of every numeric measurement within each species.
species_groups = df.groupby("species")
species_groups.mean(numeric_only=True)

In [None]:
# Optional: bar chart comparing mean petal length across species.
mean_petal_length = df.groupby("species")["petal_length"].mean()
mean_petal_length.plot.bar(title="Average Petal Length by Species")
plt.ylabel("Petal Length (cm)")
plt.show()

# 📈 Task 3: Data Visualization

In [None]:
# Simulate a time series: give every row its own consecutive calendar day
# (starting 2024-01-01) in a new "date" column. The dates are synthetic and
# exist only so the values can be drawn as a line over time.
row_count = len(df)
synthetic_dates = pd.date_range(start="2024-01-01", periods=row_count, freq='D')
df_trend = df.assign(date=synthetic_dates)

# Each date occurs exactly once, so the per-date mean is just that row's value.
sepal_length_by_date = df_trend.groupby("date")["sepal_length"].mean()
sepal_length_by_date.plot(title="Average Sepal Length Over Time")
plt.xlabel("Date")
plt.ylabel("Sepal Length")
plt.show()

In [None]:
# Bar chart of the mean petal length for each species.
petal_means = df.groupby("species")["petal_length"].mean()
petal_means.plot.bar(color='skyblue', title="Petal Length per Species")
plt.ylabel("Average Petal Length")
plt.show()

In [None]:
# Histogram of sepal width over all samples, using 15 bins.
sepal_width = df["sepal_width"]
sepal_width.plot.hist(bins=15, title="Distribution of Sepal Width", color='orange')
plt.xlabel("Sepal Width (cm)")
plt.show()

In [None]:
# Scatter plot of sepal length against petal length, coloured by species.
# scatterplot returns the Axes it drew on, so the title is set on that directly.
scatter_ax = sns.scatterplot(x="sepal_length", y="petal_length", hue="species", data=df)
scatter_ax.set_title("Sepal Length vs Petal Length")
plt.show()