# 📊 Data Analysis & Visualization Assignment

## Task 1: Load and Explore the Dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset as a pandas DataFrame (feature columns plus `target`)
iris = load_iris(as_frame=True)
df = iris.frame

# Add a human-readable label: each integer code in `target` is the position
# of the corresponding name in `iris.target_names`.
target_to_name = dict(enumerate(iris.target_names))
df['species'] = df['target'].map(target_to_name)

# Display first rows
print("First 5 rows of dataset:")
display(df.head())

# Check structure
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

# Count missing values per column
print("\nMissing values:")
print(df.isnull().sum())

## Task 2: Basic Statistics and Grouping

In [None]:
# Basic Statistics
# describe() summarises the numeric columns (count, mean, std, quartiles, ...)
print("\nBasic Statistics:")
display(df.describe())

# Median values
# `species` holds strings, so the median is restricted to numeric columns.
# Without numeric_only=True, pandas >= 2.0 raises TypeError on this frame.
print("\nMedian values:")
print(df.median(numeric_only=True))

# Grouping by species
# Per-species mean of every remaining column; `species` becomes the index.
grouped = df.groupby('species').mean()
print("\nMean values grouped by species:")
display(grouped)

## Task 3: Data Visualization

In [None]:
# 1. Line chart - Cumulative mean of sepal length
# The running (expanding-window) mean is stored as a new column on `df`,
# then plotted against the row index.
sepal_length = df['sepal length (cm)']
df['cum_mean_sepal_length'] = sepal_length.expanding().mean()

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(
    df.index,
    df['cum_mean_sepal_length'],
    label="Cumulative Mean Sepal Length",
)
ax.set_title("Trend of Sepal Length Over Observations")
ax.set_xlabel("Observation Index")
ax.set_ylabel("Sepal Length (cm)")
ax.legend()
plt.show()

In [None]:
# 2. Bar chart - Average petal length per species
# barplot aggregates each species' petal lengths with its default estimator
# (the mean). `errorbar=None` turns off the error bars; it replaces the
# `ci=None` spelling, which is deprecated since seaborn 0.12 and emits a
# FutureWarning.
plt.figure(figsize=(8,5))
sns.barplot(x="species", y="petal length (cm)", data=df, errorbar=None)
plt.title("Average Petal Length per Species")
plt.xlabel("Species")
plt.ylabel("Petal Length (cm)")
plt.show()

In [None]:
# 3. Histogram - Distribution of sepal width
# Sepal widths are binned into 20 equal-width bins on a dedicated Axes.
fig, ax = plt.subplots(figsize=(8, 5))
sepal_width = df["sepal width (cm)"]
ax.hist(sepal_width, bins=20, color="skyblue", edgecolor="black")
ax.set_title("Distribution of Sepal Width")
ax.set_xlabel("Sepal Width (cm)")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# 4. Scatter plot - Sepal length vs Sepal width
# One point per row, coloured by the `species` label.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="sepal length (cm)",
    y="sepal width (cm)",
    hue="species",
    ax=ax,
)
ax.set_title("Sepal Length vs Sepal Width")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Sepal Width (cm)")
ax.legend(title="Species")
plt.show()

## Findings & Observations
- The Setosa species tends to have shorter petals than Versicolor and Virginica.
- The histogram shows Sepal Width is roughly normally distributed but slightly skewed.
- The scatter plot indicates Setosa is clearly separated from the other two species in sepal measurements.
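- The line chart shows the cumulative mean of sepal length changing less and less as more observations are included. Note that the rows are ordered by species, so the curve reflects that ordering rather than a trend over time.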