# 📘 Assignment: Analyzing Data with Pandas and Visualizing Results with Matplotlib
---
This notebook covers:
- Loading and exploring a dataset with **pandas**
- Performing basic analysis
- Creating visualizations with **matplotlib** and **seaborn**
- Summarizing findings and observations


In [None]:
# -------------------------------
# Import required libraries
# -------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

## Task 1: Load and Explore Dataset

In [None]:
# Load the Iris example dataset through seaborn.
# Any failure is reported and then re-raised: swallowing it would leave `df`
# undefined, and every later cell would fail with an unrelated NameError.
try:
    df = sns.load_dataset("iris")
except FileNotFoundError:
    print("❌ Error: File not found. Please check the path.")
    raise
except Exception as e:
    print(f"❌ Error: {e}")
    raise
else:
    # Announce success only once the load has actually completed.
    print("✅ Dataset loaded successfully!\n")

In [None]:
# Preview the top five records to sanity-check the columns and values
df.head(n=5)

In [None]:
# Structural overview of the frame.
# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called directly; wrapping it in print() would emit a stray "None" line.
print("Dataset Info:")
df.info()

# Number of missing values in each column
print("\nMissing Values:")
print(df.isnull().sum())

In [None]:
# Remove every row that has at least one missing value, then report the size
df = df.dropna(axis=0, how="any")
print("✅ Cleaned dataset shape:", df.shape)

## Task 2: Basic Data Analysis

In [None]:
# Descriptive statistics, with the quartiles to report spelled out explicitly
df.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Mean petal length for each species
grouped = df.groupby("species")["petal_length"].agg("mean")
print("Average Petal Length per Species:", grouped, sep="\n")

## Task 3: Data Visualization

In [None]:
# 1. Line chart: sepal and petal length plotted against the row index
fig, ax = plt.subplots(figsize=(8, 5))
for column, label in (
    ("sepal_length", "Sepal Length"),
    ("petal_length", "Petal Length"),
):
    ax.plot(df.index, df[column], label=label)
ax.set_title("Line Chart: Sepal vs Petal Length")
ax.set_xlabel("Index")
ax.set_ylabel("Length (cm)")
ax.legend()
plt.show()

In [None]:
# 2. Bar chart: one bar per species, drawn on an explicitly created axes
fig, ax = plt.subplots(figsize=(7, 5))
grouped.plot.bar(ax=ax, color=["#4c72b0", "#55a868", "#c44e52"])
ax.set_title("Average Petal Length per Species")
ax.set_xlabel("Species")
ax.set_ylabel("Petal Length (cm)")
plt.show()

In [None]:
# 3. Histogram: sepal length binned into 15 intervals
fig, ax = plt.subplots(figsize=(7, 5))
ax.hist(df["sepal_length"], bins=15, color="skyblue", edgecolor="black")
ax.set_title("Histogram: Distribution of Sepal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# 4. Scatter plot: sepal length against petal length, coloured by species
fig, ax = plt.subplots(figsize=(7, 5))
sns.scatterplot(
    data=df,
    x="sepal_length",
    y="petal_length",
    hue="species",
    palette="deep",
    ax=ax,
)
ax.set_title("Scatter Plot: Sepal Length vs Petal Length")
ax.set_xlabel("Sepal Length (cm)")
ax.set_ylabel("Petal Length (cm)")
ax.legend(title="Species")
plt.show()

## Findings & Observations

In [None]:
# Written-up conclusions, emitted as a single newline-joined report
findings_report = (
    "\n📊 Findings & Observations:",
    "- No missing values found in the Iris dataset.",
    "- Setosa has the smallest petal sizes on average.",
    "- Virginica has the largest petal sizes.",
    "- Sepal length distribution is mostly between 5.0–6.5 cm.",
    "- Scatter plot shows clear clusters of species, indicating strong relationships between sepal and petal length.",
)
print("\n".join(findings_report))