# Analyzing Data with Pandas and Visualizing Results with Matplotlib
This notebook loads the Iris dataset from scikit-learn, explores it with pandas, and visualizes it with Matplotlib and seaborn.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

## Task 1: Load and Explore the Dataset

In [None]:
# Load the Iris data into a DataFrame with a human-readable species column.
try:
    iris = load_iris()
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    # Translate the integer class codes into their names; enumerate() keeps
    # the mapping in step with however many classes target_names lists.
    species_by_code = dict(enumerate(iris.target_names))
    df['species'] = pd.Series(iris.target, index=df.index).map(species_by_code)
    print("Dataset loaded successfully.")
except Exception as e:
    print("Error loading dataset:", e)
    # Re-raise so execution stops here; otherwise later cells would run
    # against a missing (or stale) `df` and hide the real cause.
    raise

In [None]:
# Preview the first five rows to check the columns and species labels.
df.head(n=5)

In [None]:
# Print column dtypes, non-null counts, and memory usage.
df.info()

In [None]:
# Count missing values per column.
df.isna().sum()

## Task 2: Basic Data Analysis

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Mean of every measurement column, one row per species.
grouped = df.groupby(by='species').mean()
grouped

### Observations
- Based on the per-species means above, setosa has the smallest petal length and width.
- Virginica has the largest petal length and width.

## Task 3: Data Visualization

In [None]:
# Line chart - sepal length plotted against row position (a simulated "trend").
# The x-axis is the DataFrame index, not time. Grouping by the index and
# averaging is a no-op on a unique index, so the column is plotted directly.
fig, ax = plt.subplots(figsize=(8, 4))
df['sepal length (cm)'].plot(ax=ax, title='Sepal Length Trend')
ax.set_xlabel("Index")
ax.set_ylabel("Sepal Length (cm)")
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Bar chart - mean petal length for each species (seaborn aggregates the rows).
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(data=df, x='species', y='petal length (cm)', ax=ax)
ax.set_title('Average Petal Length per Species')
ax.set_xlabel('Species')
ax.set_ylabel('Petal Length (cm)')
fig.tight_layout()
plt.show()

In [None]:
# Histogram - how sepal length is distributed across all samples.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(df['sepal length (cm)'], bins=15, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Sepal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Frequency')
fig.tight_layout()
plt.show()

In [None]:
# Scatter plot - sepal length against petal length, colored by species.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    ax=ax,
)
ax.set_title('Sepal vs Petal Length')
fig.tight_layout()
plt.show()