# Data Analysis of the Iris Dataset
This notebook contains data loading, exploration, basic analysis, and visualization of the Iris dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Fetch the Iris data bundled with scikit-learn
iris = load_iris()

# Build the feature table and attach a readable species label in one step:
# the integer class codes in `iris.target` are translated to species names.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    species=pd.Series(iris.target).map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})
)

# Preview the first rows of the assembled frame
df.head()

In [None]:
# Check dataset information: print pandas' structural summary of `df`
# (column names, dtypes and non-null counts) to confirm the frame was
# assembled as expected before analysing it.
df.info()

In [None]:
# Count missing entries in every column (a column of zeros means no gaps)
df.isna().sum()

In [None]:
# Basic statistics: display pandas' descriptive summary of `df`
# (with default arguments this covers the numeric columns only).
df.describe()

In [None]:
# Average of every measurement column within each species group
df.groupby('species').agg('mean')

## Data Visualization

In [None]:
sns.set_style('whitegrid')

# Line chart: sepal length plotted against the row index of `df`.
# Drawn on an explicit Axes object instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df.index, df['sepal length (cm)'], label='Sepal Length')
ax.set_xlabel('Index')
ax.set_ylabel('Sepal Length (cm)')
ax.set_title('Trend of Sepal Length over Index')
ax.legend()
plt.show()

In [None]:
# Bar Chart (Mean petal length per species)
#
# The bars use seaborn's default estimator (the mean), so the heights agree
# with the per-species means from `df.groupby('species').mean()` and with the
# purpose stated above. A sum would only rescale the bars by the number of
# rows in each group and is not comparable across unequal group sizes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x='species',
    y='petal length (cm)',
    hue='species',      # colour bars by species; `palette` without `hue` is deprecated
    data=df,
    palette='viridis',
    dodge=False,        # hue duplicates x, so keep one full-width bar per species
    ax=ax,
)

# The hue legend would merely repeat the x tick labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

ax.set_title('Mean Petal Length per Species')
ax.set_xlabel('Species')
ax.set_ylabel('Mean Petal Length (cm)')
plt.show()

In [None]:
# Histogram: distribution of sepal width across all samples,
# using 20 bins with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df, x='sepal width (cm)', bins=20, kde=True, color='blue', ax=ax)
ax.set_title('Distribution of Sepal Width')
ax.set_xlabel('Sepal Width (cm)')
plt.show()

In [None]:
# Scatter plot: sepal length against petal length, one colour per species
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    palette='deep',
    ax=ax,
)
ax.set_title('Sepal Length vs Petal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Petal Length (cm)')
ax.legend(title='Species')  # rebuild the legend with a capitalised title
plt.show()