# Pandas

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import scatter_matrix

In [None]:
# Load the Iris measurements; every later cell in this notebook reads `iris`.
iris = pd.read_csv('data/iris.csv')

# Fail fast with a readable message (instead of a KeyError deep inside a
# plotting cell) if the CSV lacks a column the notebook selects by name.
expected_columns = {'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'}
missing_columns = expected_columns - set(iris.columns)
assert not missing_columns, f"data/iris.csv is missing columns: {sorted(missing_columns)}"

In [None]:
# Preview the first five rows of the loaded frame
iris.head(n=5)

## Basic Data Inspection

In [None]:
# Number of rows recorded for each distinct value of the species column
iris.loc[:, 'species'].value_counts()

In [None]:
# Bar plot of species counts.
# pandas returns the matplotlib Axes it drew on, so the chart is labelled
# through that Axes object; `rot=45` tilts the category tick labels.
# (The pyplot equivalents are plt.title / plt.xlabel / plt.ylabel / plt.xticks(rotation=45).)
ax = iris['species'].value_counts().plot.bar(
    figsize=(8, 6), color='coral', edgecolor='black', rot=45)
ax.set_title('Count of Each Species')
ax.set_xlabel('Species')
ax.set_ylabel('Count')
plt.tight_layout()
plt.show()

In [None]:
# Horizontal bar plot: the same species tallies with categories on the y-axis
ax = iris['species'].value_counts().plot(
    kind='barh', figsize=(8, 6), color='lightgreen', edgecolor='black')
ax.set(title='Count of Each Species (Horizontal)', xlabel='Count', ylabel='Species')
plt.tight_layout()
plt.show()

In [None]:
# Histogram of a single column (sepal_length), split into 20 bins
ax = iris['sepal_length'].hist(bins=20, color='skyblue', edgecolor='black')
ax.set(
    title='Distribution of Sepal Length',
    xlabel='Sepal Length (cm)',
    ylabel='Frequency',
)
plt.show()


In [None]:
# Box plot for all numeric features; `rot=45` tilts the column names on the x-axis
ax = iris.boxplot(figsize=(10, 6), grid=False, rot=45)
ax.set_title('Box Plot of Iris Features')
ax.set_ylabel('Value (cm)')
plt.tight_layout()
plt.show()

In [None]:
# Box plots grouped by species, laid out on a 2x2 grid of panels
iris.boxplot(by='species', layout=(2, 2), figsize=(12, 8))

# Title and layout are applied to the figure pandas just drew into
fig = plt.gcf()
fig.suptitle('Box Plots by Species', y=1.02, fontsize=16)
fig.tight_layout()
plt.show()

In [None]:
# Scatter plot with color by species
colors = {'setosa': 'red', 'versicolor': 'blue', 'virginica': 'green'}

# Draw one scatter layer per species on a shared Axes so that each layer
# carries its own label; the resulting legend tells the reader which color
# stands for which species. Rows whose species is not a key of `colors`
# are not drawn.
fig, ax = plt.subplots(figsize=(8, 6))
for species_name, species_color in colors.items():
    iris[iris['species'] == species_name].plot.scatter(
        x='petal_length', y='petal_width', color=species_color,
        label=species_name, alpha=0.6, s=50, ax=ax)

# Set labels after plotting: each pandas call resets them to the column names
ax.set_title('Petal Length vs Petal Width by Species')
ax.set_xlabel('Petal Length (cm)')
ax.set_ylabel('Petal Width (cm)')
ax.legend(title='Species')
plt.show()

In [None]:
# Scatter matrix to see all pairwise relationships between the four measurements.
# `scatter_matrix` is imported in the notebook's import cell at the top, so this
# cell no longer carries its own import.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
scatter_matrix(iris[feature_columns], figsize=(12, 12), diagonal='hist', alpha=0.5)
plt.suptitle('Scatter Matrix of Iris Features', y=0.995, fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# Density (KDE) curves for the four measurement columns, drawn on one Axes
ax = iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].plot(
    kind='kde', figsize=(10, 6), alpha=0.7)
ax.set(title='Density Plot of Iris Features', xlabel='Value (cm)', ylabel='Density')
ax.legend(loc='best')
ax.grid(True, alpha=0.3)  # faint grid lines
plt.tight_layout()
plt.show()

In [None]:
# Area plot of the four measurements, with rows ordered by sepal length.
# The index is renumbered after sorting so the x-axis follows the sorted order.
iris_sorted = iris.sort_values(by='sepal_length', ignore_index=True)
ax = iris_sorted[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].plot(
    kind='area', figsize=(12, 6), alpha=0.4)
ax.set(
    title='Area Plot of Iris Features (Sorted by Sepal Length)',
    xlabel='Index',
    ylabel='Cumulative Value (cm)',
)
ax.legend(loc='best')
plt.tight_layout()
plt.show()