# Matplotlib Plotting Basics

**Course:** MLM-101 - Machine Learning Mastery  
**Phase 5:** Matplotlib for Visualization (Lectures 38-40)  
**Topics:** Line Plots, Scatter Plots, Bar Charts, Subplots

---

## 📚 Learning Objectives

By the end of this notebook, you will be able to:

✅ Create basic plots (line, scatter, bar, histogram)  
✅ Customize plot appearance  
✅ Create multiple subplots  
✅ Visualize ML training metrics  
✅ Save plots to files

---

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

print(f"Matplotlib version: {plt.matplotlib.__version__}")

# Set style for better-looking plots
%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')

## 1️⃣ Line Plots

Perfect for visualizing trends and time series.

In [None]:
# Simple line plot: sin(x) sampled at 100 evenly spaced points on [0, 10]
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Create the figure and its single Axes explicitly, then draw on the Axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y)
ax.set(title='Sine Wave', xlabel='x', ylabel='sin(x)')
ax.grid(True)
plt.show()

In [None]:
# Multiple lines: sine and cosine drawn on the same Axes
x = np.linspace(0, 10, 100)
y1 = np.sin(x)
y2 = np.cos(x)

fig, ax = plt.subplots(figsize=(10, 6))

# Each entry pairs a curve with the styling that distinguishes it;
# both curves share the same line width.
curves = [
    (y1, {'label': 'sin(x)', 'color': 'blue'}),
    (y2, {'label': 'cos(x)', 'color': 'red', 'linestyle': '--'}),
]
for values, line_style in curves:
    ax.plot(x, values, linewidth=2, **line_style)

ax.set_title('Sine and Cosine Waves')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

### 🎯 ML Example: Training Loss Curve

In [None]:
# Simulate training history: exponentially decaying loss plus Gaussian noise.
# Seed the generator so the noisy curves and the printed final losses are the
# same on every run (the other random cells in this notebook seed with 42 too).
np.random.seed(42)
epochs = np.arange(1, 51)
n_epochs = len(epochs)  # keep the noise vectors the same length as `epochs`
train_loss = 2.0 * np.exp(-epochs/10) + 0.1 + np.random.normal(0, 0.05, n_epochs)
val_loss = 2.0 * np.exp(-epochs/10) + 0.2 + np.random.normal(0, 0.08, n_epochs)

plt.figure(figsize=(10, 6))
plt.plot(epochs, train_loss, label='Training Loss', color='blue', linewidth=2)
plt.plot(epochs, val_loss, label='Validation Loss', color='orange', linewidth=2)
plt.title('Model Training History', fontsize=16, fontweight='bold')
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Report the loss reached at the last simulated epoch
print(f"Final Training Loss: {train_loss[-1]:.4f}")
print(f"Final Validation Loss: {val_loss[-1]:.4f}")

---

## 2️⃣ Scatter Plots

Show relationships between two variables.

In [None]:
# Simple scatter plot: noisy samples around the line y = 2x + 1
np.random.seed(42)
x = np.random.rand(50) * 10
y = 2 * x + 1 + np.random.randn(50) * 2

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, y, alpha=0.6, s=100)
ax.set(title='Scatter Plot Example', xlabel='X', ylabel='Y')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Scatter with colors and sizes: each point gets its own color value and area
np.random.seed(42)
x = np.random.rand(100) * 10
y = np.random.rand(100) * 10
colors = np.random.rand(100)
sizes = np.random.rand(100) * 500

fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(x, y, c=colors, s=sizes, alpha=0.5, cmap='viridis')
# The colorbar maps the viridis colors back to the values in `colors`
fig.colorbar(scatter, ax=ax, label='Color Value')
ax.set_title('Scatter Plot with Colors and Sizes')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.grid(True, alpha=0.3)
plt.show()

### 🎯 ML Example: Classification Decision Boundary

In [None]:
# Generate sample classification data: two Gaussian blobs with unit variance
np.random.seed(42)
center_0 = np.array([2, 2])
center_1 = np.array([6, 6])
class_0 = np.random.randn(50, 2) + center_0
class_1 = np.random.randn(50, 2) + center_1

plt.figure(figsize=(10, 6))
plt.scatter(class_0[:, 0], class_0[:, 1], label='Class 0', 
           alpha=0.6, s=100, color='blue', edgecolors='black')
plt.scatter(class_1[:, 0], class_1[:, 1], label='Class 1', 
           alpha=0.6, s=100, color='red', edgecolors='black')

# Decision boundary (simplified): the perpendicular bisector of the segment
# joining the two class centers. The diagonal y = x would pass through both
# centers and separate nothing; the bisector here is y = 8 - x, which puts
# Class 0 below the line and Class 1 above it.
midpoint = (center_0 + center_1) / 2
direction = center_1 - center_0
slope = -direction[0] / direction[1]  # perpendicular to the center-to-center line
x_line = np.linspace(0, 8, 100)  # stop at x = 8 so the line stays within the data range
y_line = midpoint[1] + slope * (x_line - midpoint[0])
plt.plot(x_line, y_line, 'k--', linewidth=2, label='Decision Boundary')

plt.title('Binary Classification', fontsize=16, fontweight='bold')
plt.xlabel('Feature 1', fontsize=12)
plt.ylabel('Feature 2', fontsize=12)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

---

## 3️⃣ Bar Charts

In [None]:
# Simple bar chart: one bar per category
categories = ['A', 'B', 'C', 'D', 'E']
values = [23, 45, 56, 78, 32]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(categories, values, color='steelblue', alpha=0.7, edgecolor='black')
ax.set(title='Bar Chart Example', xlabel='Category', ylabel='Value')
# Horizontal grid lines only, to make bar heights easier to read
ax.grid(True, alpha=0.3, axis='y')
plt.show()

In [None]:
# Grouped bar chart: two series drawn side by side for each category
categories = ['A', 'B', 'C', 'D']
values1 = [20, 35, 30, 35]
values2 = [25, 32, 34, 20]

x = np.arange(len(categories))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))

# Shift each group half a bar width left/right of the category position
groups = [
    (-width/2, values1, 'Group 1', 'blue'),
    (+width/2, values2, 'Group 2', 'orange'),
]
for offset, heights, group_label, bar_color in groups:
    ax.bar(x + offset, heights, width, label=group_label, color=bar_color, alpha=0.7)

ax.set_title('Grouped Bar Chart')
ax.set_xlabel('Category')
ax.set_ylabel('Value')
# Put the category names at the integer positions between each pair of bars
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
fig.tight_layout()
plt.show()

### 🎯 ML Example: Model Comparison

In [None]:
# Compare model accuracies
models = ['Logistic\nRegression', 'Decision\nTree', 'Random\nForest', 'SVM', 'Neural\nNetwork']
accuracy = [0.82, 0.78, 0.89, 0.85, 0.91]


def _accuracy_color(acc):
    """Return red below 0.80, orange below 0.85, and green otherwise."""
    if acc < 0.80:
        return 'red'
    if acc < 0.85:
        return 'orange'
    return 'green'


colors = [_accuracy_color(acc) for acc in accuracy]

fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.bar(models, accuracy, color=colors, alpha=0.7, edgecolor='black')

# Add value labels on bars: centered horizontally, sitting on the bar top
for bar, acc in zip(bars, accuracy):
    label_x = bar.get_x() + bar.get_width()/2.
    label_y = bar.get_height()
    ax.text(label_x, label_y, f'{acc:.2f}',
            ha='center', va='bottom', fontsize=11, fontweight='bold')

ax.set_title('Model Accuracy Comparison', fontsize=16, fontweight='bold')
ax.set_xlabel('Model', fontsize=12)
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_ylim(0, 1.0)
# Dashed reference line marking the target accuracy
ax.axhline(y=0.85, color='blue', linestyle='--', linewidth=2, label='Target (0.85)')
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3, axis='y')
fig.tight_layout()
plt.show()

---

## 4️⃣ Histograms

Show distribution of data.

In [None]:
# Simple histogram: 1000 draws from a normal distribution (mean 100, std 15)
np.random.seed(42)
data = np.random.normal(100, 15, 1000)

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(data, bins=30, color='steelblue', alpha=0.7, edgecolor='black')
ax.set(title='Histogram Example', xlabel='Value', ylabel='Frequency')
ax.grid(True, alpha=0.3, axis='y')
plt.show()

In [None]:
# Multiple histograms: two normal samples overlaid on the same axes
np.random.seed(42)
data1 = np.random.normal(100, 15, 1000)
data2 = np.random.normal(110, 20, 1000)

# Use one set of bin edges for both samples. Passing bins=30 to each call
# would give every sample its own edges and bin width, so the bar heights of
# the two distributions would not be directly comparable.
shared_bins = np.histogram_bin_edges(np.concatenate([data1, data2]), bins=30)

plt.figure(figsize=(10, 6))
plt.hist(data1, bins=shared_bins, alpha=0.5, label='Distribution 1', color='blue')
plt.hist(data2, bins=shared_bins, alpha=0.5, label='Distribution 2', color='red')
plt.title('Overlapping Histograms')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.grid(True, alpha=0.3, axis='y')
plt.show()

---

## 5️⃣ Subplots

Create multiple plots in one figure.

In [None]:
# Create 2x2 subplot grid with one plot type per panel
np.random.seed(42)
x = np.linspace(0, 10, 100)

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Give each panel a descriptive name instead of indexing `axes` repeatedly
(ax_line, ax_scatter), (ax_bar, ax_hist) = axes

# Plot 1 (top-left): Line plot
ax_line.plot(x, np.sin(x), 'b-', linewidth=2)
ax_line.set(title='Sine Wave', xlabel='x', ylabel='sin(x)')
ax_line.grid(True, alpha=0.3)

# Plot 2 (top-right): Scatter plot of 50 uniform random points
scatter_x = np.random.rand(50)
scatter_y = np.random.rand(50)
ax_scatter.scatter(scatter_x, scatter_y, alpha=0.6)
ax_scatter.set(title='Scatter Plot', xlabel='X', ylabel='Y')
ax_scatter.grid(True, alpha=0.3)

# Plot 3 (bottom-left): Bar chart
categories = ['A', 'B', 'C', 'D']
values = [23, 45, 56, 32]
ax_bar.bar(categories, values, color='green', alpha=0.7)
ax_bar.set(title='Bar Chart', xlabel='Category', ylabel='Value')
ax_bar.grid(True, alpha=0.3, axis='y')

# Plot 4 (bottom-right): Histogram of 1000 standard-normal draws
data = np.random.normal(0, 1, 1000)
ax_hist.hist(data, bins=30, color='orange', alpha=0.7, edgecolor='black')
ax_hist.set(title='Histogram', xlabel='Value', ylabel='Frequency')
ax_hist.grid(True, alpha=0.3, axis='y')

# Adjust spacing so titles and axis labels of neighbouring panels do not overlap
fig.tight_layout()
plt.show()

### 🎯 ML Example: Training Metrics Dashboard

In [None]:
# Simulate training metrics: decaying noisy losses, accuracies derived from
# the losses, and a learning rate multiplied by 0.95 every 5 epochs
np.random.seed(42)
epochs = np.arange(1, 51)
train_loss = 2.0 * np.exp(-epochs/10) + 0.1 + np.random.normal(0, 0.05, 50)
val_loss = 2.0 * np.exp(-epochs/10) + 0.2 + np.random.normal(0, 0.08, 50)
train_acc = 1 - train_loss/2
val_acc = 1 - val_loss/2
learning_rate = 0.001 * (0.95 ** (epochs // 5))

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_loss, ax_acc), (ax_lr, ax_final) = axes

# Loss curves (top-left) and accuracy curves (top-right) share one layout:
# a train and a validation line over epochs, with a legend and a light grid.
curve_panels = [
    (ax_loss, 'Loss Curves', 'Loss', train_loss, val_loss),
    (ax_acc, 'Accuracy Curves', 'Accuracy', train_acc, val_acc),
]
for ax, panel_title, y_label, train_values, val_values in curve_panels:
    ax.plot(epochs, train_values, label='Train', linewidth=2)
    ax.plot(epochs, val_values, label='Validation', linewidth=2)
    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Learning rate schedule (bottom-left), drawn on a logarithmic y-axis
ax_lr.plot(epochs, learning_rate, linewidth=2, color='red')
ax_lr.set_title('Learning Rate Schedule', fontsize=14, fontweight='bold')
ax_lr.set_xlabel('Epoch')
ax_lr.set_ylabel('Learning Rate')
ax_lr.set_yscale('log')
ax_lr.grid(True, alpha=0.3)

# Final metrics (bottom-right): scores of at least 0.90 are green, others orange
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
scores = [0.92, 0.89, 0.91, 0.90]
colors_metric = ['green' if s >= 0.90 else 'orange' for s in scores]
bars = ax_final.bar(metrics, scores, color=colors_metric, alpha=0.7, edgecolor='black')
# Write each score just above its bar, centered horizontally
for bar, score in zip(bars, scores):
    label_x = bar.get_x() + bar.get_width()/2.
    label_y = bar.get_height()
    ax_final.text(label_x, label_y, f'{score:.2f}',
                  ha='center', va='bottom', fontsize=10, fontweight='bold')
ax_final.set_title('Final Metrics', fontsize=14, fontweight='bold')
ax_final.set_ylabel('Score')
ax_final.set_ylim(0, 1.0)
ax_final.grid(True, alpha=0.3, axis='y')

fig.tight_layout()
plt.show()

---

## 6️⃣ Saving Plots

In [None]:
# Create and save plot
fig, ax = plt.subplots(figsize=(10, 6))
x = np.linspace(0, 10, 100)
ax.plot(x, np.sin(x), linewidth=2)
ax.set(title='Sine Wave', xlabel='x', ylabel='sin(x)')
ax.grid(True, alpha=0.3)

# Save in different formats. The files are written before plt.show() is
# called, and both exports crop the surrounding whitespace via bbox_inches.
export_targets = [
    ('sine_wave.png', {'dpi': 300}),
    ('sine_wave.pdf', {}),
]
for filename, extra_options in export_targets:
    fig.savefig(filename, bbox_inches='tight', **extra_options)
plt.show()

# Tell the reader which files were written
for message in (
    "Plots saved as:",
    "  - sine_wave.png (high resolution)",
    "  - sine_wave.pdf (vector format)",
):
    print(message)

---

## 🎯 Practice Exercise: Visualize Dataset

In [None]:
# Create sample dataset: 100 synthetic employee records with random values
np.random.seed(42)
n_rows = 100
department_names = ['Engineering', 'Sales', 'Marketing', 'HR']
df = pd.DataFrame({
    'age': np.random.randint(18, 65, n_rows),
    'salary': np.random.randint(30000, 120000, n_rows),
    'years_experience': np.random.randint(0, 30, n_rows),
    'department': np.random.choice(department_names, n_rows)
})

print("Dataset info:")
print(df.describe())

# Create visualizations: a 2x2 dashboard, one question per panel
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_age, ax_salary_exp), (ax_dept_salary, ax_dept_share) = axes

# 1. Age distribution
ax_age.hist(df['age'], bins=20, color='steelblue', alpha=0.7, edgecolor='black')
ax_age.set_title('Age Distribution', fontsize=12, fontweight='bold')
ax_age.set(xlabel='Age', ylabel='Frequency')
ax_age.grid(True, alpha=0.3, axis='y')

# 2. Salary vs Experience
ax_salary_exp.scatter(df['years_experience'], df['salary'], alpha=0.6, s=50)
ax_salary_exp.set_title('Salary vs Experience', fontsize=12, fontweight='bold')
ax_salary_exp.set(xlabel='Years of Experience', ylabel='Salary ($)')
ax_salary_exp.grid(True, alpha=0.3)

# 3. Average salary by department, highest-paying department first
mean_salary_by_dept = df.groupby('department')['salary'].mean()
dept_salary = mean_salary_by_dept.sort_values(ascending=False)
ax_dept_salary.bar(dept_salary.index, dept_salary.values,
                   color='green', alpha=0.7, edgecolor='black')
ax_dept_salary.set_title('Average Salary by Department', fontsize=12, fontweight='bold')
ax_dept_salary.set(xlabel='Department', ylabel='Average Salary ($)')
# Rotate the x tick labels by 45 degrees
ax_dept_salary.tick_params(axis='x', rotation=45)
ax_dept_salary.grid(True, alpha=0.3, axis='y')

# 4. Department distribution as a pie chart with percentage labels
dept_counts = df['department'].value_counts()
ax_dept_share.pie(dept_counts.values, labels=dept_counts.index,
                  autopct='%1.1f%%', startangle=90)
ax_dept_share.set_title('Employee Distribution by Department', fontsize=12, fontweight='bold')

fig.tight_layout()
plt.show()

---

## 🎓 Summary

In this notebook, you learned:

✅ **Line Plots**: Visualize trends and training curves  
✅ **Scatter Plots**: Show relationships and classification  
✅ **Bar Charts**: Compare categories and models  
✅ **Histograms**: Display data distributions  
✅ **Subplots**: Create multi-plot dashboards  
✅ **Saving**: Export plots in multiple formats  
✅ **ML Applications**: Training metrics, model comparison

### 🚀 Next Steps

Continue to:
- **`matplotlib_customization.ipynb`** - Advanced styling and customization
- Create visualizations for your ML projects

---

**Course:** MLM-101 - Machine Learning Mastery  
**Website:** [https://flowdiary.com.ng/course/MLM-101](https://flowdiary.com.ng/course/MLM-101)