# Exercise 4: Matrix Multiplication - Scaling Analysis Visualizations

This notebook visualizes the excellent scaling properties of matrix multiplication and compares them with the results from Exercise 3.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import csv

# Global plot appearance: dark-grid style sheet, wide default figure, 11pt text
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (14, 8),
    'font.size': 11,
})

## Load Data

In [None]:
def load_csv_records(path, int_fields=(), float_fields=()):
    """Read a CSV file with a header row into a list of dicts with typed values.

    Args:
        path: Path of the CSV file to read.
        int_fields: Column names whose values are converted with ``int``.
        float_fields: Column names whose values are converted with ``float``.

    Returns:
        One dict per data row holding only the requested columns
        (integer columns first, then float columns).

    Raises:
        KeyError: If a requested column is missing from the file.
        ValueError: If a value cannot be parsed as the requested type.
    """
    # newline='' lets the csv module handle line endings itself, as its docs recommend.
    with open(path, 'r', newline='') as f:
        return [
            {
                **{name: int(row[name]) for name in int_fields},
                **{name: float(row[name]) for name in float_fields},
            }
            for row in csv.DictReader(f)
        ]


# Load profiling results (one row per matrix size N)
profiling_results = load_csv_records(
    'profiling_results.csv',
    int_fields=('N', 'total', 'generate_noise', 'init_matrix', 'matmul'),
    float_fields=('fs', 'fp'),
)

# Load scaling results (one row per (N, Processors) combination)
scaling_results = load_csv_records(
    'scaling_results.csv',
    int_fields=('N', 'Processors'),
    float_fields=('fs', 'Amdahl_Speedup', 'Gustafson_Speedup', 'Efficiency'),
)

print(f"Loaded {len(profiling_results)} profiling results")
print(f"Loaded {len(scaling_results)} scaling results")
print(f"\nSequential fractions:")
for result in profiling_results:
    print(f"  N = {result['N']:>3}: fs = {result['fs']:.6f} ({result['fs']*100:.4f}%)")

## Question 1-2: Strong Scaling (Amdahl's Law)

Plot speedup curves showing near-perfect linear scaling.

In [None]:
# Two-panel figure: Amdahl speedup curves (left) and parallel efficiency (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

colors = ['blue', 'green', 'red']
markers = ['o', 's', '^']
plot_sizes = [128, 256, 512]

# Gather the rows for each matrix size once so both panels draw from the same data.
# Sizes with no rows are skipped instead of failing later on data_n[0].
rows_by_n = {}
for n_val in plot_sizes:
    rows = [r for r in scaling_results if r['N'] == n_val]
    if rows:
        rows_by_n[n_val] = rows
    else:
        print(f"Warning: no scaling results for N={n_val}; skipping")
if not rows_by_n:
    raise ValueError(f"scaling_results has no rows for any N in {plot_sizes}")

# Tick positions / ideal line use every processor count present in the data,
# rather than whatever list the last loop iteration happened to leave behind.
tick_procs = sorted({r['Processors'] for rows in rows_by_n.values() for r in rows})

# Plot 1: Speedup curves for different N
for i, n_val in enumerate(plot_sizes):
    if n_val not in rows_by_n:
        continue
    data_n = rows_by_n[n_val]
    procs = [r['Processors'] for r in data_n]
    speedup = [r['Amdahl_Speedup'] for r in data_n]
    fs_val = data_n[0]['fs']

    label = f'N={n_val} (fs={fs_val:.6f})'
    ax1.plot(procs, speedup, marker=markers[i], linewidth=2.5, markersize=8,
             color=colors[i], label=label)

# Add ideal linear speedup
ax1.plot(tick_procs, tick_procs, '--', linewidth=2.5, color='black', alpha=0.5,
         label='Linear (Ideal)')

ax1.set_xlabel('Number of Processors (p)', fontsize=12, fontweight='bold')
ax1.set_ylabel('Speedup S(p)', fontsize=12, fontweight='bold')
ax1.set_title('Amdahl\'s Law: Near-Perfect Scaling\n(Matrix Multiplication)',
              fontsize=14, fontweight='bold')
ax1.legend(fontsize=10, loc='upper left')
ax1.grid(True, alpha=0.3)
ax1.set_xscale('log', base=2)
ax1.set_yscale('log', base=2)
ax1.set_xticks(tick_procs)
ax1.set_yticks(tick_procs)
ax1.set_xticklabels(tick_procs)
ax1.set_yticklabels(tick_procs)

# Plot 2: Efficiency (expected to stay near 100%)
lowest_efficiency = 100.0
for i, n_val in enumerate(plot_sizes):
    if n_val not in rows_by_n:
        continue
    data_n = rows_by_n[n_val]
    procs = [r['Processors'] for r in data_n]
    efficiency = [r['Efficiency'] * 100 for r in data_n]  # Convert to percentage
    lowest_efficiency = min(lowest_efficiency, min(efficiency))

    label = f'N={n_val}'
    ax2.plot(procs, efficiency, marker=markers[i], linewidth=2.5, markersize=8,
             color=colors[i], label=label)

ax2.axhline(y=100, color='green', linestyle='--', linewidth=2, alpha=0.5, label='Perfect (100%)')
ax2.set_xlabel('Number of Processors (p)', fontsize=12, fontweight='bold')
ax2.set_ylabel('Parallel Efficiency (%)', fontsize=12, fontweight='bold')
ax2.set_title('Efficiency Remains Near 100%', fontsize=14, fontweight='bold')
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3)
ax2.set_xscale('log', base=2)
ax2.set_xticks(tick_procs)
ax2.set_xticklabels(tick_procs)
# Keep the tight 95-101% window when the data fits it, but widen the lower
# bound if any point would otherwise be clipped out of view.
ax2.set_ylim([min(95, lowest_efficiency - 1), 101])

plt.tight_layout()
plt.savefig('q1_q2_amdahl_scaling.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nKey Observation: Near-perfect linear scaling!")
print("Sequential fraction is so small (< 0.01%) that it's negligible.")

## Question 3: Amdahl vs Gustafson

When $f_s \approx 0$, Amdahl's and Gustafson's laws predict nearly identical speedups, $S(p) \approx p$.

In [None]:
# Use N=512 data for comparison
data_512 = [r for r in scaling_results if r['N'] == 512]
if not data_512:
    raise ValueError("scaling_results has no rows for N=512")

processors = [r['Processors'] for r in data_512]
amdahl_speedup = [r['Amdahl_Speedup'] for r in data_512]
gustafson_speedup = [r['Gustafson_Speedup'] for r in data_512]
fs_512 = data_512[0]['fs']
p_max = processors[-1]  # processor count of the last row, reported in the summary below

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: Direct comparison
ax1.plot(processors, amdahl_speedup, 'o-', linewidth=2.5, markersize=10,
         label=f'Amdahl (fs={fs_512:.6f})', color='blue')
ax1.plot(processors, gustafson_speedup, 's--', linewidth=2.5, markersize=10,
         label=f'Gustafson (fs={fs_512:.6f})', color='green', alpha=0.7)
ax1.plot(processors, processors, ':', linewidth=2, alpha=0.5,
         label='Linear (Ideal)', color='gray')

ax1.set_xlabel('Number of Processors (p)', fontsize=12, fontweight='bold')
ax1.set_ylabel('Speedup S(p)', fontsize=12, fontweight='bold')
ax1.set_title('Amdahl vs Gustafson (N=512)\nBoth Predict Linear Scaling!',
              fontsize=14, fontweight='bold')
ax1.legend(fontsize=11, loc='upper left')
ax1.grid(True, alpha=0.3)
ax1.set_xscale('log', base=2)
ax1.set_yscale('log', base=2)
ax1.set_xticks(processors)
ax1.set_yticks(processors)
ax1.set_xticklabels(processors)
ax1.set_yticklabels(processors)

# Plot 2: Gustafson minus Amdahl, absolute and relative to Amdahl
difference = [g - a for g, a in zip(gustafson_speedup, amdahl_speedup)]
percent_diff = [(g - a) / a * 100 for g, a in zip(gustafson_speedup, amdahl_speedup)]
# Report the measured maximum in the title instead of asserting a fixed bound.
max_abs_percent_diff = max(abs(d) for d in percent_diff)

ax2.plot(processors, percent_diff, '^-', linewidth=2.5, markersize=10, color='purple')
ax2.axhline(y=0, color='gray', linestyle='--', linewidth=2, alpha=0.5)
ax2.set_xlabel('Number of Processors (p)', fontsize=12, fontweight='bold')
ax2.set_ylabel('Difference (%)', fontsize=12, fontweight='bold')
ax2.set_title(f'Gustafson - Amdahl Difference\n(max |diff| = {max_abs_percent_diff:.3f}%)',
              fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3)
ax2.set_xscale('log', base=2)
ax2.set_xticks(processors)
ax2.set_xticklabels(processors)

plt.tight_layout()
plt.savefig('q3_amdahl_vs_gustafson.png', dpi=300, bbox_inches='tight')
plt.show()

# The label is derived from the data so it always matches the values printed.
print(f"\nAt p={p_max}:")
print(f"  Amdahl:    {amdahl_speedup[-1]:.3f}x")
print(f"  Gustafson: {gustafson_speedup[-1]:.3f}x")
print(f"  Difference: {difference[-1]:.3f} ({percent_diff[-1]:.3f}%)")
print(f"\nConclusion: With fs ≈ 0, both laws converge to S(p) ≈ p")

## Question 4: Effect of Problem Size

Sequential fraction decreases as matrix size increases.

In [None]:
# Two-panel figure: sequential fraction vs N (left) and Amdahl limit 1/fs vs N (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Both panels use log y-axes and the right one divides by fs, so rows whose
# stored fs is not strictly positive cannot be shown; report and skip them.
plottable = [r for r in profiling_results if r['fs'] > 0]
skipped_sizes = [r['N'] for r in profiling_results if not r['fs'] > 0]
if skipped_sizes:
    print(f"Warning: fs <= 0 for N in {skipped_sizes}; omitted from the plots")

# Plot 1: Sequential fraction vs N
n_values = [r['N'] for r in plottable]
fs_values = [r['fs'] * 100 for r in plottable]  # Convert to percentage

ax1.plot(n_values, fs_values, 'o-', linewidth=3, markersize=12, color='red')
ax1.set_xlabel('Matrix Size (N)', fontsize=12, fontweight='bold')
ax1.set_ylabel('Sequential Fraction fs (%)', fontsize=12, fontweight='bold')
ax1.set_title('Sequential Fraction Decreases with Problem Size\nfs ∝ 1/N²',
              fontsize=14, fontweight='bold')
ax1.grid(True, alpha=0.3)
ax1.set_yscale('log')

# Add annotations (loop names chosen so they do not rebind a notebook-level `fs`)
for n, fs_pct in zip(n_values, fs_values):
    ax1.annotate(f'{fs_pct:.4f}%', xy=(n, fs_pct), xytext=(10, 10),
                 textcoords='offset points', fontsize=10, fontweight='bold')

# Plot 2: Maximum speedup vs N
max_speedups = [1 / r['fs'] for r in plottable]

ax2.plot(n_values, max_speedups, 's-', linewidth=3, markersize=12, color='green')
ax2.set_xlabel('Matrix Size (N)', fontsize=12, fontweight='bold')
ax2.set_ylabel('Maximum Speedup (1/fs)', fontsize=12, fontweight='bold')
ax2.set_title('Maximum Speedup Increases with Problem Size\nS_max = 1/fs',
              fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3)
ax2.set_yscale('log')

# Add annotations
for n, s_max in zip(n_values, max_speedups):
    ax2.annotate(f'{s_max:,.0f}x', xy=(n, s_max), xytext=(10, -15),
                 textcoords='offset points', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig('q4_problem_size_effect.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nScaling Analysis:")
print("  Sequential part:     O(N)")
print("  Parallelizable part: O(N³)")
print("  Sequential fraction: fs = O(N) / O(N³) = O(1/N²)")
print("  Maximum speedup:     S_max = 1/fs = O(N²)")
print("\n→ Larger problems have better parallel efficiency!")

## Question 5: Comparison with Exercise 3

Exercise 3 and Exercise 4 show a dramatic contrast in scaling behavior.

In [None]:
# Exercise 3 data (from previous analysis)
fs_ex3 = 0.3074

# Exercise 4 data (N=512)
data_ex4 = [r for r in scaling_results if r['N'] == 512]
if not data_ex4:
    raise ValueError("scaling_results has no rows for N=512")
amdahl_ex4 = [r['Amdahl_Speedup'] for r in data_ex4]
gustafson_ex4 = [r['Gustafson_Speedup'] for r in data_ex4]
fs_ex4 = data_ex4[0]['fs']

# Take the processor counts from the Exercise 4 rows so every Exercise 3 curve
# is evaluated at exactly the same x positions (a separate hard-coded list
# would break plotting as soon as the CSV has a different set of counts).
processors = [r['Processors'] for r in data_ex4]
p_max = processors[-1]

# Calculate Exercise 3 speedups from the closed-form laws
amdahl_ex3 = [1 / (fs_ex3 + (1-fs_ex3)/p) for p in processors]
gustafson_ex3 = [p - fs_ex3*(p-1) for p in processors]

# Amdahl limit for Exercise 4; unbounded if the stored fs is 0
s_max_ex4 = 1 / fs_ex4 if fs_ex4 > 0 else float('inf')
S_MAX_DISPLAY_CAP = 1000  # bar height used when the Exercise 4 limit exceeds it

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))

# Plot 1: Amdahl comparison
ax1.plot(processors, amdahl_ex3, 'o-', linewidth=2.5, markersize=10,
         label=f'Exercise 3 (fs={fs_ex3:.3f})', color='red')
ax1.plot(processors, amdahl_ex4, 's-', linewidth=2.5, markersize=10,
         label=f'Exercise 4 (fs={fs_ex4:.6f})', color='green')
ax1.plot(processors, processors, '--', linewidth=2, alpha=0.5,
         label='Linear (Ideal)', color='gray')
ax1.axhline(y=1/fs_ex3, color='red', linestyle=':', linewidth=2, alpha=0.5,
            label=f'Ex3 Max = {1/fs_ex3:.2f}x')

ax1.set_xlabel('Number of Processors (p)', fontsize=12, fontweight='bold')
ax1.set_ylabel('Speedup S(p)', fontsize=12, fontweight='bold')
ax1.set_title('Amdahl\'s Law: Exercise 3 vs Exercise 4', fontsize=14, fontweight='bold')
ax1.legend(fontsize=10)
ax1.grid(True, alpha=0.3)
ax1.set_xscale('log', base=2)
ax1.set_xticks(processors)
ax1.set_xticklabels(processors)

# Plot 2: Efficiency comparison (speedup / p, in percent)
efficiency_ex3 = [a/p * 100 for a, p in zip(amdahl_ex3, processors)]
efficiency_ex4 = [a/p * 100 for a, p in zip(amdahl_ex4, processors)]

ax2.plot(processors, efficiency_ex3, 'o-', linewidth=2.5, markersize=10,
         label='Exercise 3', color='red')
ax2.plot(processors, efficiency_ex4, 's-', linewidth=2.5, markersize=10,
         label='Exercise 4', color='green')
ax2.axhline(y=100, color='gray', linestyle='--', linewidth=2, alpha=0.5)

ax2.set_xlabel('Number of Processors (p)', fontsize=12, fontweight='bold')
ax2.set_ylabel('Efficiency (%)', fontsize=12, fontweight='bold')
ax2.set_title('Efficiency Comparison', fontsize=14, fontweight='bold')
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3)
ax2.set_xscale('log', base=2)
ax2.set_xticks(processors)
ax2.set_xticklabels(processors)

# Plot 3: Gustafson comparison
ax3.plot(processors, gustafson_ex3, 'o-', linewidth=2.5, markersize=10,
         label='Exercise 3', color='red')
ax3.plot(processors, gustafson_ex4, 's-', linewidth=2.5, markersize=10,
         label='Exercise 4', color='green')
ax3.plot(processors, processors, '--', linewidth=2, alpha=0.5,
         label='Linear (Ideal)', color='gray')

ax3.set_xlabel('Number of Processors (p)', fontsize=12, fontweight='bold')
ax3.set_ylabel('Speedup S(p)', fontsize=12, fontweight='bold')
ax3.set_title('Gustafson\'s Law: Exercise 3 vs Exercise 4', fontsize=14, fontweight='bold')
ax3.legend(fontsize=10)
ax3.grid(True, alpha=0.3)
ax3.set_xscale('log', base=2)
ax3.set_xticks(processors)
ax3.set_xticklabels(processors)

# Plot 4: Summary comparison
categories = ['fs (%)', f'S(p={p_max})', f'Eff@{p_max}(%)', 'S_max']
ex3_values = [fs_ex3*100, amdahl_ex3[-1], efficiency_ex3[-1], 1/fs_ex3]
s_max_is_capped = s_max_ex4 > S_MAX_DISPLAY_CAP
ex4_values = [fs_ex4*100, amdahl_ex4[-1], efficiency_ex4[-1],
              min(s_max_ex4, S_MAX_DISPLAY_CAP)]  # Cap for display

x = np.arange(len(categories))
width = 0.35

bars1 = ax4.bar(x - width/2, ex3_values, width, label='Exercise 3', color='red', alpha=0.7)
bars2 = ax4.bar(x + width/2, ex4_values, width, label='Exercise 4', color='green', alpha=0.7)

ax4.set_ylabel('Value', fontsize=12, fontweight='bold')
ax4.set_title('Key Metrics Comparison', fontsize=14, fontweight='bold')
ax4.set_xticks(x)
ax4.set_xticklabels(categories)
ax4.legend(fontsize=10)
ax4.grid(True, alpha=0.3, axis='y')
ax4.set_yscale('log')

# Add value labels. Four significant digits keep the tiny Exercise 4 fs readable
# (one fixed decimal would print it as 0.0); the capped S_max bar is marked '>'.
capped_bar = bars2[-1] if s_max_is_capped else None
for bars in (bars1, bars2):
    for bar in bars:
        height = bar.get_height()
        if height <= 0:
            continue  # nothing to anchor a label to on a log axis
        prefix = '>' if bar is capped_bar else ''
        ax4.text(bar.get_x() + bar.get_width()/2., height,
                 f'{prefix}{height:.4g}', ha='center', va='bottom',
                 fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('q5_comparison_with_ex3.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n" + "="*80)
print("EXERCISE 3 vs EXERCISE 4 COMPARISON")
print("="*80)
print(f"\n{'Metric':<30} {'Exercise 3':<20} {'Exercise 4':<20}")
print("-"*70)
print(f"{'Sequential fraction':<30} {fs_ex3*100:>18.4f}% {fs_ex4*100:>18.6f}%")
print(f"{f'Speedup @ p={p_max}':<30} {amdahl_ex3[-1]:>18.2f}x {amdahl_ex4[-1]:>18.2f}x")
print(f"{f'Efficiency @ p={p_max}':<30} {efficiency_ex3[-1]:>18.1f}% {efficiency_ex4[-1]:>18.1f}%")
print(f"{'Max theoretical speedup':<30} {1/fs_ex3:>18.2f}x {s_max_ex4:>18.0f}x")
print(f"{'Optimal processors':<30} {'8-16':>20} {'64+':>20}")
print("-"*70)
print("\nConclusion: Exercise 4 shows dramatically better scalability!")
print("Reason: O(N³) parallelizable work dominates O(N) sequential overhead.")

## Additional Analysis: Instruction Distribution

In [None]:
# One pie chart per profiled matrix size. The panel count follows the data so
# a CSV with more (or fewer) than three sizes still renders; with three rows
# the figure size is the same (18, 5) as before.
n_panels = max(len(profiling_results), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 5), squeeze=False)

# Slice styling is the same for every panel, so build it once. Distinct names
# keep this cell from rebinding the `colors` list used by the speedup plots.
slice_labels = ['generate_noise\n(Sequential)', 'init_matrix\n(Parallel)', 'matmul\n(Parallel)']
slice_colors = ['#ff6b6b', '#51cf66', '#339af0']
slice_explode = (0.1, 0, 0)  # pull the first (sequential) slice out

# squeeze=False always returns a 2-D array of axes; row 0 holds the panels.
for ax, prof in zip(axes[0], profiling_results):
    sizes = [prof['generate_noise'], prof['init_matrix'], prof['matmul']]

    ax.pie(sizes, explode=slice_explode, labels=slice_labels, colors=slice_colors,
           autopct='%1.2f%%', shadow=True, startangle=90,
           textprops={'fontsize': 10, 'fontweight': 'bold'})

    ax.set_title(f'N = {prof["N"]}\nfs = {prof["fs"]*100:.4f}%',
                 fontsize=12, fontweight='bold')

plt.suptitle('Instruction Distribution: Sequential Overhead Shrinks with N',
             fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('instruction_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nAs N increases, sequential part (red) becomes invisible!")
print("This is the key to excellent scalability in Exercise 4.")