# Random Walk in 2D: Standard Deviation vs Number of Steps

This notebook explores how the standard deviation of the final distance from the origin changes with the number of steps in a 2D random walk.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

## Single Random Walk

First, let's visualize a single 2D random walk trajectory.

In [None]:
def random_walk_2d(n_steps, rng=None):
    """Generate a 2D lattice random walk with n_steps unit steps.

    Each step moves exactly one unit in one of the four cardinal directions
    (+x, +y, -x, -y), chosen uniformly at random.

    Parameters
    ----------
    n_steps : int
        Number of steps to take.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` still controls the walk.

    Returns
    -------
    x, y : numpy.ndarray
        Float arrays of length ``n_steps + 1`` with the coordinates after
        each step; index 0 is the starting point (0, 0).
    """
    # Exact unit displacements in the order east, north, west, south.
    # A lookup table is used instead of cos/sin of multiples of pi/2 because
    # those are not exact in floating point (cos(pi/2) is ~6e-17, not 0),
    # which would leave tiny non-integer offsets in the lattice coordinates.
    step_dx = np.array([1.0, 0.0, -1.0, 0.0])
    step_dy = np.array([0.0, 1.0, 0.0, -1.0])

    source = np.random if rng is None else rng
    directions = source.choice(4, size=n_steps)

    # Prepend the origin so that index k holds the position after k steps.
    x = np.concatenate([[0.0], np.cumsum(step_dx[directions])])
    y = np.concatenate([[0.0], np.cumsum(step_dy[directions])])

    return x, y

# Simulate one walk, then show its path and its distance from the origin.
n_steps = 1000
x, y = random_walk_2d(n_steps)
distance = np.sqrt(x**2 + y**2)  # Euclidean distance after every step

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))

# Left panel: the trajectory, with start (green) and end (red) marked.
ax1.plot(x, y, linewidth=0.5, alpha=0.6)
ax1.plot(0, 0, 'go', label='Start', markersize=10)
ax1.plot(x[-1], y[-1], 'ro', label='End', markersize=10)
ax1.set(
    xlabel='X Position',
    ylabel='Y Position',
    title=f'2D Random Walk Trajectory ({n_steps} steps)',
)
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.axis('equal')  # same scale on both axes so the path is not distorted

# Right panel: distance from the origin as a function of the step index.
ax2.plot(distance)
ax2.set(
    xlabel='Step',
    ylabel='Distance from Origin',
    title='Distance from Origin vs Step',
)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"Final position: ({x[-1]:.1f}, {y[-1]:.1f})")
print(f"Final distance from origin: {distance[-1]:.1f}")

## Multiple Trajectories Visualization

Let's visualize multiple walks to see the spread.

In [None]:
# Overlay many independent walks to show how far they typically spread.
n_steps = 500
n_walks = 50

plt.figure(figsize=(10, 10))
for _ in range(n_walks):
    x, y = random_walk_2d(n_steps)
    (path,) = plt.plot(x, y, alpha=0.3, linewidth=0.5)
    # Draw the end-point in the same colour as its trajectory. Without an
    # explicit colour, this second plot call would take the next colour in
    # the cycle and the marker would not match the path it belongs to.
    plt.plot(x[-1], y[-1], 'o', markersize=4, alpha=0.5,
             color=path.get_color())

# High zorder keeps the start marker visible on top of all trajectories.
plt.plot(0, 0, 'go', markersize=15, label='Start', zorder=100)
plt.xlabel('X Position')
plt.ylabel('Y Position')
plt.title(f'{n_walks} Random Walks ({n_steps} steps each)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.axis('equal')
plt.show()

## Standard Deviation vs Number of Steps

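Theory predicts that the standard deviation of the distance from the origin should scale as $\sigma \sim \sqrt{N}$, where $N$ is the number of steps. More precisely, for large $N$ the final distance is approximately Rayleigh-distributed with scale $\sqrt{N/2}$, which gives $\sigma \approx \sqrt{(1 - \pi/4)N} \approx 0.46\sqrt{N}$.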

In [None]:
def measure_stddev_2d(n_steps, n_walks=1000):
    """Return the standard deviation of the final distance from the origin.

    Simulates ``n_walks`` independent 2D walks of ``n_steps`` steps each with
    ``random_walk_2d`` and returns ``np.std`` of the Euclidean distances
    between each walk's last point and the origin.
    """
    def final_distance():
        # Only the last coordinate pair of each walk is needed.
        xs, ys = random_walk_2d(n_steps)
        return np.sqrt(xs[-1]**2 + ys[-1]**2)

    samples = np.array([final_distance() for _ in range(n_walks)], dtype=float)
    return np.std(samples)

# Sweep 20 log-spaced walk lengths, from 10 to 10,000 steps
n_walks = 1000
step_counts = np.logspace(1, 4, 20, dtype=int)
stddevs = np.empty(len(step_counts))  # one measured sigma per walk length

print(f"Running {n_walks} walks for each step count...")
for idx, n_steps in enumerate(step_counts):
    stddev = measure_stddev_2d(n_steps, n_walks)
    stddevs[idx] = stddev
    print(f"N={n_steps:5d}, σ={stddev:.2f}")

## Visualization and Analysis

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Theoretical curve for the quantity actually plotted (the standard
# deviation of the final distance). For large N the distance is approximately
# Rayleigh-distributed with scale sqrt(N/2), whose standard deviation is
# sqrt(N/2) * sqrt(2 - pi/2) = sqrt((1 - pi/4) * N), about 0.46 * sqrt(N).
# A bare sqrt(N) curve is the RMS distance and would sit a constant factor
# above the measured points.
theory = np.sqrt((1 - np.pi / 4) * step_counts)
theory_label = 'Theoretical $\\sqrt{(1 - \\pi/4) N}$'

# Linear scale
ax1.scatter(step_counts, stddevs, label='Measured', alpha=0.6)
ax1.plot(step_counts, theory, 'r--', label=theory_label, linewidth=2)
ax1.set_xlabel('Number of Steps (N)')
ax1.set_ylabel('Standard Deviation of Distance (σ)')
ax1.set_title('Standard Deviation vs Number of Steps (2D)')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Log-log scale: a power law appears as a straight line with slope 1/2
ax2.loglog(step_counts, stddevs, 'o', label='Measured', alpha=0.6)
ax2.loglog(step_counts, theory, 'r--', label=theory_label, linewidth=2)
ax2.set_xlabel('Number of Steps (N)')
ax2.set_ylabel('Standard Deviation of Distance (σ)')
ax2.set_title('Log-Log Plot')
ax2.legend()
ax2.grid(True, alpha=0.3, which='both')

plt.tight_layout()
plt.show()

## Power Law Fitting

Let's fit the data to verify the $\sqrt{N}$ relationship.

In [None]:
# Power-law model: σ = a * N^b
# Taking logs makes it linear: log(σ) = log(a) + b * log(N),
# so an ordinary linear regression in log-log space yields b as the slope.
log_steps = np.log(step_counts)
log_stddevs = np.log(stddevs)

fit = stats.linregress(log_steps, log_stddevs)
slope = fit.slope
intercept = fit.intercept
r_value = fit.rvalue
p_value = fit.pvalue
std_err = fit.stderr

print(f"\nPower Law Fit: σ = {np.exp(intercept):.3f} * N^{slope:.3f}")
print(f"Theoretical exponent: 0.5")
print(f"Measured exponent: {slope:.3f}")
print(f"R² = {r_value**2:.6f}")
print(f"Relative error: {abs(slope - 0.5) / 0.5 * 100:.2f}%")

## Distribution of Final Distances

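Let's examine the distribution of final distances for a fixed number of steps. In 2D, for large $N$ the final distance approximately follows a Rayleigh distribution, because each coordinate of the final position is approximately Gaussian with variance $N/2$.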

In [None]:
n_steps = 1000
n_walks = 5000

# Record the end-to-origin distance of every simulated walk.
final_distances = np.empty(n_walks)
for i in range(n_walks):
    x, y = random_walk_2d(n_steps)
    final_distances[i] = np.sqrt(x[-1]**2 + y[-1]**2)

# Scale parameter of the comparison Rayleigh distribution:
# sigma = sqrt(N/2) for unit steps.
rayleigh_scale = np.sqrt(n_steps / 2)

plt.figure(figsize=(12, 5))
ax = plt.gca()
ax.hist(final_distances, bins=50, density=True, alpha=0.7, label='Measured')

# Theoretical Rayleigh density, evaluated from 0 up to the largest sample.
x = np.linspace(0, final_distances.max(), 100)
ax.plot(x, stats.rayleigh.pdf(x, scale=rayleigh_scale), 'r-', linewidth=2,
        label=f'Rayleigh($\\sqrt{{N/2}}$)')

ax.set_xlabel('Final Distance from Origin')
ax.set_ylabel('Probability Density')
ax.set_title(f'Distribution of Final Distances ({n_steps} steps, {n_walks} walks)')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

# Sample moments next to the closed-form Rayleigh mean and std.
print(f"\nMeasured mean: {np.mean(final_distances):.2f}")
print(f"Measured std: {np.std(final_distances):.2f}")
print(f"Theoretical mean (Rayleigh): {rayleigh_scale * np.sqrt(np.pi/2):.2f}")
print(f"Theoretical std (Rayleigh): {rayleigh_scale * np.sqrt(2 - np.pi/2):.2f}")

## 2D Spatial Distribution of Final Positions

Let's visualize the spatial distribution of final positions.

In [None]:
n_steps = 1000
n_walks = 2000
final_x = np.zeros(n_walks)
final_y = np.zeros(n_walks)

# Keep only the end point of each walk.
for i in range(n_walks):
    x, y = random_walk_2d(n_steps)
    final_x[i] = x[-1]
    final_y[i] = y[-1]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Scatter plot
ax1.scatter(final_x, final_y, alpha=0.3, s=10)
ax1.plot(0, 0, 'ro', markersize=10, label='Origin')
ax1.set_xlabel('X Position')
ax1.set_ylabel('Y Position')
ax1.set_title(f'Final Positions ({n_steps} steps, {n_walks} walks)')
ax1.grid(True, alpha=0.3)
ax1.axis('equal')

# Reference circle whose radius is sqrt(N/2), the expected standard
# deviation of each coordinate of the final position.
circle_radius = np.sqrt(n_steps / 2)
theta = np.linspace(0, 2*np.pi, 100)
ax1.plot(circle_radius * np.cos(theta), circle_radius * np.sin(theta),
         'r--', linewidth=2, label='Expected std radius')
# Build the legend once, after every labelled artist has been added.
ax1.legend()

# 2D histogram (heatmap)
hist = ax2.hist2d(final_x, final_y, bins=40, cmap='hot')
ax2.plot(0, 0, 'go', markersize=10, label='Origin')
ax2.set_xlabel('X Position')
ax2.set_ylabel('Y Position')
ax2.set_title('Density Heatmap of Final Positions')
ax2.axis('equal')
# The origin marker carries a label, so draw the legend that displays it.
ax2.legend()
# hist[3] is the image artist returned by hist2d; the colorbar maps its colours to counts.
plt.colorbar(hist[3], ax=ax2, label='Count')

plt.tight_layout()
plt.show()

print(f"\nX component - Mean: {np.mean(final_x):.2f}, Std: {np.std(final_x):.2f}")
print(f"Y component - Mean: {np.mean(final_y):.2f}, Std: {np.std(final_y):.2f}")
print(f"Expected std for each component: {np.sqrt(n_steps/2):.2f}")

## Comparison: 1D vs 2D

Let's compare the scaling behavior between 1D and 2D random walks.

In [None]:
def random_walk_1d(n_steps):
    """Return the positions of a 1D random walk of n_steps unit steps.

    Every step is -1 or +1, chosen uniformly at random. The result has
    ``n_steps + 1`` entries and starts at position 0.
    """
    increments = np.random.choice([-1, 1], size=n_steps)
    # Prepending a zero increment makes the running sum start at the origin.
    return np.cumsum(np.concatenate([[0], increments]))

def measure_stddev_1d(n_steps, n_walks=1000):
    """Return the standard deviation of |final position| over many 1D walks.

    Simulates ``n_walks`` independent walks of ``n_steps`` steps each with
    ``random_walk_1d`` and returns ``np.std`` of the absolute values of their
    end positions.
    """
    end_distances = np.array(
        [abs(random_walk_1d(n_steps)[-1]) for _ in range(n_walks)],
        dtype=float,
    )
    return np.std(end_distances)

# Run the same sweep of walk lengths for the 1D and the 2D walk
n_walks = 1000
step_counts_comp = np.logspace(1, 3.5, 15, dtype=int)
stddevs_1d = np.empty(len(step_counts_comp))
stddevs_2d = np.empty(len(step_counts_comp))

print("Comparing 1D and 2D random walks...")
for idx, n_steps in enumerate(step_counts_comp):
    # 1D is measured before 2D for every N, so the order in which random
    # numbers are consumed stays fixed.
    stddev_1d = measure_stddev_1d(n_steps, n_walks)
    stddev_2d = measure_stddev_2d(n_steps, n_walks)
    stddevs_1d[idx] = stddev_1d
    stddevs_2d[idx] = stddev_2d
    print(f"N={n_steps:5d}, σ_1D={stddev_1d:.2f}, σ_2D={stddev_2d:.2f}")

# Log-log comparison of both measurements against a sqrt(N) reference line
plt.figure(figsize=(12, 5))
ax = plt.gca()
ax.loglog(step_counts_comp, stddevs_1d, 'o-', label='1D Random Walk', alpha=0.7)
ax.loglog(step_counts_comp, stddevs_2d, 's-', label='2D Random Walk', alpha=0.7)
ax.loglog(step_counts_comp, np.sqrt(step_counts_comp), 'r--',
          label='Theoretical $\\sqrt{N}$', linewidth=2)
ax.set_xlabel('Number of Steps (N)')
ax.set_ylabel('Standard Deviation (σ)')
ax.set_title('Comparison: 1D vs 2D Random Walk Scaling')
ax.legend()
ax.grid(True, alpha=0.3, which='both')
plt.show()