In [1]:
import numpy as np

# Slow: Loop-based filtering
data = np.random.randn(1_000_000)
# Visit every element from Python: positive values are doubled, all other
# values are passed through unchanged.
result = np.array([value * 2 if value > 0 else value for value in data])


In [2]:
# Fast: Boolean indexing
data = np.random.randn(1_000_000)
is_positive = data > 0  # boolean mask with one entry per element of `data`
result = data.copy()    # work on a copy so `data` itself is left untouched
result[is_positive] = result[is_positive] * 2
result

array([ 0.69089603,  1.17710875,  1.52820161, ..., -0.45350551,
       -0.61195101,  1.61773308])

In [3]:
# Slow: Explicit loops
matrix = np.random.rand(1000, 500)
row_means = matrix.mean(axis=1)  # one mean per row
centered = np.zeros_like(matrix)
# Subtract each row's own mean from that row, one row at a time.
for i, row in enumerate(matrix):
    centered[i] = row - row_means[i]


In [4]:
# Fast: Broadcasting
matrix = np.random.rand(1000, 500)
# keepdims=True leaves the means as a (1000, 1) column, which broadcasts
# across the 500 columns of `matrix` in the subtraction below.
row_means = matrix.mean(axis=1, keepdims=True)
centered = np.subtract(matrix, row_means)
centered

array([[ 0.39327569, -0.36106972,  0.32706478, ..., -0.19269519,
         0.06705477,  0.31729336],
       [ 0.48859392,  0.11093355,  0.00800482, ..., -0.06522932,
        -0.35192755,  0.06395108],
       [ 0.00253016, -0.37329837, -0.03237891, ..., -0.22215864,
        -0.35919685,  0.44490038],
       ...,
       [ 0.0494764 , -0.04415978, -0.38098251, ..., -0.13091783,
        -0.06897875, -0.31313201],
       [-0.48721384,  0.37003225,  0.35703322, ..., -0.40694101,
        -0.19313321,  0.18964761],
       [-0.23360043,  0.47102064, -0.18594549, ..., -0.48408184,
        -0.37162009,  0.44321719]])

In [5]:
# Slow: Conditional logic in loops
temps = np.random.uniform(-10, 40, 100000)
classifications = []
for t in temps:
    # Thresholds are checked in order: below 0, then below 20, else warm.
    label = 'freezing' if t < 0 else ('cool' if t < 20 else 'warm')
    classifications.append(label)


In [6]:
# Fast: np.where() and np.select()
temps = np.random.uniform(-10, 40, 100000)
# Conditions are paired with labels by position; where several conditions
# hold for an element, np.select uses the first one in the list.
conditions = [temps < 0, temps < 20, temps >= 20]
labels = ['freezing', 'cool', 'warm']
# The fallback is a string as well, so every possible output is text.
classifications = np.select(conditions, labels, default='unknown')

# For simple splits, np.where() is cleaner:
scores = np.random.randint(0, 100, 10000)
passed = scores >= 60
results = np.where(passed, 'pass', 'fail')
results

array(['fail', 'fail', 'pass', ..., 'fail', 'fail', 'pass'], dtype='<U4')

In [7]:
# Slow: Loop-based gathering
lookup_table = np.array([10, 20, 30, 40, 50])
indices = np.random.randint(0, 5, 100000)
# Fetch one table entry per index from Python, then pack them into an array.
results = np.array([lookup_table[idx] for idx in indices])


In [8]:
# Fast: Fancy indexing
lookup_table = np.array([10, 20, 30, 40, 50])
n_lookups = 100_000
indices = np.random.randint(0, len(lookup_table), n_lookups)
# Indexing with an integer array gathers one table entry per index in a single step.
results = lookup_table[indices]
results

array([10, 10, 30, ..., 50, 30, 50])

In [9]:
matrix = np.arange(4 * 5).reshape(4, 5)
# Paired index arrays: entry k of the result is matrix[row_indices[k], col_indices[k]].
row_indices, col_indices = np.array([0, 2, 3]), np.array([1, 3, 4])
values = matrix[row_indices, col_indices]  # -> matrix[0,1], matrix[2,3], matrix[3,4]
values

array([ 1, 13, 19])

In [10]:
# Slow: Manual looping
def complex_transform(x):
    """Return -sqrt(|x|) when x is negative, otherwise x squared."""
    if x < 0:
        return -np.sqrt(abs(x))
    return x ** 2

data = np.random.randn(10000)
# Call the scalar function once per element from Python.
results = np.array(list(map(complex_transform, data)))


In [11]:
# Cleaner: np.vectorize()
# Note: np.vectorize is a readability convenience, not a speed-up; NumPy
# documents its implementation as essentially a for loop.
def complex_transform(x):
    """Signed transform of a scalar: -sqrt(|x|) for negative x, x ** 2 otherwise."""
    if x < 0:
        return np.sqrt(abs(x)) * -1
    else:
        return x ** 2

# Pin the output dtype with `otypes`. Without it, np.vectorize infers the dtype
# from the result for the first element (so integer input can silently truncate
# later fractional results) and it raises on empty input.
vec_transform = np.vectorize(complex_transform, otypes=[float])
data = np.random.randn(10000)
results = vec_transform(data)
results

array([-1.73985827, -1.07253462,  1.48880208, ..., -0.55911067,
        2.07688909,  0.3801597 ])

In [12]:
# Matrix multiplication the standard way
A = np.random.rand(100, 50)
B = np.random.rand(50, 80)
C = A.dot(B)

# Batch matrix multiply - gets messy
batch_A = np.random.rand(32, 10, 20)
batch_B = np.random.rand(32, 20, 15)
# One (10, 15) product per batch entry, filled in slice by slice.
results = np.zeros((len(batch_A), batch_A.shape[1], batch_B.shape[2]))
for i, (left, right) in enumerate(zip(batch_A, batch_B)):
    results[i] = left.dot(right)


In [13]:
# Clean: einsum
A = np.random.rand(100, 50)
B = np.random.rand(50, 80)
# The shared index j is absent from the output, so it is summed over.
matmul_spec = 'ij,jk->ik'
C = np.einsum(matmul_spec, A, B)

# Batch matrix multiply - single line
batch_A = np.random.rand(32, 10, 20)
batch_B = np.random.rand(32, 20, 15)
# The leading index b appears in both inputs and the output, so it is kept.
batched_spec = 'bij,bjk->bik'
results = np.einsum(batched_spec, batch_A, batch_B)
results

array([[[6.25108829, 4.77130093, 4.39155311, ..., 5.37273891,
         5.67103736, 6.86695143],
        [5.76547923, 4.75428207, 4.11906078, ..., 5.42274164,
         5.45482335, 6.14856137],
        [6.3975369 , 5.06723946, 5.72924723, ..., 5.98162845,
         5.59481404, 6.88355213],
        ...,
        [6.80753626, 5.19919749, 5.41130595, ..., 7.11294579,
         7.10470788, 7.65415337],
        [4.78699829, 3.53428568, 4.61511915, ..., 6.49201884,
         5.55899428, 6.12393186],
        [4.91163825, 3.34272376, 4.02567196, ..., 5.03267927,
         4.69963989, 5.11767804]],

       [[6.94556425, 5.103543  , 5.91737217, ..., 5.86563892,
         5.30200755, 4.33667066],
        [6.44729552, 4.4082003 , 5.75816895, ..., 5.48958554,
         5.23924495, 4.71299021],
        [4.87689723, 3.7491932 , 5.02308839, ..., 3.96110243,
         4.41605575, 3.7309767 ],
        ...,
        [5.75100901, 3.86794695, 5.45101715, ..., 4.59358929,
         4.7137363 , 3.83199041],
        [6.6

In [14]:
# Inputs for the three einsum examples below
matrix = np.random.rand(100, 100)
A = np.random.rand(50, 50)
B = np.random.rand(50, 50)

# Trace (sum of diagonal): the repeated index walks the diagonal
trace = np.einsum('ii->', matrix)

# Transpose: the output lists the same indices in swapped order
transposed = np.einsum('ij->ji', matrix)

# Element-wise multiply then sum: an empty output sums over every index
result = np.einsum('ij,ij->', A, B)  # Same as np.sum(A * B)


In [15]:
# Slow: Manual row iteration
data = np.random.rand(1000, 50)
row_stats = []
for row in data:
    # Custom statistic not in NumPy: the row's range divided by its median
    spread = row.max() - row.min()
    row_stats.append(spread / np.median(row))
row_stats = np.array(row_stats)


In [16]:
# Cleaner: apply_along_axis
data = np.random.rand(1000, 50)

def custom_stat(row):
    """Return the range of `row` (max minus min) divided by its median."""
    spread = np.max(row) - np.min(row)
    return spread / np.median(row)

# axis=1 hands each row of `data` to custom_stat as a 1-D slice.
row_stats = np.apply_along_axis(custom_stat, 1, data)
row_stats

array([2.41978119, 2.49652051, 1.74064468, 1.74939996, 1.79138125,
       1.83127243, 2.0400406 , 2.05816026, 2.14578581, 2.45226877,
       2.30349011, 1.55466768, 1.70373532, 2.11879228, 2.1005599 ,
       1.77640438, 2.02814934, 1.86428694, 2.14789024, 1.89686728,
       1.92270134, 2.24323058, 2.14874672, 2.74337175, 2.18139106,
       2.03591645, 2.04980086, 2.07592962, 1.74998987, 2.85417082,
       1.819107  , 1.85168633, 1.66421999, 2.16678039, 1.90761503,
       1.69201517, 1.66579827, 2.51223229, 2.02601123, 2.14138557,
       1.73197181, 2.377144  , 1.86029548, 1.56714835, 1.8085319 ,
       2.00827885, 2.18031208, 2.00985064, 1.86746965, 1.9335729 ,
       2.42407445, 1.88048817, 1.94069372, 1.8475194 , 2.29905336,
       2.413751  , 2.11023704, 1.66747694, 2.64348961, 2.13534845,
       2.23087564, 2.22609149, 1.56202612, 1.72052257, 1.96884931,
       2.13795462, 1.45894125, 2.20751396, 1.61629076, 2.19977415,
       1.61235232, 2.23810389, 2.01043052, 2.0762094 , 2.73250

In [17]:
# Apply to each column: axis=0 hands each column of `data` to custom_stat
col_stats = np.apply_along_axis(custom_stat, 0, data)
col_stats

array([1.97637879, 1.94987599, 2.09835003, 2.00906579, 2.02337132,
       1.94502207, 2.05812684, 2.02466847, 1.99244429, 2.09430515,
       2.09273801, 1.97682164, 2.02909129, 1.96316459, 2.06509862,
       1.92348273, 1.94939927, 1.99921094, 1.99509331, 1.96049416,
       2.00186574, 2.10639889, 2.08240171, 1.94793256, 1.90583048,
       1.95283809, 2.00247982, 1.96811153, 1.98704148, 2.03440558,
       1.99385715, 2.07371776, 2.09406353, 1.96270549, 2.00995047,
       2.04405825, 2.11616309, 2.03714252, 1.97189588, 1.89745957,
       1.96951282, 2.02039512, 1.93237229, 1.97044772, 2.02512535,
       1.9687144 , 2.06680975, 1.98558279, 2.09223474, 1.92270055])