##  **%timeit (Jupyter Notebooks)**

Please note that each time you run %timeit, the exact timing may vary.

In [2]:
import numpy as np

# Example #1: measure the time to create an array.
# %timeit runs the statement repeatedly and reports mean ± std. dev. per loop.
# end=':' keeps the label on the same output line as the %timeit report.
print("Time to create array",end = ':')
%timeit np.arange(1_000_000)

# Example #2: compare two approaches for summing the same 1,000,000-element array.
array = np.random.rand(1_000_000)

# Approach A: Python's built-in sum(), which iterates over the array element by element.
print("Time for built-in sum()",end = ':')
%timeit sum(array)

# Approach B: NumPy's vectorized np.sum().
print("Time for np.sum()",end = ':')
%timeit np.sum(array)

Time to create array:363 µs ± 12.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
Time for built-in sum():69 ms ± 1.05 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Time for np.sum():216 µs ± 2.37 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## **Python’s time Module**

Please note that the exact timings may vary each time you run this cell.

In [3]:
import time
import numpy as np

# Example #1: measure the time to create an array.
# time.perf_counter() is used rather than time.time(): it is a monotonic,
# high-resolution clock intended for measuring short durations, whereas
# time.time() is wall-clock time that the system may adjust between readings.
start_time = time.perf_counter()
np.arange(1_000_000)
end_time = time.perf_counter()
print("Time to create array:", end_time - start_time, "seconds")

# Example #2: compare two approaches for summing the same array.
array = np.random.rand(1_000_000)

# Approach A: Python's built-in sum() (iterates element by element).
start_time = time.perf_counter()
sum(array)
end_time = time.perf_counter()
print("Time for built-in sum():", end_time - start_time, "seconds")

# Approach B: NumPy's vectorized np.sum().
start_time = time.perf_counter()
np.sum(array)
end_time = time.perf_counter()
print("Time for np.sum():", end_time - start_time, "seconds")


Time to create array: 0.0008337497711181641 seconds
Time for built-in sum(): 0.06859707832336426 seconds
Time for np.sum(): 0.001180887222290039 seconds


## **cProfile**


Please note that the exact timings may vary each time you run this cell.

In [4]:
import cProfile
import numpy as np

def create_array():  # Build and return the integers 0..1_999_999 as a NumPy array.
    return np.arange(2_000_000)

def sum_array(array):  # Return the sum of all elements of `array`, computed with np.sum.
    return np.sum(array)

# Entry point that gets profiled: builds the array, sums it, and prints "Total sum: <value>".
def main():
    array = create_array()
    total = sum_array(array)
    print("Total sum:", total)

# Run main() under cProfile; the per-function report is printed when the call finishes.
cProfile.run('main()')


Total sum: 1999999000000
         75 function calls in 0.008 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.008    0.008 <ipython-input-4-b23e58330b4b>:11(main)
        1    0.000    0.000    0.004    0.004 <ipython-input-4-b23e58330b4b>:4(create_array)
        1    0.000    0.000    0.004    0.004 <ipython-input-4-b23e58330b4b>:7(sum_array)
        1    0.000    0.000    0.008    0.008 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 fromnumeric.py:2172(_sum_dispatcher)
        1    0.000    0.000    0.004    0.004 fromnumeric.py:2177(sum)
        1    0.000    0.000    0.004    0.004 fromnumeric.py:71(_wrapreduction)
        1    0.000    0.000    0.000    0.000 fromnumeric.py:72(<dictcomp>)
        5    0.000    0.000    0.000    0.000 iostream.py:195(schedule)
        4    0.000    0.000    0.000    0.000 iostream.py:308(_is_master_process)
        4    0.000    0.000 

## **line_profiler**


Please note that the exact timings may vary each time you run these cells.

In [5]:
# install line_profiler if not already installed
! pip install line_profiler
print("Line profiler installed")

Collecting line_profiler
  Downloading line_profiler-4.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (34 kB)
Downloading line_profiler-4.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (718 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 718.3/718.3 kB 10.4 MB/s eta 0:00:00
Installing collected packages: line_profiler
Successfully installed line_profiler-4.2.0
Line profiler installed


In [6]:
# Load the line_profiler IPython extension; the %lprun magic used in a later cell comes from it.
%load_ext line_profiler
print("Line profiler extension loaded")

Line profiler extension loaded


In [8]:
import numpy as np

# Create an array
def create_arr():
    """Return a NumPy array holding the integers 0 through 99_999."""
    size = 100_000
    return np.arange(size)

# Sum an array
def sum_arr(arr):
    """Sort ``arr`` and return the sum of the sorted values.

    The sort and the sum stay on separate lines, so a line-by-line
    profiler reports the cost of each step on its own row.
    """
    ordered = np.sort(arr)
    total = np.sum(ordered)
    return total

# sum process
def sum_process():
    """Build the array, sum it, and print the result as ``Total sum: <value>``."""
    values = create_arr()
    grand_total = sum_arr(values)
    print("Total sum:", grand_total)

# Run line_profiler through the %lprun magic: each -f names a function to profile
# line by line, and the final argument is the statement to execute.
%lprun -f create_arr -f sum_arr -f sum_process sum_process()

Total sum: 4999950000


In [9]:
import numpy as np
from line_profiler import LineProfiler

# Build and return the integers 0..99_999 as a NumPy array (1_00_000 == 100_000).
def create_arr():
    return np.arange(1_00_000)

# Sort `arr`, then return the sum of the sorted values (two lines, so each step is profiled separately).
def sum_arr(arr):
    sorted_arr = np.sort(arr)
    return np.sum(sorted_arr)

# Entry point being profiled: build the array, sum it, and print "Total sum: <value>".
def sum_process():
    arr = create_arr()
    total = sum_arr(arr)
    print("Total sum:", total)


# Profile the three functions line by line through the LineProfiler API.
# Functions handed to the constructor are registered in the given order,
# exactly as successive add_function() calls would register them.
profiler = LineProfiler(create_arr, sum_arr, sum_process)
profiler.run('sum_process()')
profiler.print_stats()

Total sum: 4999950000
Timer unit: 1e-09 s

Total time: 0.00057356 s
File: <ipython-input-9-caced5388a27>
Function: create_arr at line 5

Line #      Hits         Time  Per Hit   % Time  Line Contents
     5                                           def create_arr():
     6         1     573560.0 573560.0    100.0      return np.arange(1_00_000)

Total time: 0.00125668 s
File: <ipython-input-9-caced5388a27>
Function: sum_arr at line 9

Line #      Hits         Time  Per Hit   % Time  Line Contents
     9                                           def sum_arr(arr):
    10         1    1159310.0    1e+06     92.3      sorted_arr = np.sort(arr)
    11         1      97370.0  97370.0      7.7      return np.sum(sorted_arr)

Total time: 0.00198233 s
File: <ipython-input-9-caced5388a27>
Function: sum_process at line 14

Line #      Hits         Time  Per Hit   % Time  Line Contents
    14                                           def sum_process():
    15         1     589360.0 589360.0     29

## **Parallel Computing with NumPy**

### **Implicit Parallelism in NumPy**

In [12]:
import numpy as np
np.show_config()

Build Dependencies:
  blas:
    detection method: pkgconfig
    found: true
    include directory: /usr/local/include
    lib directory: /usr/local/lib
    name: openblas64
    openblas configuration: USE_64BITINT=1 DYNAMIC_ARCH=1 DYNAMIC_OLDER= NO_CBLAS=
      NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP= HASWELL MAX_THREADS=2
    pc file directory: /usr/local/lib/pkgconfig
    version: 0.3.23.dev
  lapack:
    detection method: internal
    found: true
    include directory: unknown
    lib directory: unknown
    name: dep139863411681952
    openblas configuration: unknown
    pc file directory: unknown
    version: 1.26.4
Compilers:
  c:
    args: -fno-strict-aliasing
    commands: cc
    linker: ld.bfd
    linker args: -Wl,--strip-debug, -fno-strict-aliasing
    name: gcc
    version: 10.2.1
  c++:
    commands: c++
    linker: ld.bfd
    linker args: -Wl,--strip-debug
    name: gcc
    version: 10.2.1
  cython:
    commands: cython
    linker: cython
    name: cython
    versio

### **Using the multiprocessing Module**

In [None]:
from multiprocessing import Pool
import numpy as np
import time

# Define the function to simulate points in a quarter circle
def monte_carlo_pi_part(n, chunk_size=10_000_000):
    """Count random points in the unit square that land inside the quarter circle.

    Draws ``n`` points ``(x, y)`` with both coordinates uniform in [0, 1) and
    counts those satisfying ``x**2 + y**2 <= 1``; ``4 * count / n`` is then an
    estimate of pi.

    Points are generated in batches of at most ``chunk_size`` so that peak
    memory is bounded by the batch size instead of growing with ``n``.

    Args:
        n: Number of points to simulate; must be a non-negative integer.
        chunk_size: Maximum number of points generated at once; must be positive.

    Returns:
        int: How many of the ``n`` points fell inside the quarter circle.

    Raises:
        ValueError: If ``n`` is negative or ``chunk_size`` is not positive.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    # A fresh Generator is seeded from OS entropy, so every worker process gets
    # its own independent stream without touching NumPy's global RNG state.
    rng = np.random.default_rng()

    inside_circle = 0
    remaining = n
    while remaining > 0:
        batch = min(remaining, chunk_size)
        x = rng.random(batch)
        y = rng.random(batch)
        inside_circle += int(np.count_nonzero(x * x + y * y <= 1.0))
        remaining -= batch
    return inside_circle

# Total number of points
n_points = 1_000_000_000

# Single-process baseline.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time that the system may adjust.
start_time = time.perf_counter()
inside_circle = monte_carlo_pi_part(n_points)
pi_single = (inside_circle / n_points) * 4
end_time = time.perf_counter()
print(f"Single-threaded π approximation: {pi_single}")
print(f"Single-threaded computation time: {end_time - start_time:.4f} seconds")

# Multiprocessing version
n_processes = 4  # Number of processes to use
points_per_process = n_points // n_processes
# Hand the leftover points (when n_points is not a multiple of n_processes) to
# the first workers, so exactly n_points points are simulated and the division
# by n_points below stays correct.
remainder = n_points % n_processes
work_sizes = [points_per_process + (1 if rank < remainder else 0)
              for rank in range(n_processes)]

start_time = time.perf_counter()
with Pool(n_processes) as pool:
    results = pool.map(monte_carlo_pi_part, work_sizes)
inside_circle_multi = sum(results)
pi_multi = (inside_circle_multi / n_points) * 4
end_time = time.perf_counter()
print(f"Multiprocessing π approximation: {pi_multi}")
print(f"Multiprocessing computation time: {end_time - start_time:.4f} seconds")


Single-threaded π approximation: 3.141570072
Single-threaded computation time: 29.6268 seconds
Multiprocessing π approximation: 3.141560632
Multiprocessing computation time: 6.3463 seconds


### **Using Dask for Large Datasets**

In [13]:
import dask.array as da
import numpy as np
import time

# Dask version: the timed region covers building the task graph and .compute().
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time that the system may adjust.
start_time = time.perf_counter()
x = da.random.random((10000, 10000), chunks=(1000, 1000))

y = x + x.T
result = y[::2, 5000:].mean(axis=1)
final_result_dask = result.compute()
end_time = time.perf_counter()
print(f"Dask mean calculation: Time taken (Dask): {end_time - start_time:.4f} seconds")

# NumPy version: same expression, evaluated eagerly on a full in-memory array.
start_time = time.perf_counter()

x_np = np.random.random((10000, 10000))
y = x_np + x_np.T
result = y[::2, 5000:].mean(axis=1)
end_time = time.perf_counter()
print(f"NumPy mean calculation: Time taken (NumPy): {end_time - start_time:.4f} seconds")


Dask mean calculation: Time taken (Dask): 0.1752 seconds
NumPy mean calculation: Time taken (NumPy): 0.9444 seconds


### **Using numexpr for Efficient Calculations**

In [None]:
import numexpr as ne
import numpy as np
import time

# Create two large arrays
a = np.random.rand(1_000_000_000)
b = np.random.rand(1_000_000_000)

# NumExpr version: the expression string is evaluated by numexpr, which looks
# up `a` and `b` by name in the calling scope.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time that the system may adjust.
start_time = time.perf_counter()
result_numexpr = ne.evaluate("a * b + a**2")
end_time = time.perf_counter()
print(f"NumExpr result (first 5 elements): {result_numexpr[:5]}")
print(f"Time taken with NumExpr: {end_time - start_time:.4f} seconds")

# NumPy version: same expression evaluated operator by operator.
start_time = time.perf_counter()
result_numpy = a * b + a**2
end_time = time.perf_counter()
print(f"NumPy result (first 5 elements): {result_numpy[:5]}")
print(f"Time taken with NumPy: {end_time - start_time:.4f} seconds")


NumExpr result (first 5 elements): [0.49522517 0.69049445 0.02798162 0.57816388 0.2218517 ]
Time taken with NumExpr: 0.6453 seconds
NumPy result (first 5 elements): [0.49522517 0.69049445 0.02798162 0.57816388 0.2218517 ]
Time taken with NumPy: 4.0546 seconds


### **Using the Numba Compiler**

In [None]:
import numpy as np
from numba import njit
import time

# Create two large arrays of 1_000_000_000 uniform random values each.
# NOTE(review): assuming the default float64 dtype, each array needs roughly
# 8 GB of RAM — confirm the machine can hold both inputs plus the results.
a = np.random.rand(1_000_000_000)
b = np.random.rand(1_000_000_000)

# Define a function with Numba JIT compilation
@njit
def numba_expression(a, b):
    """Return ``a * b + a**2`` for the given inputs.

    The function is decorated with Numba's ``@njit`` without an explicit
    signature, so it is compiled the first time it is called for a given
    combination of argument types.
    """
    return a * b + a**2

# Numba Version
# Warm-up: call once on one-element slices (same dtype and layout as the full
# arrays) so the one-off JIT compilation happens outside the timed region and
# the measurement reflects only the execution of the compiled code.
numba_expression(a[:1], b[:1])

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time that the system may adjust.
start_time = time.perf_counter()
result_numba = numba_expression(a, b)
end_time = time.perf_counter()
print(f"Numba result (first 5 elements): {result_numba[:5]}")
print(f"Time taken with Numba: {end_time - start_time:.4f} seconds")

# NumPy Version
start_time = time.perf_counter()
result_numpy = a * b + a**2
end_time = time.perf_counter()
print(f"NumPy result (first 5 elements): {result_numpy[:5]}")
print(f"Time taken with NumPy: {end_time - start_time:.4f} seconds")


Numba result (first 5 elements): [0.48115001 0.10296687 0.99067045 0.02163336 1.25756439]
Time taken with Numba: 3.6608 seconds
NumPy result (first 5 elements): [0.48115001 0.10296687 0.99067045 0.02163336 1.25756439]
Time taken with NumPy: 5.4296 seconds


### **Using Joblib**

In [None]:
from joblib import Parallel, delayed
import numpy as np
import math
import time

# Define a computationally intensive function
def compute_factorial(x):
    """Return the factorial of ``x`` as computed by ``math.factorial``."""
    result = math.factorial(x)
    return result

# Create a large array of numbers
data = np.arange(1, 10_000)  # Large range for factorial calculations

# Baseline: a plain Python list comprehension in a single process
# (no NumPy vectorization is involved; NumPy only supplies the input values).
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time that the system may adjust.
start_time = time.perf_counter()
results_numpy = [compute_factorial(x) for x in data]
end_time = time.perf_counter()
print(f"Single-threaded (NumPy-like) computation time: {end_time - start_time:.4f} seconds")

# Joblib version: n_jobs=-1 asks joblib to use all available CPU cores.
start_time = time.perf_counter()
results_joblib = Parallel(n_jobs=-1)(delayed(compute_factorial)(x) for x in data)
end_time = time.perf_counter()
print(f"Parallel computation time with Joblib: {end_time - start_time:.4f} seconds")


Single-threaded (NumPy-like) computation time: 7.0207 seconds
Parallel computation time with Joblib: 1.5537 seconds
