## Understanding Numpy

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import time


# Report which NumPy release this notebook is running against.
print("Numpy Version: " + np.__version__)

# Keep printed arrays compact: three decimals, and no scientific
# notation for very small floating-point values.
np.set_printoptions(suppress=True, precision=3)

Numpy Version: 2.3.2


Creating Numpy Arrays

In [3]:
# Build arrays from (nested) Python lists.

# 1D: a flat sequence of numbers.
arr1d = np.array([1, 2, 3, 4, 5])

# 2D: rows and columns, like a matrix or a table.
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# 3D: a stack of 2D arrays - handy for images, time series, etc.
arr3d = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])

print(f"1D array:  {arr1d}")
print(f"2D array:\n {arr2d}")
print(f"3D array:\n {arr3d}")


1D array:  [1 2 3 4 5]
2D array:
 [[1 2 3]
 [4 5 6]]
3D array:
 [[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


Creating Special Arrays in NumPy

In [29]:
# Zero-filled array - a common way to initialise storage.
# The shape (3, 4) means 3 rows and 4 columns.
zero = np.zeros((3, 4))

# One-filled array - often used as a starting point.
# Here it is 3D: 2 layers, 3 rows, 4 columns.
ones = np.ones((2, 3, 4))

# Uninitialised array - skips the fill step, so its contents are arbitrary.
# Only use it when every element will be overwritten straight away.
empty = np.empty((2, 2))

print(f"zeros array (3x4):\n {zero}")
print(f"Ones array shape: {ones.shape}")
print(f"Empty array (contains random values):\n {empty}")

zeros array (3x4):
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Ones array shape: (2, 3, 4)
Empty array (contains random values):
 [[0.25 0.5 ]
 [0.75 1.  ]]


`zeros()` and `ones()` are convenient ways to create pre-filled arrays of a specific size. `empty()` is the fastest of the three, but it contains garbage values, so only use it when you'll immediately overwrite the contents.

In [None]:
# Range arrays - like Python's range(), but the result is an ndarray.
range_arr = np.arange(0, 10, 2)  # start, stop (exclusive), step -> [0, 2, 4, 6, 8]
print("Range array", range_arr)

# Linearly spaced arrays - divide a range into equal parts.
# From 0 to 1 with exactly 5 points (including both endpoints).
linspace_arr = np.linspace(0, 1, 5)
print("Linspace array: ", linspace_arr)

# Logarithmically spaced arrays - useful for scientific data.
# From 10^0 to 10^2 (1 to 100) with 5 points.
logspace_arr = np.logspace(0, 2, 5)
print("Logspace array:", logspace_arr)

Range array [0 2 4 6 8]
Logspace array:  [0.   0.25 0.5  0.75 1.  ]


In [None]:
# Identity matrix - ones on the main diagonal, zeros elsewhere.
# Essential for linear algebra operations.
identity = np.eye(4)  # 4x4 identity matrix

# Diagonal matrix - the given values are placed on the main diagonal.
diagonal = np.diag([1, 2, 3, 4])

# Array filled with one specific value.
full_arr = np.full((3, 3), 7)  # 3x3 array filled with 7

print("Identity matrix:\n", identity)
print("Diagonal matrix:\n", diagonal)
print("full array (filled with 7):\n", full_arr)

Diagonal matrices are useful for scaling operations 

NumPy Data Types (dtypes)

Understanding data types is crucial for memory efficiency and numerical precision.

In [None]:
# Choosing the dtype explicitly controls both memory use and precision.
int_arr = np.array([1, 2, 3], dtype=np.int32)             # 32-bit integers
float_arr = np.array([1, 2, 3], dtype=np.float64)         # 64-bit floats (double precision)
bool_arr = np.array([True, False, True], dtype=np.bool_)  # boolean values

# Type conversion: astype() gives the same values in a different dtype.
converted = int_arr.astype(np.float32)                    # 32-bit floats

print(f"Integer array dtype: {int_arr.dtype}")
print(f"float array dtype: {float_arr.dtype}")
print(f"boolean array dtype: {bool_arr.dtype}")
print(f"converted array dtype: {converted.dtype}")

# Memory usage comparison: bytes needed for one element of each dtype.
print(f"int32 uses {int_arr.itemsize} bytes per element")
print(f"float64 uses {float_arr.itemsize} bytes per element")

Array Properties & Attributes

Understanding array properties helps you work effectively with data and debug issues.

In [None]:
# Sample 3D array for the demonstration: 3 layers, each with 4 rows and 5 columns.
arr = np.random.randn(3, 4, 5)

# Walk through the basic attributes every ndarray exposes.
for label, value in (
    ("shape:", arr.shape),        # dimensions as (layers, rows, columns)
    ("size:", arr.size),          # total element count: 3 x 4 x 5 = 60
    ("ndim: ", arr.ndim),         # number of dimensions (3 here)
    ("Dtype: ", arr.dtype),       # data type of the elements
    ("Itemsize: ", arr.itemsize), # bytes per element (8 for float64)
):
    print(label, value)

Array indexing and slicing 

Basic indexing - Accessing Individual Elements 

In [6]:
# Indexing a 1D array works much like indexing a Python list.
arr1d = np.array([10, 20, 30, 40, 50])

# Index 0 is the first element: 10.
print("first element: ", arr1d[0])
# A negative index counts back from the end, so -1 is the last element: 50.
print("Last element:", arr1d[-1])
# A start:stop slice covers positions 1, 2 and 3: [20, 30, 40].
print("slice [1:4]: ", arr1d[1:4])
# A step of 2 keeps every other element: [10, 30, 50].
print("Every 2nd element: ", arr1d[::2])

first element:  10
Last element: 50
slice [1:4]:  [20 30 40]
Every 2nd element:  [10 30 50]


negative indices count from the end (-1 is last element)

In [8]:
# A 2D array is indexed with a row position and a column position.
arr2d = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

# Single element, addressed as [row, column]; row 1, column 2 holds 7.
print("Element at row 1, column 2:", arr2d[1, 2])

# A bare ":" selects everything along that axis.
print("first row: ", arr2d[0, :])     # row 0, every column
print("second column:", arr2d[:, 1])  # every row, column 1

# Rectangular sub-array: [row_start:row_end, col_start:col_end].
print("subarray (rows 1-2, col 1-2:):\n", arr2d[1:3, 1:3])


Element at row 1, column 2: 7
first row:  [1 2 3 4]
second column: [ 2  6 10]
subarray (rows 1-2, col 1-2:):
 [[ 6  7]
 [10 11]]


The comma separates dimensions, and `:` means "all elements along this dimension". Basic slicing returns a view of the original data, not a copy, so modifying the slice also modifies the original array.

**Advanced Indexing - Powerful Selection Methods**

In [None]:
# Fancy indexing: pass an array of positions to pick out elements.
arr = np.array([10, 20, 30, 40, 50])
indices = np.array([0, 2, 4])  # positions 0, 2 and 4
print(f"fancy indexing: {arr[indices]}")

# More flexible than a plain slice: positions may repeat and come in any order.
random_indices = np.array([4, 1, 3, 1])
print(f"random order: {arr[random_indices]}")

Fancy indexing lets you select elements in any order, repeat elements, and select non-contiguous elements. Very useful for data sampling and reordering.

In [None]:
# Fancy indexing in 2D: choose specific row/column combinations.
# 3x4 array: [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
arr2d = np.arange(12).reshape(3, 4)
print(f"Original 2D array:\n {arr2d}")

# Index arrays for both axes are paired up: (0, 1) and (2, 3).
rows = np.array([0, 2])
cols = np.array([1, 3])
print("Elements at (0,1) and (2,3):", arr2d[rows, cols])

# A list of row positions plus ":" keeps whole rows (rows 0 and 2, all columns).
selected_rows = arr2d[[0, 2], :]
print(f"selected rows:\n {selected_rows}")


When you provide arrays for both dimensions, NumPy pairs them element-wise. This is different from slicing, which creates a rectangular subarray.

**Array Reshaping & Manipulation**

- Reshaping changes how the same data is organized in memory without changing the actual values.

In [None]:
# Start with a 1D array
arr = np.arange(12)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
print("Original 1D array:", arr)

# Reshape to 2D: 3 rows × 4 columns
reshaped_2d = arr.reshape(3, 4)
print("Reshaped to 3x4:\n", reshaped_2d)

# Reshape to 3D: 2 layers × 2 rows × 3 columns
reshaped_3d = arr.reshape(2, 2, 3)
print("Reshaped to 2x2x3:\n", reshaped_3d)

# Use -1 to let NumPy calculate one dimension automatically.
# With 12 elements and 4 rows, NumPy infers 3 columns.
auto_reshape = arr.reshape(4, -1)
print("auto- reshaped to 4x?:\n", auto_reshape)


The total number of elements must remain the same (12 in this case). Using -1 tells NumPy to calculate that dimension automatically. Reshaping creates a view when possible, not a copy

In [25]:
# Flattening turns a multi-dimensional array into a 1D one.
arr2d = np.array([[1, 2, 3], [4, 5, 6]])

# flatten() always hands back a copy.
flattened = arr2d.flatten()
print(f"flattened (copy) {flattened}")

# ravel() hands back a view when it can (faster, memory efficient).
ravel = arr2d.ravel()
print(f"Ravel (view if possible): {ravel}")

# Change the source array to show the difference between the two.
arr2d[0, 0] = 999
print("After modifying original:")
print(f"Flattened (unchanged): {flattened}")  # the copy keeps the old value
print(f"Ravel (changed): {ravel}")            # the view shows the new value

flattened (copy) [1 2 3 4 5 6]
Ravel (view if possible): [1 2 3 4 5 6]
After modifying original:
Flattened (unchanged): [1 2 3 4 5 6]
Ravel (changed): [999   2   3   4   5   6]


Use ravel() when you don't need to modify the flattened array independently. Use flatten() when you need a separate copy that won't be affected by changes to the original.

**Transposing and Swapping Axes**

 - Transposing is essential for matrix operations and changing data orientation
 

In [None]:
#2D transposition - flip rows and columns 
arr2d = np.array([[1,2,3],[4,5,6]])
print("original shape:",arr2d.shape)
print("Original:\n",arr2d)

print("transposed shape: ",arr2d.T.shape)
print("Transposed method :\n",arr2d.T)

#alternatively you can use 
print("Transpose method:\n", arr2d.transpose())

Transposing swaps rows and columns. This is crucial for matrix multiplication and when you need to change data orientation (e.g., from samples×features to features×samples)

In [None]:
# Higher-dimensional transposition
arr3d = np.arange(24).reshape(2, 3, 4)  # 2 layers, 3 rows, 4 columns
print("original 3D shape", arr3d.shape)  # (2, 3, 4)

# Specify the new axis order: (axis0, axis1, axis2) -> (axis2, axis0, axis1)
transposed_3d = arr3d.transpose(2, 0, 1)
print("Transposed 3D shape:", transposed_3d.shape)  # (4, 2, 3)

# moveaxis is another way to rearrange axes
moved = np.moveaxis(arr3d, 0, -1)  # Move first axis to last position
print("Moveaxis result shape:", moved.shape)  # (3, 4, 2)

 For 3D+ arrays, you specify the new order of axes. This is useful for reshaping data for different algorithms or visualization requirements.

**Concatenating and Splitting Arrays**

- Combining and dividing arrays is fundamental for data manipulation

In [None]:
# Concatenation - joining arrays along existing axes
arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])

# Concatenate along different axes
concat_rows = np.concatenate([arr1, arr2], axis=0)    # Stack vertically (add rows)
concat_cols = np.concatenate([arr1, arr2], axis=1)    # Stack horizontally (add columns)

print("Original arrays:")
print("Array 1:\n", arr1)
print("Array 2:\n", arr2)
print("Concatenated vertically (axis=0):\n", concat_rows)
print("Concatenated horizontally (axis=1):\n", concat_cols)

`axis=0` means along rows (vertical stacking), `axis=1` means along columns (horizontal stacking). Arrays must have compatible shapes along the non-concatenated dimensions.

In [36]:
# Convenient stacking functions
vstack_result = np.vstack([arr1, arr2])     # Same as concatenate with axis=0
hstack_result = np.hstack([arr1, arr2])     # Same as concatenate with axis=1
dstack_result = np.dstack([arr1, arr2])     # Stack along depth (3rd dimension)

print("vstack (vertical):\n", vstack_result)
print("hstack (horizontal):\n", hstack_result)
print("dstack shape:", dstack_result)  # Creates 3D array

# Splitting arrays - opposite of concatenation
arr = np.arange(12).reshape(3, 4)
split_arrays = np.split(arr, 3, axis=0)        # Split into 3 parts along rows
print("Original array for splitting:\n", arr)
print("Split into 3 parts along rows:")
for i, split_part in enumerate(split_arrays):
    print(f"Part {i}:\n", split_part)

vstack (vertical):
 [[1 2]
 [3 4]
 [5 6]
 [7 8]]
hstack (horizontal):
 [[1 2 5 6]
 [3 4 7 8]]
dstack shape: [[[1 5]
  [2 6]]

 [[3 7]
  [4 8]]]
Original array for splitting:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Split into 3 parts along rows:
Part 0:
 [[0 1 2 3]]
Part 1:
 [[4 5 6 7]]
Part 2:
 [[ 8  9 10 11]]


Stacking functions are shortcuts for concatenation. Splitting divides an array into equal parts - useful for creating training/validation sets or processing data in chunks.

**Mathematical Operations**

**Element-wise Operations - The Power of Vectorization**

- NumPy's biggest advantage is performing operations on entire arrays without writing loops.

In [None]:
# Basic arithmetic operations work element-by-element
arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([10, 20, 30, 40])

print("Array 1:", arr1)
print("Array 2:", arr2)

# All operations happen element-wise automatically
print("Addition:", arr1 + arr2)               # [11, 22, 33, 44]
print("Subtraction:", arr2 - arr1)            # [9, 18, 27, 36]
print("Multiplication:", arr1 * arr2)         # [10, 40, 90, 160]
print("Division:", arr2 / arr1)               # [10, 10, 10, 10]
print("Power:", arr1 ** 2)                    # [1, 4, 9, 16]
print("Modulo:", arr2 % 3)                    # [1, 2, 0, 1]

 This vectorization is much faster than Python loops because the operations are implemented in optimized C code. Each operation applies to corresponding elements.

In [37]:
# Combining an array with a scalar: the scalar is applied to every element.
# The expected values below assume arr1 is [1, 2, 3, 4] from the element-wise cell.
print("Scalar operations:")
for label, outcome in (
    ("Add 10 to all elements:", arr1 + 10),  # [11, 12, 13, 14]
    ("Multiply all by 3:", arr1 * 3),        # [3, 6, 9, 12]
    ("Divide all by 2:", arr1 / 2),          # [0.5, 1, 1.5, 2]
):
    print(label, outcome)

# Several operations chained together, evaluated as ((arr1 + 5) * 2) - 1.
result = (arr1 + 5) * 2 - 1
print("Compound operation (arr1 + 5) * 2 - 1:", result)

Scalar operations:
Add 10 to all elements: [[11 12]
 [13 14]]
Multiply all by 3: [[ 3  6]
 [ 9 12]]
Divide all by 2: [[0.5 1. ]
 [1.5 2. ]]
Compound operation (arr1 + 5) * 2 - 1: [[11 13]
 [15 17]]


When you operate on arrays with scalars, the scalar is automatically "broadcast" to match the array shape. This is much more readable and efficient than manual loops

**Mathematical Functions - Beyond Basic Arithmetic**

- NumPy provides vectorized versions of most mathematical functions.

In [38]:
# A tour of common element-wise mathematical functions.
arr = np.array([1, 4, 9, 16, 25])
print("Original array:", arr)

# Roots and powers.
for label, func in (
    ("Square root:", np.sqrt),  # [1, 2, 3, 4, 5]
    ("Square:", np.square),     # [1, 16, 81, 256, 625]
    ("Cube root:", np.cbrt),
):
    print(label, func(arr))

# Exponential, on a smaller array: [e^1, e^2, e^3].
small_arr = np.array([1, 2, 3])
print("Exponential:", np.exp(small_arr))

# Logarithms of the original array in three different bases.
for label, func in (
    ("Natural log:", np.log),
    ("Log base 10:", np.log10),
    ("Log base 2:", np.log2),
):
    print(label, func(arr))

Original array: [ 1  4  9 16 25]
Square root: [1. 2. 3. 4. 5.]
Square: [  1  16  81 256 625]
Cube root: [1.    1.587 2.08  2.52  2.924]
Exponential: [ 2.718  7.389 20.086]
Natural log: [0.    1.386 2.197 2.773 3.219]
Log base 10: [0.    0.602 0.954 1.204 1.398]
Log base 2: [0.    2.    3.17  4.    4.644]


These functions are much faster than applying Python's math functions in a loop. They also handle edge cases more gracefully: for example, `np.log(0)` returns `-inf` with a `RuntimeWarning`, whereas `math.log(0)` raises a `ValueError`.

In [39]:
# Trigonometric functions - essential for signal processing and geometry.
angles = np.array([0, np.pi / 4, np.pi / 2, np.pi])
print("Angles (radians):", angles)

for label, func in (
    ("Sine:", np.sin),     # [0, √2/2, 1, 0]
    ("Cosine:", np.cos),   # [1, √2/2, 0, -1]
    ("Tangent:", np.tan),  # [0, 1, ∞, 0]
):
    print(label, func(angles))

# Angles given in degrees have to be converted to radians first.
degrees = np.array([0, 45, 90, 180])
radians = np.deg2rad(degrees)
print("Degrees to radians:", radians)

Angles (radians): [0.    0.785 1.571 3.142]
Sine: [0.    0.707 1.    0.   ]
Cosine: [ 1.     0.707  0.    -1.   ]
Tangent: [ 0.000e+00  1.000e+00  1.633e+16 -1.225e-16]
Degrees to radians: [0.    0.785 1.571 3.142]


Trigonometric functions expect angles in radians. Use `deg2rad()` and `rad2deg()` for conversions. These functions are essential for signal processing, computer graphics, and physics simulations.

In [42]:
# Rounding, absolute value and sign.
decimals = np.array([1.234, 5.678, 9.999, -2.345])
print("Original decimals:", decimals)

print("Round to 2 places:", np.round(decimals, 2))
for label, func in (
    ("Floor (round down):", np.floor),      # [1, 5, 9, -3]
    ("Ceiling (round up):", np.ceil),       # [2, 6, 10, -2]
    ("Truncate (toward zero):", np.trunc),  # [1, 5, 9, -2]
):
    print(label, func(decimals))

# Magnitude of each value, and whether it is negative, zero or positive.
print("Absolute values:", np.abs(decimals))
print("Sign (-1, 0, or 1):", np.sign(decimals))

Original decimals: [ 1.234  5.678  9.999 -2.345]
Round to 2 places: [ 1.23  5.68 10.   -2.35]
Floor (round down): [ 1.  5.  9. -3.]
Ceiling (round up): [ 2.  6. 10. -2.]
Truncate (toward zero): [ 1.  5.  9. -2.]
Absolute values: [1.234 5.678 9.999 2.345]
Sign (-1, 0, or 1): [ 1.  1.  1. -1.]


Different rounding functions serve different purposes. `floor()` always rounds down, `ceil()` always rounds up, `trunc()` removes the decimal part, and `round()` rounds to nearest value.

**Aggregate Functions - Summarizing Your Data**

- Aggregate functions reduce arrays to summary statistics.

In [43]:
# 3x3 sample array used for the aggregation examples.
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(f"Sample array:\n {arr}")

# With no axis given, each function reduces the whole array to one number.
for label, func in (
    ("Sum of all elements:", np.sum),    # 45
    ("Mean of all elements:", np.mean),  # 5.0
    ("Standard deviation:", np.std),     # 2.58
    ("Minimum value:", np.min),          # 1
    ("Maximum value:", np.max),          # 9
):
    print(label, func(arr))

Sample array:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
Sum of all elements: 45
Mean of all elements: 5.0
Standard deviation: 2.581988897471611
Minimum value: 1
Maximum value: 9


In [44]:
# Aggregating along one axis at a time (arr is the 3x3 sample array from the previous cell).
# axis=0 collapses the rows, giving one result per column;
# axis=1 collapses the columns, giving one result per row.
for axis, label, func in (
    (0, "Sum along axis 0 (columns):", np.sum),  # [12, 15, 18]
    (1, "Sum along axis 1 (rows):", np.sum),     # [6, 15, 24]
    (0, "Mean along axis 0:", np.mean),          # [4, 5, 6]
    (1, "Mean along axis 1:", np.mean),          # [2, 5, 8]
):
    print(label, func(arr, axis=axis))

# argmax reports where the largest value sits rather than the value itself.
print("Position of max (flattened):", np.argmax(arr))           # 8 (element 9 at position 8)
print("Position of max along axis 0:", np.argmax(arr, axis=0))  # [2, 2, 2]
print("Position of max along axis 1:", np.argmax(arr, axis=1))  # [2, 2, 2]

Sum along axis 0 (columns): [12 15 18]
Sum along axis 1 (rows): [ 6 15 24]
Mean along axis 0: [4. 5. 6.]
Mean along axis 1: [2. 5. 8.]
Position of max (flattened): 8
Position of max along axis 0: [2 2 2]
Position of max along axis 1: [2 2 2]
