NumPy (Numerical Python) is a core Python library for numerical and scientific computing. It provides:
- A powerful multi-dimensional array object (ndarray)
- Fast mathematical operations
- Vectorized computation (no explicit Python loops)
- The foundation for libraries like Pandas, SciPy, TensorFlow, PyTorch

In [1]:
# Python list

data = [1, True, "C", 3.14]

lst = [1, 2, 3, 4]
# Drawbacks for numerical work:
# - a list may hold mixed types
# - every element is a separate Python object
# - the list holds only pointers; each object is reference-counted and
#   garbage collected

In [2]:
# NumPy array
import numpy as np
arr = np.array([1, 2, 3, 4])

# All elements share one data type (dtype)
# Elements are stored in one contiguous block of memory
# This makes element-wise operations very fast


In [3]:
import sys

# sys.getsizeof is shallow: for a list it measures only the list object
# itself, not the int objects it references, so this is not the full
# memory footprint of the list.
print(sys.getsizeof(lst))

print(arr.dtype)
print(sys.getsizeof(arr))

# Both a list and a NumPy array carry a header in addition to their
# elements; the array's header holds metadata such as dtype and shape.

88
int64
144


In [4]:
lst = [1, 2, 3, 4, 5]

# getsizeof reports the whole array object; nbytes only the element data
print(sys.getsizeof(arr))

print(arr.nbytes)  # data size

144
32


In [6]:
sys.getsizeof(1)  # a single Python int object takes 28 bytes here

28

In [5]:
lst = [1, 2, 3, 4, 5]

# getsizeof reports the whole array object; nbytes only the element data
print(sys.getsizeof(arr))

print(arr.nbytes)  # data size

144
32


In [7]:
# Summing a plain Python list

lst = [1, 2, 3, 4]
total = sum(lst)
print(total)

10


Pseudo-logic of sum() in CPython (simplified, for reference only; not real code)

total = 0 (PyLongObject)

for each element pointer in list:
    load pointer
    check object type (raises an error on a type mismatch)
    call PyNumber_Add(total, element) (internal function)
    create NEW PyLongObject for result
    decrease refcount of old total (so it can be garbage collected)
    assign pointer to new total

How is a list stored?

list
 ├── ptr → PyLongObject(1)
 ├── ptr → PyLongObject(2)
 ├── ptr → PyLongObject(3)
 ├── ptr → PyLongObject(4)


How PyLongObject is defined at the C level (simplified; the exact layout varies between CPython versions)

// CPython source (C code)
typedef struct {
    PyObject_HEAD
    digit ob_digit[1];
} PyLongObject;

 

In [8]:
import numpy as np

# NumPy stores the values in a native data type, not as Python objects.
# An int32 element takes only 4 bytes; no dtype is given here, so the
# platform's default integer type is used.
arr = np.array([1, 2, 3])
# print the address of the ndarray Python object (not of its data buffer)
print(hex(id(arr)))

0x108931e30


In [9]:
# __array_interface__['data'] is a tuple:
#   data[0]  memory address of the raw buffer
#   data[1]  read-only flag
print(hex(arr.__array_interface__['data'][0]))
print(arr.__array_interface__['data'])
print(arr.__array_interface__['data'][0])

0x14e61adb0
(5609991600, False)
5609991600


In [10]:
# memoryview gives a zero-copy view of the array's buffer:
# it references the same memory directly instead of copying it
mv = memoryview(arr)
print(mv[0])

1


In [11]:
# tobytes() returns the array's contents as raw bytes, like a C byte buffer

# Gives raw bytes
# Useful for I/O and networking
# Not human-readable

raw = arr.tobytes()
print(raw)

b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00'


In [14]:
# NumPy values are mutable in place: an existing element can be overwritten,
# but elements cannot be added to or removed from an existing array

import numpy as np

a = np.array([10, 20, 30])
print(a)
# mutate the value in place
a[1] = 99

print(a)
# a.append(40)   # AttributeError: ndarray has no append() method

[10 20 30]
[10 99 30]


In [18]:
# Note: np.append is a function of the numpy module, not an array method.
# It copies the elements of a into a new array with 40 at the end.
b = np.append(a, 40)
print(a)  # unchanged
print(b)  # all elements of a, followed by 40

# np.delete works similarly: it copies every element into a new array,
# leaving out the one to be deleted
print("===========---delete--============")

c = np.delete(a, 0)  # 0 is the index to remove, not the value

print(a)
print(c)

[10 99 30]
[10 99 30 40]
[10 99 30]
[99 30]


In [19]:
a = np.array([1, 2, 3])
# A NumPy array has a fixed size: existing positions can be overwritten,
# but assigning past the end never grows the array.
a[0] = 100  # allowed, index within range
try:
    a[3] = 4  # index 3 does not exist in an array of size 3
except IndexError as exc:
    # Report the error instead of letting it abort the rest of the run
    print(f"IndexError: {exc}")

IndexError: index 3 is out of bounds for axis 0 with size 3

In [20]:
# vectorized computing: start from an array with an explicit int32 dtype

import numpy as np

a = np.array([10, 20, 30, 40], dtype=np.int32)
print("Original:", a)
print("dtype:", a.dtype)

Original: [10 20 30 40]
dtype: int32


In [21]:
# add 1 to every element, in place (no Python loop needed)
a += 1
print("After +1:", a)

After +1: [11 21 31 41]


In [22]:
a -= 2 # in-place subtraction from every element
print("After -2:", a)

After -2: [ 9 19 29 39]


In [23]:
a *= 3 # in-place multiplication of every element

print("After *3:", a)

After *3: [ 27  57  87 117]


In [None]:
# An error is expected here: true division (/) produces float64 results,
# which cannot be written back in place into the int32 array
a /= 4
print("After /4:", a)
print("dtype:", a.dtype)

UFuncTypeError: Cannot cast ufunc 'divide' output from dtype('float64') to dtype('int32') with casting rule 'same_kind'

In [26]:
a //= 4 # NOTE: // is integer (floor) division, so the result stays an integer
print("After //4:", a)
print("dtype:", a.dtype)

After //4: [ 6 14 21 29]
dtype: int32


In [28]:
# safe: a / 4 creates a new array and leaves a unchanged
b = a / 4
print(b)
print(b.dtype) # note: float64, 8 bytes per element


[1.5  3.5  5.25 7.25]
float64


In [29]:
# alternatively, convert the int32 array to a float32 copy first

b = a.astype(np.float32)
print(b.dtype)
print("before", b)
b /= 4  # in-place division now works because b is a float array
print("after", b)

print("a data type, value remain same", a, a.dtype)

float32
before [ 6. 14. 21. 29.]
after [1.5  3.5  5.25 7.25]
a data type, value remain same [ 6 14 21 29] int32


In [None]:
import numpy as np
# aggregate functions
a = np.array([12, 25, 7, 30, 18])
print(a)

print("Min:", a.min())
print("Max:", a.max())
print("Sum:", a.sum())
print("Average:", a.mean())
# ndarray has no mode() method (calling one raises AttributeError);
# np.unique(a, return_counts=True) can be used to find the most frequent value


[12 25  7 30 18]
Min: 7
Max: 30
Sum: 92
Average: 18.4


AttributeError: 'numpy.ndarray' object has no attribute 'mode'

In [33]:
# shape gives the size of each axis; for this 1-D array that is also the
# total number of elements (a.size gives the total directly)
a.shape

(5,)

In [35]:
# ndim (dimension): how many axes the array has

# 1D array example

a = np.array([10, 20, 30, 40])

print(a.ndim)  # 1: one dimension, i.e. one axis
print(a.shape)  # (4,): 4 elements along that axis

print("-" * 10)

# access an element
print(a[0])  # 10

# mutate the value
a[0] = 99

print(a[0])  # 99

1
(4,)
----------
10
99


In [36]:
 # 2D array

b = np.array([[1, 2, 3], [4, 5, 6]])

print(b.ndim)  # 2: two dimensions
print(b.shape)  # (2, 3): 2 rows, 3 columns

print(b)

# access values by (row, column)
print(b[0, 0])  # row 0, col 0 → 1
print(b[1, 2])  # row 1, col 2 → 6

# mutate one element in place
b[0, 1] = 99
print(b)



2
(2, 3)
[[1 2 3]
 [4 5 6]]
1
6
[[ 1 99  3]
 [ 4  5  6]]


In [37]:
# 3D array example (for clarity)
# NumPy caps the number of dimensions (32 in NumPy 1.x, 64 since NumPy 2.0)
c = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

print(c.ndim)  # 3 dimensions

# The meaning of (x, y, z) depends on context, e.g. (frames, height, width)
# or, in ML, (batch, channels, features); "row" and "column" no longer
# describe a 3-dimensional array.
print(c.shape)

print(c[0, 1, 0])  # 3
c[1, 0, 1] = 99
print(c)


3
(2, 2, 2)
3
[[[ 1  2]
  [ 3  4]]

 [[ 5 99]
  [ 7  8]]]


In [41]:
# import numpy as np

a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
print(a)

# The element-wise comparison yields a boolean array (the mask)
o = (a % 2) == 1
print(o)

# Indexing with the mask keeps only the positions where it is True
odd = a[o]
print(odd)


[0 1 2 3 4 5 6 7 8 9]
[False  True False  True False  True False  True False  True]
[1 3 5 7 9]


In [42]:
# understanding MASK: the mask is built and applied in a single expression
odd = np.arange(10)[np.arange(10) % 2 == 1]
print(odd)
# compact, but hard to read

[1 3 5 7 9]


In [43]:
# MASK written out by hand
odd = np.arange(10)
print(odd)
# Flip individual True/False entries to see how the mask selects by position
mask = [True, False, True, True, False, False, True, False, True, False]
o = odd[mask]
print(o)

[0 1 2 3 4 5 6 7 8 9]
[0 2 3 6 8]


In [44]:
import numpy as np
# Integer dtypes by width:
# int8  -  8 bits, 1 byte
# int16 - 16 bits, 2 bytes
# int32 - 32 bits, 4 bytes
# int64 - 64 bits, 8 bytes
a = np.array([1, 2, 3, 4], dtype=np.int8)
print(a)
print(a.dtype)

[1 2 3 4]
int8


In [45]:
# boolean array: each element is True or False
b = np.array([True, False, True, False], dtype=np.bool_)
print(b)
print(b.dtype)

[ True False  True False]
bool


Python built-in | NumPy dtype
int             | np.int_
float           | np.float64 (the alias np.float_ was removed in NumPy 2.0)
bool            | np.bool_

In [46]:
# vectorized operation: the whole expression is applied element by element,
# without writing a loop

import numpy as np

a = np.array([10, 20, 30, 40])
b = np.array([1, 2, 3, 4])

c = a * 2 + b
print(c)

# No explicit for loop
# Fast (the loop runs at C level)
# Clean and readable


[21 42 63 84]


In [47]:
# vectorized operation: BOOLEAN MASKING

scores = np.array([35, 55, 70, 90, 42])

# scores >= 50 builds a boolean mask; indexing with it keeps the True positions
passed = scores[scores >= 50]
print(passed)


[55 70 90]


In [48]:
# NumPy broadcasting is a rule that allows arrays of different shapes
# to be used together in arithmetic operations.
# Here the scalar 10 is combined with a whole array.
prices = np.array([100, 200, 300])
discounted = prices - 10
print(discounted)

# Conceptually, the scalar 10 is broadcast to [10, 10, 10]
# No copy of it is actually made
# The operation is vectorized


[ 90 190 290]


In [50]:
# NumPy broadcasting is a rule that allows arrays of different shapes
# to be used together in arithmetic operations.
# Here the scalar 10 is combined with a whole array.
prices = np.array([100, 200, 300])
discounted = prices - 10
print(discounted)

# Conceptually, the scalar 10 is broadcast to [10, 10, 10]
# No copy of it is actually made
# The operation is vectorized


[ 90 190 290]


In [49]:
# broadcasting 1D + 1D

a = np.array([1, 2, 3])
b = np.array([10])

print(a + b)

# a has shape (3,), b has shape (1,)
# b is broadcast to shape (3,) to match a


[11 12 13]


In [51]:
# 2D + 1D: the vector of shape (3,) is broadcast across each row of the
# (2, 3) matrix
matrix = np.array([
    [1, 2, 3],
    [4, 5, 6]
])

vector = np.array([10, 20, 30])

print(matrix + vector)

[[11 22 33]
 [14 25 36]]


In [53]:
# Views - zero-copy slicing

a = np.array([1, 2, 3, 4])
# b is a view created from a: it shares a's memory
b = a[1:3]

print("a", a)
print("b", b)
# change a[2]
a[2] = 20
print("--after change--")
print("a", a)
print("b", b)
# both a and b show 20, because b is a view of a

a [1 2 3 4]
b [2 3]
--after change--
a [ 1  2 20  4]
b [ 2 20]


In [54]:
# A slice with a step is also a view
a = np.array([1, 2, 3, 4, 5, 6])
b = a[::2]  # every second element

print(a)
print(b)

# writing through the view changes the original array
b[0] = 100

print(a)


[1 2 3 4 5 6]
[1 3 5]
[100   2   3   4   5   6]


Operation	==> View or Copy?

a[1:5]	==> View

a[::2]		==> View

a[:, 1]		==> View [for 2d, take all rows, but only column index 1]

a[a > 2]		==> Copy

a[a % 2 == 0] 	==>	 Copy

a[[1, 3, 5]]		==>  Copy