Import NumPy before use.

In [1]:
import numpy as np

---

Create an array and use its basic attributes.

- Accessing the array.
- dimension
- shape
- size (total number of entries)
- data type of array


In [2]:
# A single float literal (2.0) is enough to make NumPy store every element as float64.
a = np.array(
    [
        [1, 2.0, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    ]
)
a

array([[ 1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.],
       [ 9., 10., 11., 12.]])

In [3]:
# Index with (row, column); both are zero-based, so this is the 2nd row, 4th column.
a[1,3]

np.float64(8.0)

In [4]:
# Number of axes (dimensions) of the array.
a.ndim

2

In [5]:
# Length of each axis as a tuple: (rows, columns) for a 2-D array.
a.shape

(3, 4)

In [6]:
# Total number of elements, i.e. the product of the shape entries.
a.size

12

In [7]:
# Element type; float64 here because one literal in the array was a float (2.).
a.dtype

dtype('float64')

---
Create basic arrays using `np.zeros`, `np.ones`, `np.empty`, `np.arange`, `np.linspace`, and `np.full`.

In [8]:
# The shape is passed as a tuple; elements are floats by default.
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [9]:
# 3-D example: 2 blocks, each with 2 rows and 5 columns.
np.ones((2,2,5))

array([[[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]],

       [[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]]])

In [10]:
# np.empty only allocates memory: the contents are uninitialized, so the zeros
# shown below are incidental and must not be relied on.
np.empty((2,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [11]:
# Integers from 0 up to (but not including) 4.
np.arange(4)

array([0, 1, 2, 3])

In [12]:
# arange(start, stop, step): start at 4, step by 3, stop before 11.
np.arange(4,11,3)

array([ 4,  7, 10])

In [13]:
# 9 evenly spaced samples from 3 to 12; unlike arange, the endpoint is included.
np.linspace(3,12,num=9)

array([ 3.   ,  4.125,  5.25 ,  6.375,  7.5  ,  8.625,  9.75 , 10.875,
       12.   ])

In [14]:
# dtype overrides the default float element type.
np.ones(5,dtype=np.int32)

array([1, 1, 1, 1, 1], dtype=int32)

In [15]:
# np.full(shape, fill_value): a 1-D array of four elements, each equal to 2.
np.full(4,2)

array([2, 2, 2, 2])

In [16]:
# Same constructor with a 2-D shape: 2 rows by 4 columns, every element 12.
np.full((2,4),12)

array([[12, 12, 12, 12],
       [12, 12, 12, 12]])

---

## Basic array operations

In [24]:
# a holds integers; b holds floats and has the same (3, 2) shape.
a = np.array(
    [
        [2, 5],
        [4, 1],
        [1, 2],
    ]
)
b = np.ones((3, 2))
# A single newline-separated print emits the same four chunks, in the same order.
print("array a : ", a, "array b : ", b, sep="\n")

array a : 
[[2 5]
 [4 1]
 [1 2]]
array b : 
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [25]:
# Element-wise addition; a is int and b is float, so the result is float.
a + b

array([[3., 6.],
       [5., 2.],
       [2., 3.]])

In [26]:
# Element-wise subtraction, again promoted to float.
a - b

array([[1., 4.],
       [3., 0.],
       [0., 1.]])

In [28]:
# Element-wise product (not matrix multiplication); int * int stays int.
a * a

array([[ 4, 25],
       [16,  1],
       [ 1,  4]])

In [29]:
# True division yields floats even though both operands are integer arrays.
a/a

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

In [30]:
# With no axis given, every element is summed into a single scalar.
a.sum()

np.int64(15)

In [31]:
# axis=0 collapses the rows, giving one sum per column.
a.sum(axis=0)

array([7, 8])

In [32]:
# axis=1 collapses the columns, giving one sum per row.
a.sum(axis=1)

array([7, 5, 3])

---

### Broadcasting


In [34]:
# Small integer vector used to demonstrate scalar broadcasting.
data = np.array([1, 2])

In [35]:
# The scalar is applied to every element; the integer array is promoted to float.
data * 1.6

array([1.6, 3.2])

In [50]:
# a: four rows (0, 10, 20, 30), each value repeated across three columns.
a = np.array(
    [
        [0.0, 0.0, 0.0],
        [10.0, 10.0, 10.0],
        [20.0, 20.0, 20.0],
        [30.0, 30.0, 30.0],
    ]
)

# Build b directly as a column: np.newaxis appends a length-1 axis, (4,) -> (4, 1).
b = np.array([1.0, 2.0, 3.0, 4.0])[:, np.newaxis]
b

array([[1.],
       [2.],
       [3.],
       [4.]])

In [51]:
# b has shape (4, 1), so its single column is stretched across the 3 columns of a.
a + b

array([[ 1.,  1.,  1.],
       [12., 12., 12.],
       [23., 23., 23.],
       [34., 34., 34.]])

---
Here we are trying to find which category this particular athlete fits into.

In [97]:
# observation: height and weight of a single athlete.
# codes: one row per athlete type, holding the same two measurements.
# NOTE(review): the column order (which value is height, which is weight) is not stated - confirm.
observation = np.array([111.0, 188.0])
codes = np.array(
    [
        [102.0, 203.0],
        [132.0, 193.0],
        [45.0, 155.0],
        [57.0, 173.0],
    ]
)

In [98]:
# Find which athlete type the observation is nearest to.
# Broadcasting subtracts the single observation from every row of codes.
diff = codes - observation
print(diff)
# Euclidean distance per row: square, sum over the last axis, take the root.
dist = np.sqrt(np.square(diff).sum(axis=-1))
dist

[[ -9.  15.]
 [ 21.   5.]
 [-66. -33.]
 [-54. -15.]]


array([17.49285568, 21.58703314, 73.79024326, 56.04462508])

In [99]:
# Index of the smallest distance, i.e. the row of codes closest to the observation.
np.argmin(dist)

np.int64(0)

What if we use multiple observations? Comparing the shapes from the right, the last axes match (2 and 2), but the next axes do not (4 and 2) and neither of them is 1, so the two arrays cannot be broadcast together.

In [100]:
# Two observations; the integer literals in the second row are promoted to float.
observations = np.array(
    [
        [111.0, 188.0],
        [120, 134],
    ]
)  # shape : 2,2
codes = np.array(
    [
        [102.0, 203.0],
        [132.0, 193.0],
        [45.0, 155.0],
        [57.0, 173.0],
    ]
)  # shape : 4,2

In [101]:
# Shapes (4, 2) and (2, 2) are not broadcast-compatible, so this subtraction
# raises ValueError. The error is the point of the cell, so catch and display
# it instead of letting it abort a "Restart & Run All".
try:
    diff = codes - observations
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (4,2) (2,2) 

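To solve this, we can insert a new axis into `codes` so that its shape becomes (4, 1, 2), which broadcasts against `observations` of shape (2, 2).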

In [102]:
# (4, 1, 2) - (2, 2) broadcasts to (4, 2, 2): every code against every observation.
diff = codes[:, np.newaxis, :] - observations
print(diff, diff.shape, sep="\n")
# Collapse the last axis (the two measurements) into one Euclidean distance.
dist = np.sqrt(np.square(diff).sum(axis=-1))
dist

[[[ -9.  15.]
  [-18.  69.]]

 [[ 21.   5.]
  [ 12.  59.]]

 [[-66. -33.]
  [-75.  21.]]

 [[-54. -15.]
  [-63.  39.]]]
(4, 2, 2)


array([[17.49285568, 71.30918594],
       [21.58703314, 60.20797289],
       [73.79024326, 77.88452991],
       [56.04462508, 74.09453421]])

In [107]:
# dist has one column per observation, so axis=0 picks the nearest code for each observation.
np.argmin(dist,axis=0)

array([0, 1])

But this operation creates a temporary array of shape (4, 2, 2), which is memory-inefficient on large datasets. We can avoid this by using a `for` loop instead; it will be slower than NumPy's vectorized C operations but more efficient in terms of memory.

In [108]:
# Loop over the observations so that only one (4, 2) difference array is alive
# at a time, instead of the full (4, 2, 2) temporary.
# The loop uses its own names rather than `diff`/`dist`, so the results of the
# vectorized cell are not overwritten (re-running np.argmin(dist, axis=0) after
# this cell would otherwise silently give a different answer), and `ls` is
# bound exactly once, to the final array.
per_obs_dist = []
for obs in observations:
    obs_diff = codes - obs
    per_obs_dist.append(np.sqrt(np.sum(obs_diff**2, axis=-1)))
ls = np.array(per_obs_dist)

In [109]:
# One row per observation and one column per code.
ls

array([[17.49285568, 21.58703314, 73.79024326, 56.04462508],
       [71.30918594, 60.20797289, 77.88452991, 74.09453421]])

In [113]:
# Rows are observations here, so axis=1 selects the nearest code for each observation.
np.argmin(ls,axis=1)

array([0, 1])