<h1>Create NumPy arrays using Python's "array like" data types</h1>


In [1]:
import numpy as np 
# Record the NumPy version that produced the outputs shown in this notebook.
print(np.__version__)

1.16.4


In [2]:
# A plain Python list is "array like", so np.array can convert it directly.
my_list = [-17, 0, 4, 5, 29]
np_array = np.array(my_list)
print(np_array)

[-17   0   4   5  29]


## Multiplication (`*`) works differently for Python types and NumPy types

In [3]:
# list * 10 repeats the list ten times; ndarray * 10 multiplies element-wise.
my_list_10x = my_list * 10
np_array_10x = np_array * 10

print(my_list_10x)
print(np_array_10x)

[-17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29, -17, 0, 4, 5, 29]
[-170    0   40   50  290]


## Complex numbers and tuples

In [4]:
# Mixed int / float / complex input: NumPy keeps the elements of an array
# uniform, so every value is stored using one common type.
tupleToNumpy = (14, -3.14, 4 + 3j)
tupleToNumpy2 = (-14, 3.21)

numpyFromTuple = np.array(tupleToNumpy)
numpyFromTuple2 = np.array(tupleToNumpy2)

print(numpyFromTuple)
print(numpyFromTuple2)


[14.  +0.j -3.14+0.j  4.  +3.j]
[-14.     3.21]


## Simple methods

In [5]:
# Signature: numpy.arange([start, ]stop, [step, ]dtype=None)
# (see the official NumPy documentation for np.arange).
# Here: the values 10, 12, ..., 28 are multiplied by 10 and then reduced by 3.
np.arange(10,30,2)*10-3

array([ 97, 117, 137, 157, 177, 197, 217, 237, 257, 277])

In [6]:
# Only stop and step are given, so the sequence starts at 0: 0, 4, 8, 12, 16.
np.arange(20,step=4)

array([ 0,  4,  8, 12, 16])

In [7]:
# Shift first, then scale: (10, 12, ..., 28) minus 3, multiplied by 10.
nparray = 10 * (np.arange(10, 30, 2) - 3)
nparray

array([ 70,  90, 110, 130, 150, 170, 190, 210, 230, 250])

In [8]:
nparray.size # total number of elements in the array (a single integer, not the shape)

10

In [9]:
# Positional arguments are start=0, stop=30, num=10, endpoint=True:
# ten evenly spaced samples with the stop value 30 included.
np.linspace(0,30,10,True)

array([ 0.        ,  3.33333333,  6.66666667, 10.        , 13.33333333,
       16.66666667, 20.        , 23.33333333, 26.66666667, 30.        ])

## Indexing, shaping (dimension) and slicing

In [10]:
# Start from a flat vector of 20 float zeros and read one element back.
z = np.zeros(20)
z[10]

0.0

In [11]:
# Assign individual elements in place; negative indices count from the end.
z[10] = 9
z[0] = 10
z[-1] = 7.5
z[-2] = -2.1
z

array([10. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  9. ,
        0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. , -2.1,  7.5])

In [12]:
# 3x3 matrix of ones stored as integers (dtype requested explicitly).
o = np.ones(shape=(3, 3), dtype=int)
o

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [13]:
# View the 20 elements as a 4x5 matrix. ndarray.reshape is used instead of
# assigning to z.shape, which the NumPy documentation discourages.
z = z.reshape(4, 5)
z

array([[10. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ],
       [ 9. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. , -2.1,  7.5]])

In [14]:
# Convert from a 2-d to a 3-d array: two blocks of 2x5. ndarray.reshape is
# used instead of assigning to z.shape, which the NumPy documentation
# discourages.
z = z.reshape(2, 2, 5)
z

array([[[10. ,  0. ,  0. ,  0. ,  0. ],
        [ 0. ,  0. ,  0. ,  0. ,  0. ]],

       [[ 9. ,  0. ,  0. ,  0. ,  0. ],
        [ 0. ,  0. ,  0. , -2.1,  7.5]]])

In [15]:
# Chained indexing: block 1 of the 3-d array, then row 1 of that block.
z[1][1]

array([ 0. ,  0. ,  0. , -2.1,  7.5])

In [16]:
# Tuple index selects block 1, row 0; the trailing [0] picks its first element.
z[1,0][0]

9.0

In [17]:
# A single index on a 3-d array returns the whole first 2-d block.
z[0]

array([[10.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [18]:
print(z)
# number of dimensions
print(z.ndim)
# total number of elements
print(z.size)
# shape of the first 2-d block
print(z[0].shape)
# data type of the elements in the array
print(z.dtype)
# sum of all elements; this can be combined with filters
# (see the boolean mask array section below)
print(z.sum())
# sum along axis 1 (operations along an axis are covered in the broadcasting section)
print(z.sum(axis=1))

[[[10.   0.   0.   0.   0. ]
  [ 0.   0.   0.   0.   0. ]]

 [[ 9.   0.   0.   0.   0. ]
  [ 0.   0.   0.  -2.1  7.5]]]
3
20
(2, 5)
float64
24.4
[[10.   0.   0.   0.   0. ]
 [ 9.   0.   0.  -2.1  7.5]]


## boolean mask array

In [19]:
# Redisplay the integer array built earlier; it is the input for the masks below.
nparray

array([ 70,  90, 110, 130, 150, 170, 190, 210, 230, 250])

In [20]:
# True wherever the element is an exact multiple of 35.
can_div_35_mask = (nparray % 35) == 0
can_div_35_mask

array([ True, False, False, False, False, False, False,  True, False,
       False])

In [21]:
# Indexing with a boolean mask keeps only the positions where the mask is True.
filtered_nparray = nparray[can_div_35_mask]
filtered_nparray

array([ 70, 210])

In [22]:
# Filter the already-filtered array again, this time with an inline condition.
further_filtered_nparray=filtered_nparray[filtered_nparray>70]
further_filtered_nparray

array([210])

In [23]:
nparray[nparray>200]  # simpler filter: build the mask inline and keep elements greater than 200

array([210, 230, 250])

In [24]:
# True wherever the element is an exact multiple of 15; printed next to the
# multiple-of-35 mask for comparison.
can_div_15_mask = (nparray % 15) == 0
print(can_div_15_mask)
print(can_div_35_mask)

[False  True False False  True False False  True False False]
[ True False False False False False False  True False False]


In [25]:
# Boolean operator: element-wise AND of the two masks. For boolean arrays the
# & operator gives the same result as np.logical_and.
can_div_15_35_mask = can_div_15_mask & can_div_35_mask
can_div_15_35_mask

array([False, False, False, False, False, False, False,  True, False,
       False])

In [26]:
# Keep only the elements divisible by both 15 and 35.
nparray[can_div_15_35_mask]

array([210])

## Broadcasting

In [27]:
# 70 consecutive integers arranged as 2 blocks of 7 rows x 5 columns.
# reshape() is used rather than assigning to .shape, which the NumPy
# documentation discourages.
array_3d = np.arange(70).reshape(2, 7, 5)
array_3d

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34]],

       [[35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59],
        [60, 61, 62, 63, 64],
        [65, 66, 67, 68, 69]]])

In the context of deep learning, we also use some less conventional notation. We allow the addition of a matrix and a vector, yielding another matrix: $C = A + b$, where $C_{i,j} = A_{i,j} + b_j$. In other words, the vector $b$ is added to each row of the matrix. This shorthand eliminates the need to define a matrix with $b$ copied into each row before doing the addition. This implicit copying of $b$ to many locations is called broadcasting.

In [28]:
x = np.arange(4)
xx = x.reshape((4, 1))
y = np.ones(5)
# NOTE(review): z is not used in this cell, and this rebinds the name z that
# earlier cells used for a different array.
z = np.ones((3, 4))

print('x=', x)
print('')
# ANSI colour escape codes; reference: https://ozzmaker.com/add-colour-to-text-in-python/
print("\033[1;31;46m reshape from 1x4 to 4x1 ")
print("\033[1;31;0m ")
print('xx=', xx)
print('y=', y)
# (5,) plus (4, 1) broadcasts to a (4, 5) result
print(y + xx)

('x=', array([0, 1, 2, 3]))

[1;31;46m reshape from 1x4 to 4x1 
[1;31;0m 
('xx=', array([[0],
       [1],
       [2],
       [3]]))
('y=', array([1., 1., 1., 1., 1.]))
[[1. 1. 1. 1. 1.]
 [2. 2. 2. 2. 2.]
 [3. 3. 3. 3. 3.]
 [4. 4. 4. 4. 4.]]


In [29]:
# (4, 1) minus (4,): both operands are broadcast to (4, 4) before subtracting.
xx-x

array([[ 0, -1, -2, -3],
       [ 1,  0, -1, -2],
       [ 2,  1,  0, -1],
       [ 3,  2,  1,  0]])

To avoid broadcasting, the equivalent manual conversion is shown below.

In [0]:
# Repeat the row [0, 1, 2, 3] four times, then fold the flat list into 4x4.
x2d = [0, 1, 2, 3] * 4
x2dnp = np.array(x2d).reshape((4, 4))

In [0]:
# One constant-valued row per list; stacking the four rows gives the 4x4
# matrix in which every column reads 0, 1, 2, 3.
lista = [0] * 4
listb = [1] * 4
listc = [2] * 4
listd = [3] * 4

xx2dnp = np.array([lista, listb, listc, listd])

In [32]:
# Both operands are already 4x4, so no broadcasting is needed here.
xx2dnp-x2dnp

array([[ 0, -1, -2, -3],
       [ 1,  0, -1, -2],
       [ 2,  1,  0, -1],
       [ 3,  2,  1,  0]])

### operations along axis

In [33]:
# Redisplay the 3-d array created in the Broadcasting section.
array_3d

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34]],

       [[35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59],
        [60, 61, 62, 63, 64],
        [65, 66, 67, 68, 69]]])

In [34]:
# Axis lengths in order: axis 0, axis 1, axis 2.
array_3d.shape

(2, 7, 5)

In [35]:
array_3d.sum(axis=0) # collapses axis 0: this is array_3d[0]+array_3d[1], element by element

array([[ 35,  37,  39,  41,  43],
       [ 45,  47,  49,  51,  53],
       [ 55,  57,  59,  61,  63],
       [ 65,  67,  69,  71,  73],
       [ 75,  77,  79,  81,  83],
       [ 85,  87,  89,  91,  93],
       [ 95,  97,  99, 101, 103]])

In [36]:
array_3d.sum(axis=1) # sum of each column (7 elements) within each block; the collapsed axis has length array_3d.shape[1]

array([[105, 112, 119, 126, 133],
       [350, 357, 364, 371, 378]])

In [37]:
array_3d.sum(axis=2) # sum of each row (5 elements); the collapsed axis has length array_3d.shape[2]

array([[ 10,  35,  60,  85, 110, 135, 160],
       [185, 210, 235, 260, 285, 310, 335]])

Another example

In [38]:
# Integer 5x7 grid holding 0..34, and a float 5x7 grid filled with 7.
array2d_a = np.arange(35, dtype=int).reshape((5, 7))
array2d_b = 7 * np.ones((5, 7), dtype=float)
# int array + float array: the result would be float
array2d_a + array2d_b

array([[ 7.,  8.,  9., 10., 11., 12., 13.],
       [14., 15., 16., 17., 18., 19., 20.],
       [21., 22., 23., 24., 25., 26., 27.],
       [28., 29., 30., 31., 32., 33., 34.],
       [35., 36., 37., 38., 39., 40., 41.]])

In [39]:
# Seed the legacy global random generator so the random vectors drawn from
# here on are the same on every "Restart & Run All".
np.random.seed(0)
Vector1 = np.random.random(7)
# Show four decimal places in all array output from this point on.
np.set_printoptions(precision=4)
Vector1

array([0.0637, 0.9361, 0.6502, 0.1266, 0.2959, 0.284 , 0.4711])

In [40]:
# Vector1 has 7 elements, matching the 7 columns, so it is added to every row.
array2d_a + array2d_b + Vector1

array([[ 7.0637,  8.9361,  9.6502, 10.1266, 11.2959, 12.284 , 13.4711],
       [14.0637, 15.9361, 16.6502, 17.1266, 18.2959, 19.284 , 20.4711],
       [21.0637, 22.9361, 23.6502, 24.1266, 25.2959, 26.284 , 27.4711],
       [28.0637, 29.9361, 30.6502, 31.1266, 32.2959, 33.284 , 34.4711],
       [35.0637, 36.9361, 37.6502, 38.1266, 39.2959, 40.284 , 41.4711]])

In [41]:
# Five random floats, one per row of the 5x7 arrays.
Vector2 = np.random.random(size=5)
Vector2

array([0.7409, 0.9557, 0.7595, 0.5642, 0.0525])

In [42]:
# Dimension is important: a (5,) vector cannot be broadcast against a (5, 7)
# array because the trailing dimensions (5 vs 7) differ. The error is the
# point of this cell, so it is caught and displayed rather than left to
# abort a full notebook run.
try:
    array2d_a + array2d_b + Vector2
except ValueError as err:
    print("ValueError: {}".format(err))

ValueError: ignored

In [0]:
# Column vector (5 rows, 1 column) of random values scaled by 10.
Vector2 = 10 * np.random.random(size=(5, 1))
Vector2

In [0]:
# With shape (5, 1), Vector2 lines up with the 5 rows and is stretched across the 7 columns.
array2d_a + array2d_b + Vector2

## Structured Arrays

In [43]:
# Field layout of one student record, as (field name, dtype code) pairs.
student_def = [
    ('name', 'S6'),
    ('height', 'f8'),
    ('weight', 'f8'),
    ('age', 'i8'),
]
student_def

[('name', 'S6'), ('height', 'f8'), ('weight', 'f8'), ('age', 'i8')]

In [46]:
# Initiate 4 records, with every field of every record set to one.
student_array = np.ones(4, dtype=student_def)
student_array

array([('1', 1., 1., 1), ('1', 1., 1., 1), ('1', 1., 1., 1),
       ('1', 1., 1., 1)],
      dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [49]:
# Overwrite the first and the last record with real data; the two records in
# between keep their initial ones.
student_array[0] = ('Billy', 65, 112, 13)
student_array[3] = ('Tyrion', 73, 205, 14)
print(student_array)

[('Billy', 65., 112., 13) ('1',  1.,   1.,  1) ('1',  1.,   1.,  1)
 ('Tyrion', 73., 205., 14)]


In [56]:
# Get columns: indexing with a field name returns that field for every record.
print(student_array['name'])
# Floor division keeps the integer type on both Python 2 and Python 3.
# A plain "/" is true division on Python 3 and would return floats instead.
print(student_array['age'] // 2)

['Billy' '1' '1' 'Tyrion']
[6 0 0 7]


3D arrays can also use a structured data type

In [66]:
# A 4x3x2 grid of student records, all fields zeroed.
student_3d_array = np.zeros(shape=(4, 3, 2), dtype=student_def)

# Add some data. Values are converted to each field's type on assignment:
# a name longer than the 6-byte 'S6' field is truncated, and a fractional
# age is truncated to an integer.
student_3d_array[0, 2, 1] = ('Billy', 68, 155, 11)
student_3d_array[2, 2, 1] = ('Alice', 62, 165, 12.3)
student_3d_array[3, 1, 0] = ('Allison', 62, 166, 15.5)

student_3d_array

array([[[('',  0.,   0.,  0), ('',  0.,   0.,  0)],
        [('',  0.,   0.,  0), ('',  0.,   0.,  0)],
        [('',  0.,   0.,  0), ('Billy', 68., 155., 11)]],

       [[('',  0.,   0.,  0), ('',  0.,   0.,  0)],
        [('',  0.,   0.,  0), ('',  0.,   0.,  0)],
        [('',  0.,   0.,  0), ('',  0.,   0.,  0)]],

       [[('',  0.,   0.,  0), ('',  0.,   0.,  0)],
        [('',  0.,   0.,  0), ('',  0.,   0.,  0)],
        [('',  0.,   0.,  0), ('Alice', 62., 165., 12)]],

       [[('',  0.,   0.,  0), ('',  0.,   0.,  0)],
        [('Alliso', 62., 166., 15), ('',  0.,   0.,  0)],
        [('',  0.,   0.,  0), ('',  0.,   0.,  0)]]],
      dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [67]:
# Multi-field indexing: a list of field names selects several fields at once.
student_3d_array[['name','weight','age']]

array([[[('',   0.,  0), ('',   0.,  0)],
        [('',   0.,  0), ('',   0.,  0)],
        [('',   0.,  0), ('Billy', 155., 11)]],

       [[('',   0.,  0), ('',   0.,  0)],
        [('',   0.,  0), ('',   0.,  0)],
        [('',   0.,  0), ('',   0.,  0)]],

       [[('',   0.,  0), ('',   0.,  0)],
        [('',   0.,  0), ('',   0.,  0)],
        [('',   0.,  0), ('Alice', 165., 12)]],

       [[('',   0.,  0), ('',   0.,  0)],
        [('Alliso', 166., 15), ('',   0.,  0)],
        [('',   0.,  0), ('',   0.,  0)]]],
      dtype={'names':['name','weight','age'], 'formats':['S6','<f8','<i8'], 'offsets':[0,14,22], 'itemsize':30})

Record arrays are structured arrays wrapped using a subclass of ndarray, `numpy.recarray`, which allows field access by attribute on the array object. Record arrays also use a special datatype, `numpy.record`, which allows field access by attribute on the individual elements of the array. The `numpy.rec.array` function, used below, creates a record array.

In [71]:
# Build a record array from a list of tuples. The name 'Mirranda' is longer
# than the 6-byte 'S6' field and is stored truncated.
studen_record_array = np.rec.array(
    [
        ('Joe', 73, 205, 34),
        ('Mirranda', 65, 112, 23),
    ],
    dtype=student_def
)
studen_record_array

rec.array([('Joe', 73., 205., 34), ('Mirran', 65., 112., 23)],
          dtype=[('name', 'S6'), ('height', '<f8'), ('weight', '<f8'), ('age', '<i8')])

In [70]:
# Get the attribute: on a record array each field is also available as an
# attribute, so .age returns the 'age' field of every record.
studen_record_array.age

array([34, 23])