# 4  NumPy Basics: Arrays and Vectorized Computation

In [1]:
import numpy as np

# Benchmark fixtures: the same one million integers, once as an ndarray
# and once as a plain Python list.
elems = 10 ** 6
np_arr = np.arange(elems)
l_arr = np_arr.tolist()


#### Comparing the speed of np.array and list

In [2]:
# Time ten passes of a vectorized multiply over the whole ndarray.
%time for i in range(10): np_arr * 2 

CPU times: user 18.6 ms, sys: 4.7 ms, total: 23.3 ms
Wall time: 21.7 ms


In [3]:
# Same workload on the plain list: the comprehension doubles one element at a time.
%time for i in range(10): [x * 2 for x in l_arr]

CPU times: user 599 ms, sys: 113 ms, total: 712 ms
Wall time: 709 ms


## Multidimensional array

In [4]:
import numpy as np

In [5]:
# NOTE(review): no random seed is set, so these values change on every run.
data = np.random.randn(2,3) # 2x3 samples from the standard normal distribution (mean 0, std 1)
data 

array([[-1.26669157, -0.05402559,  0.4242701 ],
       [-0.1082556 , -0.06226018,  1.02021738]])

In [6]:
mu = 3000
sigma = 10
data * sigma + mu # rescaled samples: mean mu, standard deviation sigma

array([[2987.33308432, 2999.45974408, 3004.242701  ],
       [2998.91744402, 2999.37739818, 3010.20217381]])

In [7]:
# Element-wise sum of the array with itself (every entry is doubled).
data + data

array([[-2.53338314, -0.10805118,  0.8485402 ],
       [-0.2165112 , -0.12452036,  2.04043476]])

In [8]:
# Tuple with the size of each dimension: 2 rows by 3 columns.
data.shape

(2, 3)

In [9]:
# Element type shared by every entry of the array.
data.dtype

dtype('float64')

In [10]:
# The Python type of the container itself, as opposed to its element dtype.
type(data)

numpy.ndarray

## Creating an ndarray

In [11]:
# A flat list mixing ints and floats becomes a 1-D array with one common dtype.
data_l = [1, 2, 3.3, 4.4]
data_np = np.array(data_l)
# Report shape, dtype and number of dimensions, one per line.
print(data_np.shape, data_np.dtype, data_np.ndim, sep="\n")
data_np

(4,)
float64
1


array([1. , 2. , 3.3, 4.4])

In [12]:
# A list of equal-length lists becomes a 2-D array (one inner list per row).
data_l = [
    [1, 2, 3.3, 4.4],
    [5, 6, 7, 8],
]
data_np = np.array(data_l)
# Report shape, dtype and number of dimensions, one per line.
print(data_np.shape, data_np.dtype, data_np.ndim, sep="\n")
data_np

(2, 4)
float64
2


array([[1. , 2. , 3.3, 4.4],
       [5. , 6. , 7. , 8. ]])

In [13]:
# 3x10 array of zeros; the shape is passed as a tuple.
np.zeros((3, 10))

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [14]:
np.empty((2, 3, 1)) # uninitialized memory: the values are arbitrary leftovers, not zeros

array([[[2.53338314],
        [0.10805118],
        [0.8485402 ]],

       [[0.2165112 ],
        [0.12452036],
        [2.04043476]]])

In [15]:
# 1-D array of five ones; an integer shape means a single dimension.
np.ones(5)

array([1., 1., 1., 1., 1.])

In [16]:
# Array counterpart of the built-in range: the integers 0..14.
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [17]:
# 5x5 identity matrix: ones on the main diagonal, zeros elsewhere.
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [18]:
# New array with the shape and dtype of data_np, every element set to NaN.
np.full_like(data_np, np.nan)

array([[nan, nan, nan, nan],
       [nan, nan, nan, nan]])

In [19]:
# Without a dtype NumPy infers float64 from the float literals;
# passing one forces the same values into 32-bit integers.
my_array1 = np.array([1, 2, 3., 4.])
my_array2 = np.array([1, 2, 3., 4.], dtype=np.int32)

In [20]:
# Show the inferred dtype next to the explicitly requested one, one per line.
print(my_array1.dtype, my_array2.dtype, sep="\n")

float64
int32


## ndarray.astype()

In [21]:
# astype returns a converted copy; my_array1 itself keeps its float64 dtype.
my_array1.astype('int32')

array([1, 2, 3, 4], dtype=int32)

## Arithmetic with NumPy Arrays

In [22]:
# 2x3 matrix for the arithmetic examples, with the element type stated explicitly.
arr = np.array([[1, 2, 3], [4, 5, 6.]], dtype=np.float64)
arr.dtype

dtype('float64')

In [23]:
# Arithmetic between equal-shape arrays is applied element by element.
arr + arr

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [24]:
arr * arr # element-wise product, NOT matrix multiplication

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [25]:
# Element-wise difference: every entry minus itself is zero.
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [26]:
# Element-wise division: every entry divided by itself is one.
arr/arr

array([[1., 1., 1.],
       [1., 1., 1.]])

In [27]:
# The scalar is applied to every element: reciprocal of each entry.
1/arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [28]:
# Element-wise square.
arr**2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [29]:
# A fractional exponent works too: element-wise square root.
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [30]:
# Scale every element by three and keep the result for the comparisons below.
arr2 = arr * 3
arr2

array([[ 3.,  6.,  9.],
       [12., 15., 18.]])

In [31]:
# Comparisons are element-wise as well and produce a boolean array.
arr2>arr

array([[ True,  True,  True],
       [ True,  True,  True]])

In [32]:
# The opposite comparison, again evaluated per element.
arr2<arr

array([[False, False, False],
       [False, False, False]])

## Basic indexing and slicing

In [33]:
# Rebind arr to the 1-D integers 0..4 for the indexing examples.
arr = np.arange(5)

In [34]:
arr

array([0, 1, 2, 3, 4])

In [35]:
# An integer index returns a single element.
arr[2]

2

In [36]:
# Slice from index 3 through the end.
arr[3:]

array([3, 4])

In [37]:
# len gives the size of the first axis.
len(arr)

5

In [38]:
# A one-element slice still returns an array, unlike the scalar from arr[3].
arr[3:4]

array([3])

In [39]:
# Assigning a scalar to a slice writes it to every selected element, in place.
arr[2:5] = 33

In [40]:
arr

array([ 0,  1, 33, 33, 33])

In [41]:
# A basic slice is a view on arr's data, not a copy.
arr2 = arr[0:2]
arr2

array([0, 1])

In [42]:
# Writing through the view changes the original array as well.
arr2[0] = 99
arr

array([99,  1, 33, 33, 33])

In [43]:
# [:] assigns to every element of the view, so arr[0:2] is overwritten.
arr2[:] = 5555
arr

array([5555, 5555,   33,   33,   33])

In [44]:
# copy() creates independent data, so writes below never reach arr.
arr3 = arr.copy()
# arr4 is a view of the copy (arr3), not of arr.
arr4 = arr3[0:2]
arr4[:] = 999
# arr is displayed unchanged.
arr

array([5555, 5555,   33,   33,   33])

In [45]:
# 3x3 integer matrix for the two-dimensional indexing examples.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [46]:
# Chained indexing: take row 0 first, then element 1 of that row.
arr2d[0][1]

2

In [47]:
# Equivalent comma form: a single indexing operation with (row, column).
arr2d[0, 1]

2

In [48]:
# Assigning through (row, column) updates arr2d in place.
arr2d[0, 1] = 99
arr2d

array([[ 1, 99,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [49]:
# Two 2x3 blocks stacked along the first axis.
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
print(arr3d)
# Progressive indexing: a single element, a whole 2x3 block, then one row of that block.
for selection in (arr3d[0, 1, 2], arr3d[1], arr3d[1, 1]):
    print("\n", selection)

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]

 6

 [[ 7  8  9]
 [10 11 12]]

 [10 11 12]


In [50]:
# Explicit copy of the second block, so later writes to x cannot reach arr3d.
x = arr3d[1].copy()

In [51]:
x

array([[ 7,  8,  9],
       [10, 11, 12]])

In [52]:
x[0]

array([7, 8, 9])

In [53]:
# Modify one element of the copy.
x[0,2] = 99

In [54]:
# arr3d is unchanged, because x was created with .copy().
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [55]:
x

array([[ 7,  8, 99],
       [10, 11, 12]])

### Indexing with slices

In [56]:
arr = np.array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])
# Elements at positions 1 through 5; the stop index 6 is excluded.
arr[1:6]

array([ 1,  2,  3,  4, 64])

In [57]:
arr2d

array([[ 1, 99,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [58]:
# A single slice applies to the first axis: the first two rows.
arr2d[:2]

array([[ 1, 99,  3],
       [ 4,  5,  6]])

In [59]:
# One slice per axis: rows 0-1, columns 1 onward.
arr2d[:2,1:]

array([[99,  3],
       [ 5,  6]])

In [60]:
# An integer index on the column axis drops that dimension: the result is 1-D.
arr2d[:2,1]

array([99,  5])

In [61]:
# Row 1, first two columns.
arr2d[1,:2]

array([4, 5])

In [62]:
# Third column of the first two rows.
arr2d[:2, 2]

array([3, 6])

In [63]:
# Slicing (rather than indexing) the column axis keeps it: the result is 3x1.
arr2d[:, :1]

array([[1],
       [4],
       [7]])

In [64]:
# Show the matrix before the change.
print(arr2d)
# Zero the 2x2 block made of rows 0-1 and columns 1-2, in place.
arr2d[:2, 1:] = 0
print(arr2d[:2, 1:].shape)
print(arr2d)

[[ 1 99  3]
 [ 4  5  6]
 [ 7  8  9]]
(2, 2)
[[1 0 0]
 [4 0 0]
 [7 8 9]]


In [65]:
# Draw a fresh 3x3 standard-normal sample, then blank out row 2 in place.
arr2d = np.random.randn(3, 3)
print(arr2d)
arr2d[2, :] = 0
# Selecting one row of a 2-D array gives a 1-D result.
print(arr2d[2, :].shape)
print(arr2d)

[[ 1.06295402  0.45251092  0.41847051]
 [-0.45075385 -0.06330346  0.35657809]
 [ 0.01551057  0.00734238 -0.12512253]]
(3,)
[[ 1.06295402  0.45251092  0.41847051]
 [-0.45075385 -0.06330346  0.35657809]
 [ 0.          0.          0.        ]]


In [66]:
# NOTE(review): this cell repeats the previous one verbatim; only the random draw differs.
arr2d=np.random.randn(3,3)
print(arr2d)
# A single integer index selects a whole row; the scalar is written to all of it.
arr2d[2] = 0
print(arr2d[2].shape)
print(arr2d)

[[ 1.77021783 -0.44593701  0.14300894]
 [ 0.35885825 -0.8074119  -0.10937059]
 [ 0.61772842 -1.09385038  0.01984546]]
(3,)
[[ 1.77021783 -0.44593701  0.14300894]
 [ 0.35885825 -0.8074119  -0.10937059]
 [ 0.          0.          0.        ]]


In [67]:
import numpy as np
# Two 2x3 blocks holding 1..12, generated instead of typed out by hand.
array3d = np.arange(1, 13).reshape(2, 2, 3)
array3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [68]:
# Keep an independent copy of the first 2x3 block before overwriting it.
a3d_cp=array3d[0].copy()
# The scalar is written to every element of the block.
array3d[0]=13
array3d

array([[[13, 13, 13],
        [13, 13, 13]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [69]:
# Restore the first block from the saved copy.
array3d[0]=a3d_cp
array3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [70]:
# No .copy() here, so x is a view into array3d's second block.
x=array3d[1]
x

array([[ 7,  8,  9],
       [10, 11, 12]])

In [71]:
# Rebind arr2d to a clean 3x3 matrix for the slicing examples that follow.
arr2d=np.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [72]:
# Rows 1 onward and columns 1 onward: the lower-right 2x2 block.
arr2d[1:,1:]

array([[5, 6],
       [8, 9]])

In [73]:
# Rows 0-1 and columns 1 onward: the upper-right 2x2 block.
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

In [74]:
# Integer row index plus column slice: a 1-D piece of row 1.
arr2d[1,:2]

array([4, 5])

In [75]:
# Row slice plus integer column index: a 1-D piece of column 2.
arr2d[:2,2]

array([3, 6])

In [76]:
# Slices on both axes keep both dimensions: the first column as a 3x1 array.
arr2d[:,:1]

array([[1],
       [4],
       [7]])

## Boolean indexing

In [77]:
# Seven labels; used below to build boolean masks over the seven rows of `data`.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
names


array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [78]:
# Rebinds `data` to a 7x4 array: standard-normal draws scaled by 0.5 and shifted by 100.
# NOTE(review): no random seed is set, so these values change on every run.
data = np.random.randn(7, 4)*0.5+100
data

array([[ 99.24539889,  99.21341867, 100.47228706, 101.02076425],
       [ 99.39361302, 100.30391471, 100.34502064,  99.07627411],
       [ 99.97788232,  99.65076714,  99.92343422, 100.86771408],
       [ 99.82651323, 100.63173139, 100.15188371, 100.09373172],
       [ 99.69830766,  99.64374538, 100.15916223, 100.07053583],
       [100.24786595,  99.7812902 ,  99.74146046,  99.44439499],
       [100.01193874,  98.71885637,  99.82183744,  99.2948744 ]])

In [79]:
# Comparing against a string is element-wise and yields a boolean array.
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [80]:
# A boolean array used as an index keeps the rows where it is True.
data[names=='Bob']

array([[ 99.24539889,  99.21341867, 100.47228706, 101.02076425],
       [ 99.82651323, 100.63173139, 100.15188371, 100.09373172]])

In [81]:
# Boolean row selection combined with a column slice (columns 2 onward).
data[names=='Bob', 2:]

array([[100.47228706, 101.02076425],
       [100.15188371, 100.09373172]])

In [82]:
# Boolean row selection combined with an integer column index: the result is 1-D.
data[names=='Bob', 3]

array([101.02076425, 100.09373172])

In [83]:
# ~ inverts a boolean array: every row whose name is not 'Bob'.
data[~(names=='Bob')]

array([[ 99.39361302, 100.30391471, 100.34502064,  99.07627411],
       [ 99.97788232,  99.65076714,  99.92343422, 100.86771408],
       [ 99.69830766,  99.64374538, 100.15916223, 100.07053583],
       [100.24786595,  99.7812902 ,  99.74146046,  99.44439499],
       [100.01193874,  98.71885637,  99.82183744,  99.2948744 ]])

In [84]:
# Store the mask in a variable so it can be reused and inverted.
cond = names=='Bob'

In [85]:
# Same selection as data[~(names=='Bob')], using the stored mask.
data[~cond]

array([[ 99.39361302, 100.30391471, 100.34502064,  99.07627411],
       [ 99.97788232,  99.65076714,  99.92343422, 100.86771408],
       [ 99.69830766,  99.64374538, 100.15916223, 100.07053583],
       [100.24786595,  99.7812902 ,  99.74146046,  99.44439499],
       [100.01193874,  98.71885637,  99.82183744,  99.2948744 ]])

In [86]:
# | is the element-wise OR; the parentheses are required because | binds tighter than ==.
mask = (names=='Bob') | (names=='Joe')
mask

array([ True,  True, False,  True, False,  True,  True])

In [87]:
# Rows that are neither 'Bob' nor 'Joe'.
data[~mask]

array([[ 99.97788232,  99.65076714,  99.92343422, 100.86771408],
       [ 99.69830766,  99.64374538, 100.15916223, 100.07053583]])

In [88]:
# A boolean array with data's own shape selects individual elements; data is modified in place.
data[data>100]=999
data

array([[ 99.24539889,  99.21341867, 999.        , 999.        ],
       [ 99.39361302, 999.        , 999.        ,  99.07627411],
       [ 99.97788232,  99.65076714,  99.92343422, 999.        ],
       [ 99.82651323, 999.        , 999.        , 999.        ],
       [ 99.69830766,  99.64374538, 999.        , 999.        ],
       [999.        ,  99.7812902 ,  99.74146046,  99.44439499],
       [999.        ,  98.71885637,  99.82183744,  99.2948744 ]])

In [89]:
# A 1-D mask selects whole rows: every row whose name is not 'Joe' is set to 7.
data[names != 'Joe'] = 7
data

array([[  7.        ,   7.        ,   7.        ,   7.        ],
       [ 99.39361302, 999.        , 999.        ,  99.07627411],
       [  7.        ,   7.        ,   7.        ,   7.        ],
       [  7.        ,   7.        ,   7.        ,   7.        ],
       [  7.        ,   7.        ,   7.        ,   7.        ],
       [999.        ,  99.7812902 ,  99.74146046,  99.44439499],
       [999.        ,  98.71885637,  99.82183744,  99.2948744 ]])

## Fancy indexing

In [90]:
# NOTE(review): numpy is already imported in the first cell of the notebook.
import numpy as np
# Uninitialized 8x4 buffer: the printed values are arbitrary memory contents.
arr = np.empty((8,4))
print(arr)

[[2.36162114e-316 0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [6.92916506e-310 5.02034658e+175 1.40437999e+165 6.58136398e-038]
 [7.31645420e-042 5.10409857e-066 1.91810877e-052 5.19565184e-144]
 [3.59751658e+252 1.46901661e+179 8.37404147e+242 2.59027926e-144]
 [3.80985069e+180 2.28604334e+243 7.49874301e+247 2.59903827e-144]
 [1.45216567e-047 2.80476295e-032 5.70430454e-066 5.29074508e+174]
 [3.98450681e+252 6.00736842e-067 4.52576986e+097 1.03163110e-071]
 [4.26659782e+126 1.28625693e+248 9.46215455e-053 6.92926878e-310]]


In [91]:
# Fill row i with the value i. A column vector of row numbers, shape (n_rows, 1),
# is broadcast across the columns, which replaces the explicit Python loop over rows.
arr[:] = np.arange(arr.shape[0])[:, np.newaxis]
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [92]:
# Fancy indexing: a list of row numbers picks those rows, in the order given.
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [93]:
# The integers 0..31 laid out as 8 rows of 4, built in a single expression.
arr = np.arange(32).reshape(8, 4)

In [94]:
# Two index lists are paired up: elements (1,0), (5,3), (7,1) and (2,2), giving a 1-D result.
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

## Transposing arrays & swapping axes

In [95]:
# The integers 0..14 as 3 rows of 5.
arr = np.arange(15).reshape(3,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [96]:
# Transpose: rows become columns, so the 3x5 array is shown as 5x3.
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [97]:
# 6x3 standard-normal sample. NOTE(review): unseeded, so values change on every run.
arr = np.random.randn(6, 3)

In [98]:
# Matrix product of the 3x6 transpose with the 6x3 array: a 3x3 result.
np.dot(arr.T, arr) 

array([[ 4.01819039, -1.27970517,  2.92686096],
       [-1.27970517, 11.57301942,  1.12755419],
       [ 2.92686096,  1.12755419,  3.5615217 ]])

In [99]:
# 5x4 integer array in which every element is 3. np.full builds it directly,
# instead of rebinding `a` from a range to a list to an ndarray.
a = np.full((5, 4), 3)
a

array([[3, 3, 3, 3],
       [3, 3, 3, 3],
       [3, 3, 3, 3],
       [3, 3, 3, 3],
       [3, 3, 3, 3]])

In [100]:
# (5x4) times (4x5) gives 5x5; each entry sums four products 3*3, i.e. 36.
b = np.dot(a, a.T)
b

array([[36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36]])

In [101]:
# The @ operator performs the same matrix product as np.dot for these 2-D arrays.
c = a @ a.T
c

array([[36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36],
       [36, 36, 36, 36, 36]])

In [102]:
# The integers 0..15 arranged as two 2x4 blocks (shape (2, 2, 4)).
arr = np.arange(16).reshape((2, 2, 4))

In [103]:
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [104]:
# For a 3-D array .T reverses the axis order: shape (2, 2, 4) becomes (4, 2, 2).
arr.T

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

In [105]:
# Swap the last two axes: each 2x4 block is transposed, so a[i, j, k] == arr[i, k, j].
a = arr.swapaxes(1,2)
a

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [106]:
# Swap the first two axes: b[i, j, k] == arr[j, i, k]. The shape stays (2, 2, 4)
# because both swapped axes have length 2; e.g. b[0, 1] is arr[1, 0] = [8, 9, 10, 11].
b = arr.swapaxes(1,0)
b

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])