In [1]:
import numpy as np

# Checking Numpy Array vs Python List Efficiency

In [2]:
lst = list(range(1000000))

In [3]:
arr = np.arange(1000000)

In [4]:
# Checking Python list efficiency by multiplying each number in the list by 2, repeated 10 times.
%time for i in range(10): lst2 = [p * 2 for p in lst]

Wall time: 3.14 s


In [5]:
# Checking NumPy efficiency by multiplying each number in the array by 2, repeated 10 times.
%time for i in range(10): arr2 = arr * 2

Wall time: 56 ms


# Creating an array and multiplying it by a scalar

In [6]:
# Generating some random data.
data = np.random.randn(2, 3)

In [7]:
data

array([[-0.31997477,  1.2862619 , -1.99638634],
       [-0.05587833, -2.47528375, -0.3007279 ]])

In [8]:
data * 10  # Multiplies every element of the array by 10; no explicit for loop is needed.

array([[ -3.19974769,  12.86261899, -19.9638634 ],
       [ -0.55878332, -24.75283749,  -3.00727901]])

In [9]:
data + data   # Element-wise addition: each element is added to the element at the same position.

array([[-0.63994954,  2.5725238 , -3.99277268],
       [-0.11175666, -4.9505675 , -0.6014558 ]])

# Array Shape And Dtype

In [10]:
# Indicates the number of rows and columns in the array; in this case, 2 rows and 3 columns.
data.shape

(2, 3)

In [11]:
# Indicates the array's data type; in this case, float64.
data.dtype

dtype('float64')

# Creating ndarray

In [12]:
# The easiest way to create an array is to pass a list to the np.array function.
data1 = [6, 7.5, 1, 8, 2]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 1. , 8. , 2. ])

In [13]:
# Passing nested lists of equal length will create a multidimensional array.
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [14]:
# To check whether it really is a multidimensional array, we use ndim.
arr2.ndim

2

In [15]:
arr2.shape

(2, 4)

# Other ways for creating ndarray or Numpy Array

In [16]:
# Creating array of zeros.
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [17]:
# Pass a shape for higher dimensions.
np.zeros((2, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [18]:
# Creating array of ones.
np.ones(8)

array([1., 1., 1., 1., 1., 1., 1., 1.])

In [19]:
data3 = np.ones((3, 2, 3))
data3

array([[[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]]])

In [20]:
data3.ndim

3

In [21]:
# Creating an uninitialized array. It is not safe to assume that np.empty will return an array of all
# zeros; in some cases it may return uninitialized "garbage" values.
np.empty((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [22]:
# Generating series of numbers.
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [23]:
np.identity(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

# Datatypes for ndarray

In [24]:
# dtype is a special object describing the type of the data stored in an ndarray.
arr1 = np.array([1, 2, 3, 4], dtype=np.float64)

In [25]:
arr1

array([1., 2., 3., 4.])

In [26]:
arr2 = np.array([1., 2., 3., 4.], dtype=np.int32)

In [27]:
arr2

array([1, 2, 3, 4])

In [28]:
arr1.dtype

dtype('float64')

In [29]:
arr2.dtype

dtype('int32')

In [30]:
# CASTING using numpy ---- astype method ------

In [31]:
arr = np.array([1, 2, 3, 4])

In [32]:
arr.dtype

dtype('int32')

In [33]:
float_arr = arr.astype(np.float64)

In [34]:
float_arr

array([1., 2., 3., 4.])

In [35]:
float_arr.dtype

dtype('float64')

In [36]:
arr = np.array([2.3, 4.2, 5.3, 6.2, 1.9, 4.5, 1.0])

In [37]:
arr.dtype

dtype('float64')

In [38]:
# Casting float to int truncates the fractional part (e.g. 2.3 -> 2, 1.9 -> 1).
int_arr = arr.astype(np.int32)

In [39]:
int_arr

array([2, 4, 5, 6, 1, 4, 1])

In [40]:
int_arr.dtype

dtype('int32')

In [41]:
# Converting strings to floats.
# np.bytes_ is the fixed-width byte-string dtype; the older alias np.string_ was removed in
# NumPy 2.0, so np.bytes_ is used here to keep the cell runnable on both 1.x and 2.x.
string_arr = np.array(['3.4', '-2.3', '4.5', '8', '3.8', '5.2', '3.7'], dtype=np.bytes_)

In [42]:
string_arr

array([b'3.4', b'-2.3', b'4.5', b'8', b'3.8', b'5.2', b'3.7'], dtype='|S4')

In [43]:
string_arr.dtype

dtype('S4')

In [44]:
float_arr = string_arr.astype(np.float64)

In [45]:
float_arr

array([ 3.4, -2.3,  4.5,  8. ,  3.8,  5.2,  3.7])

In [46]:
float_arr.dtype

dtype('float64')

# Arithmetic with NumPy Arrays

In [85]:
arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)

In [86]:
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [87]:
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [88]:
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [89]:
# Arithmetic operations with a scalar propagate the scalar to each element in the array.
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [90]:
arr * 2

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [91]:
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [92]:
# Comparison
# Comparison between arrays of same size yields boolean arrays.

In [93]:
arr1 = np.array([[0, 4, 1], [7, 2, 12]], dtype=np.float64)

In [94]:
arr1

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [95]:
bool_arr = arr1 > arr

In [96]:
bool_arr

array([[False,  True, False],
       [ True, False,  True]])

In [97]:
bool_arr.dtype

dtype('bool')

# Indexing And Slicing

In [98]:
arr = np.arange(10)

In [99]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [100]:
arr[5]

5

In [101]:
arr[5:8]

array([5, 6, 7])

In [102]:
arr[5:8] = 12

In [103]:
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [104]:
# SLICES OF ARRAYS ARE VIEWS: modifying a slice modifies the original array.
arr_slice = arr[5:8]

In [105]:
arr_slice

array([12, 12, 12])

In [106]:
arr_slice[1] = 12345

In [107]:
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [108]:
arr_slice

array([   12, 12345,    12])

HIGHER DIMENSIONS INDEXING

In [109]:
arr2d = np.array([[1, 2, 3, 4], [3, 5, 2, 5]])

In [110]:
arr2d[1]

array([3, 5, 2, 5])

In [111]:
arr2d[1][3]

5

In [112]:
arr2d[1, 3]

5

3D Array

In [147]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [148]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [149]:
old_values = arr3d[0].copy()

In [150]:
old_values

array([[1, 2, 3],
       [4, 5, 6]])

In [151]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [152]:
arr3d[0] = 42

In [153]:
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [154]:
arr3d[0] = old_values

In [155]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

Indexing with slices

In [165]:
arr = np.arange(15)

In [166]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [167]:
arr[1:6]

array([1, 2, 3, 4, 5])

In [168]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [169]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [183]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [184]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

In [187]:
arr2d[1, :2]

array([4, 5])

In [199]:
arr2d[:2,2]

array([3, 6])

In [201]:
arr2d[:2, 2]

array([3, 6])

Boolean Indexing

In [202]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [203]:
data = np.random.randn(7, 4)

In [205]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [206]:
data

array([[-0.46679348,  0.57551317, -0.6664456 ,  0.98328079],
       [-0.77659051,  0.6759002 , -0.40769423,  0.26889335],
       [-0.04492249, -0.73252894,  1.35598297,  0.07852337],
       [ 1.61648641, -0.91073429,  1.07563064,  0.18720337],
       [ 0.9886471 ,  0.52373601, -2.65035745,  0.69002099],
       [-0.68846659,  0.35385643, -0.3077107 ,  0.96433211],
       [ 0.53971059, -1.01323083,  0.25274126,  0.15219902]])

In [207]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [208]:
data[names == 'Bob']

array([[-0.46679348,  0.57551317, -0.6664456 ,  0.98328079],
       [ 1.61648641, -0.91073429,  1.07563064,  0.18720337]])

In [216]:
data[names == 'Bob', 2:]

array([[-0.6664456 ,  0.98328079],
       [ 1.07563064,  0.18720337]])

In [217]:
data[names == 'Bob', 3]

array([0.98328079, 0.18720337])

In [219]:
data[~(names == 'Bob')]

array([[-0.77659051,  0.6759002 , -0.40769423,  0.26889335],
       [-0.04492249, -0.73252894,  1.35598297,  0.07852337],
       [ 0.9886471 ,  0.52373601, -2.65035745,  0.69002099],
       [-0.68846659,  0.35385643, -0.3077107 ,  0.96433211],
       [ 0.53971059, -1.01323083,  0.25274126,  0.15219902]])

In [222]:
data[names != 'Bob']

array([[-0.77659051,  0.6759002 , -0.40769423,  0.26889335],
       [-0.04492249, -0.73252894,  1.35598297,  0.07852337],
       [ 0.9886471 ,  0.52373601, -2.65035745,  0.69002099],
       [-0.68846659,  0.35385643, -0.3077107 ,  0.96433211],
       [ 0.53971059, -1.01323083,  0.25274126,  0.15219902]])

In [223]:
data[names == 'Bob'] = 7.0

In [224]:
data

array([[ 7.        ,  7.        ,  7.        ,  7.        ],
       [-0.77659051,  0.6759002 , -0.40769423,  0.26889335],
       [-0.04492249, -0.73252894,  1.35598297,  0.07852337],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.9886471 ,  0.52373601, -2.65035745,  0.69002099],
       [-0.68846659,  0.35385643, -0.3077107 ,  0.96433211],
       [ 0.53971059, -1.01323083,  0.25274126,  0.15219902]])

In [226]:
data[names != 'Bob'] = 0.0

In [227]:
data

array([[7., 7., 7., 7.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [7., 7., 7., 7.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [229]:
cond = names == 'Bob'

In [230]:
cond

array([ True, False, False,  True, False, False, False])

In [232]:
data[~cond]

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

# Fancy Indexing 

In [235]:
arr = np.empty((8, 4))

In [237]:
# Fill row i with the value i; assigning a scalar to a row sets every column of that row.
for i in range(8):
    arr[i] = i

In [239]:
for i in arr:
    print(i)

[0. 0. 0. 0.]
[1. 1. 1. 1.]
[2. 2. 2. 2.]
[3. 3. 3. 3.]
[4. 4. 4. 4.]
[5. 5. 5. 5.]
[6. 6. 6. 6.]
[7. 7. 7. 7.]


In [246]:
arr[[3, 2, 7, 6]]

array([[3., 3., 3., 3.],
       [2., 2., 2., 2.],
       [7., 7., 7., 7.],
       [6., 6., 6., 6.]])

In [248]:
arr[[-1, 2, -8]]

array([[7., 7., 7., 7.],
       [2., 2., 2., 2.],
       [0., 0., 0., 0.]])

Passing Multiple Index Arrays

In [249]:
arr = np.arange(32).reshape((8, 4))

In [250]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [251]:
arr[[3, 4, 1], [3, 1, 0]]

array([15, 17,  4])

In [255]:
# Two-step selection: arr[[1, 5, 7, 2]] first picks rows 1, 5, 7 and 2 (in that order), then
# [:, [0, 3, 1, 2]] keeps all of those rows and reorders their columns to 0, 3, 1, 2.
# This yields a rectangular block, unlike arr[[3, 4, 1], [3, 1, 0]], which pairs the two
# index lists element-wise and returns a 1-D array.
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])