Numpy 
- is a linear algebra library for Python
- important because almost all of the libraries in the PyData ecosystem rely on numpy as one of their main building blocks
- incredibly fast because of its bindings to C libraries

numpy arrays most commonly come in two flavors - vectors or matrices (arrays with more than two dimensions are also possible)
- vectors are strictly 1D arrays
- matrices are 2D (can have one row or one column)

- numpy provides efficient storage
- provides better ways to handle data for processing
- we can decide what type of storage we want to use through the dtype argument, e.g. np.int32 to store 32 bit integers
- uses relatively less memory to store data

In [1]:
import numpy as np

In [6]:
# create numpy array
myarr = np.array([3,6,2,7])
myarr

array([3, 6, 2, 7])

In [11]:
# specify the integer width (number of bits) of the array via dtype
myarr1 = np.array([3,6,2,7], np.int8)
myarr1

array([3, 6, 2, 7], dtype=int8)

In [14]:
# a very large number is accepted when no dtype is specified
# it works because numpy is free to choose the storage: the value does not fit in a fixed-width integer, so the array falls back to dtype=object
myarr2 = np.array([3,6,2,7777777777777777777777777777])
myarr2

array([3, 6, 2, 7777777777777777777777777777], dtype=object)

In [20]:
# dont give a number that is too large for the dtype you specify
# this wont work because int8 holds only 8 bits (-128 to 127), so we will get an error
# for bigger numbers use np.int32, np.int64 or another dtype according to memory requirement
# (this particular value is too large even for np.int64; it only fits with dtype=object)
myarr2 = np.array([3,6,2,7777777777777777777777777777], np.int8)
myarr2

OverflowError: Python int too large to convert to C long

In [23]:
# this will give an error because a 1D array has only one axis, so it cannot be indexed with two indices
# it is a 1D array
myarr1 = np.array([3,6,2,7], np.int8)
myarr1[0,1]

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [25]:
# correct way to index 1D array
myarr1 = np.array([3,6,2,7], np.int8)
myarr1[0]

np.int8(3)

In [29]:
# for 2D array
myarr1 = np.array([[3,6,2,7]], np.int8)
myarr1[0,1]

np.int8(6)

In [32]:
# dimensions of numpy array
myarr1.shape

(1, 4)

In [34]:
# type of numpy array
myarr1.dtype

dtype('int8')

In [35]:
# change element at particular index
myarr1[0,1] = 45
myarr1

array([[ 3, 45,  2,  7]], dtype=int8)

Methods to create arrays in numpy - there are 5 general methods
- Conversion from other python structures (lists, tuples)
- Intrinsic numpy array creation objects (arange, ones, zeros)
- Reading array from disk, either from standard or custom formats
- Creating arrays from raw bytes through use of strings or buffers
- Use of special library functions (random)

1. Conversion from other python structures

In [37]:
listarray = np.array([[1,2,3], [5,3,2], [7,8,6], [9,2,6]])
listarray

array([[1, 2, 3],
       [5, 3, 2],
       [7, 8, 6],
       [9, 2, 6]])

In [39]:
listarray.dtype

dtype('int64')

In [40]:
listarray.size

12

In [41]:
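# for a set - curly braces without key: value pairs make a set, not a dictionary
# np.array does not unpack it; the whole set is stored as a single element of an object array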
np.array({34,32,23,65})

array({32, 65, 34, 23}, dtype=object)

2. Intrinsic numpy array creation objects

In [44]:
# will make array of zeros
zeros = np.zeros((2,5))
zeros

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [46]:
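# will make a numpy array of the integers in the given range (here 0 to 14, stop value excluded)
# NOTE: naming the variable `range` shadows Python's built-in range() for the rest of the session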
range = np.arange(15)
range

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [48]:
# will give 12 evenly spaced numbers from 1 to 5 (both endpoints included)
lspace = np.linspace(1,5,12)
lspace

array([1.        , 1.36363636, 1.72727273, 2.09090909, 2.45454545,
       2.81818182, 3.18181818, 3.54545455, 3.90909091, 4.27272727,
       4.63636364, 5.        ])

In [50]:
# Will give a numpy array of shape (4, 6) without initializing it: the values are whatever was already in memory (arbitrary leftovers, not random numbers)
emp = np.empty((4,6))
emp

array([[6.23042070e-307, 4.67296746e-307, 1.69121096e-306,
        9.34613185e-307, 6.23053614e-307, 2.22526399e-307],
       [6.23053614e-307, 1.29060871e-306, 6.23055651e-307,
        1.06811422e-306, 3.56043054e-307, 1.37961641e-306],
       [1.00132483e-307, 1.78020169e-306, 7.56601165e-307,
        1.02359984e-306, 2.78149851e-307, 1.06811422e-306],
       [8.45590539e-307, 6.23054972e-307, 1.42419530e-306,
        9.34609790e-307, 8.01097889e-307, 2.56765117e-312]])

In [53]:
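# will make a new uninitialized array with the same shape and dtype as some other previous array (it does not copy that array's elements)
# any resemblance of the output to lspace is incidental (uninitialized memory) and cannot be relied on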
emp_like = np.empty_like(lspace)
emp_like
# we can then reassign values according to our need

array([1.        , 1.36363636, 1.72727273, 2.09090909, 2.45454545,
       2.81818182, 3.18181818, 3.54545455, 3.90909091, 4.27272727,
       4.63636364, 5.        ])

In [55]:
# will create identity matrix of 45x45
ide = np.identity(45)
ide

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [56]:
ide.shape

(45, 45)

In [57]:
# reshape numpy array
arr = np.arange(99)
arr
# this will give array from 0-98
# which can also be reshaped into 3 rows of 33 columns 

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98])

In [62]:
arr.reshape(3,33)
# arr.reshape(3,31) will give error because 99 elements cant fit into 3*31 = 93 slots

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32],
       [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        65],
       [66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
        98]])

In [63]:
# if we print arr it wont be changed, it will be the original one
# to change it
arr = arr.reshape(3,33)
arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32],
       [33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        65],
       [66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
        98]])

In [66]:
# to make it 1D again
arr.ravel()
# again arr wont be changed and we need to reassign to make arr change its shape
# arr = arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98])

# NUMPY AXIS
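1D array -> [1,2,3,4,5] -> Axis0
2D array -> 
    [[1,2,3],
    [4,5,6],
    [7,8,9]]
            -> Axis0, Axis1 
    Axis0 runs down the rows (vertical direction), Axis1 runs across the columns (horizontal direction)
    an operation along Axis0 collapses the rows and gives one result per column
    an operation along Axis1 collapses the columns and gives one result per row

    sum along Axis0 (column sums) = 12, 15, 18
    sum along Axis1 (row sums) = 6, 15, 24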


In [72]:
x = [[1,2,3], [4,5,6], [7,8,9]]

In [73]:
arr = np.array(x)

In [74]:
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [75]:
arr.sum(axis=0)

array([12, 15, 18])

In [76]:
arr.sum(axis=1)

array([ 6, 15, 24])

In [77]:
# transpose of array
arr.T

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [79]:
# gives iterator
for i in arr.flat:
    print(i)

1
2
3
4
5
6
7
8
9


In [80]:
# number of dimensions 
arr.ndim

2

In [81]:
# to get size, number of elements
arr.size

9

In [83]:
# number of total bytes consumed by array
arr.nbytes

72

In [85]:
# 1D array
# argmax = index of max element
one = np.array([1,2,3,4,5,6,7,8,9])
one.argmax()


np.int64(8)

In [87]:
# argmin = index of min element
one.argmin()

np.int64(0)

In [92]:
# gives the indexes that will sort the array
ar1 = np.array([3,4,65,1])
ar1.argsort()

array([3, 0, 1, 2])

In [90]:
# for 2D
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [91]:
# argmax will flatten the array first then give index of max
arr.argmax()

np.int64(8)

In [93]:
# argmin will flatten the array first then give index of min
arr.argmin()

np.int64(0)

In [96]:
# finds the index of the max element along axis 0: for each column, the row index of its largest value
arr.argmax(axis=0) 

array([2, 2, 2])

In [97]:
# finds the index of the max element along axis 1: for each row, the column index of its largest value
arr.argmax(axis=1) 

array([2, 2, 2])

In [99]:
arr.argsort(axis=0)

array([[0, 0, 0],
       [1, 1, 1],
       [2, 2, 2]])

In [100]:
arr.argsort(axis=1)

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])

In [101]:
arr.ravel()

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [105]:
arr.reshape((9,1))

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [108]:
# mathematical operations on np arrays are applied element by element (here: element-wise addition of two 3x3 arrays)
arr2 = np.array([[1,2,1],[4,0,6],[8,1,0]])
arr + arr2

array([[ 2,  4,  4],
       [ 8,  5, 12],
       [15,  9,  9]])

In [109]:
# you cant do such operations on plain lists - with lists, + concatenates them instead of adding element-wise
[12, 23] + [11, 54]

[12, 23, 11, 54]

In [110]:
# element wise sq root
np.sqrt(arr)

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974],
       [2.64575131, 2.82842712, 3.        ]])

In [112]:
# sum of all elements
arr.sum()

np.int64(45)

In [113]:
# max of all elements
arr.max()

np.int64(9)

In [114]:
# min of all elements
arr.min()

np.int64(1)

In [119]:
# finding elements- gives the index location of elements that match given condition of finding
np.where(arr2>5)

(array([1, 2]), array([2, 0]))

In [120]:
# returns tuple of arrays
type(np.where(arr2>5))

tuple

In [121]:
# will count non zero elements present
arr3 = np.array([[0,2,1],[4,0,6],[0,1,0]])
np.count_nonzero(arr3)

5

In [122]:
# Return the indices of the elements that are non-zero
np.nonzero(arr3)

(array([0, 0, 1, 1, 2]), array([1, 2, 0, 2, 1]))

In [123]:
# numpy takes less space compared to python list
import sys

python_list = [1,4,56,8]
numpy_array = np.array(python_list)

In [124]:
# approximate total size of the python list's elements: size of one int object times the number of elements
sys.getsizeof(1) * len(python_list)

112

In [125]:
# total size of the numpy array's data: size of one element (itemsize) times the number of elements
numpy_array.itemsize * numpy_array.size

32

In [126]:
# random number matrix
np.random.rand(2,5)

array([[0.89838679, 0.98167298, 0.55394202, 0.22839755, 0.70066143],
       [0.14640396, 0.37333691, 0.65768152, 0.01227676, 0.31293939]])

In [128]:
# random numbers from standard normal distribution
np.random.randn(2,2)

array([[ 0.71862848, -1.03677715],
       [ 0.3143358 , -0.2099196 ]])

In [131]:
# random integers from low to high, lowest inclusive, highest not
np.random.randint(1,100,10)

array([34, 29, 44, 50, 65, 63, 32, 99, 57, 25], dtype=int32)

In [132]:
# indexing
a1 = np.array([3,5,1,8,6,0,4,7,2])
a1[4]

np.int64(6)

In [133]:
a1[1:5] # index 1 element included, index 5 element not included

array([5, 1, 8, 6])

In [134]:
a1[:3] # starting from 0, 0 index included, 3 index not

array([3, 5, 1])

In [135]:
a1[4:] # index 4 to end, 4 index included and goes till end

array([6, 0, 4, 7, 2])

In [136]:
# broadcasting
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [137]:
slice_of_arr = arr[0:6]
slice_of_arr

array([0, 1, 2, 3, 4, 5])

In [138]:
slice_of_arr[:] = 99 # set 99 to all elements
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [142]:
arr 
# original array changed too, meaning the data is not copied; the slice is just a different view of the original array
# numpy does this to avoid making unnecessary copies of big arrays
# if you want a copy you have to explicitly make one

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [143]:
arr_copy = arr.copy()
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [144]:
arr_copy[:] = 100
arr_copy

array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])

In [146]:
arr # original isnt changed here

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [148]:
# indexing in 2d array
arr = np.array([[0,2,1],[4,0,6],[0,1,0]])
arr

array([[0, 2, 1],
       [4, 0, 6],
       [0, 1, 0]])

In [149]:
# sliced notation
# grab rows 0 and 1 (everything up to, but not including, row 2) and columns from 1 to the end
arr[:2,1:]

array([[2, 1],
       [0, 6]])

In [150]:
# boolean array
a2 = np.arange(0,11)
a2 > 5

array([False, False, False, False, False, False,  True,  True,  True,
        True,  True])

In [152]:
# conditional selection
a2 = np.arange(0,11)
bool_arr = a2 > 5
a2[bool_arr] # will give the a2 values where bool_arr says true
# same as a2[a2>5]

array([ 6,  7,  8,  9, 10])

In [154]:
arr_2d = np.arange(50).reshape(5,10)
arr_2d

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

In [155]:
arr_2d[1:3,3:5]

array([[13, 14],
       [23, 24]])

In [156]:
a3 = np.arange(0,11)
a3

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [157]:
a3 * 100 # element by element multiplication

array([   0,  100,  200,  300,  400,  500,  600,  700,  800,  900, 1000])

In [159]:
a3 / a3 # first element is 0/0, but numpy doesn't raise an error; it gives a warning and nan (not a number)



array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [160]:
1 / a3 # first element is 1/0, but numpy doesn't raise an error; it gives a warning and infinity



array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111,
       0.1       ])

In [161]:
# exponential element by element
np.exp(a3)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04])

In [162]:
# sine element by element
np.sin(a3)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849,
       -0.54402111])

In [164]:
# logarithm (natural log, element by element)
np.log(a3) # log of 0 we get -infinity

  np.log(a3) # log of 0 we get -infinity


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458,
       2.30258509])