# <font color = red> Numpy

In [2]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

## <font color = blue> Creating a numpy array

### Using `np.array()`

In [3]:
# Create a numpy array

np_arr = np.array([[11,23,32],[12,34,56],[77,77,54]])
print(np_arr)
print()
print(np_arr.shape)

[[11 23 32]
 [12 34 56]
 [77 77 54]]

(3, 3)


In [4]:
np_arr = np.array([[11,23,32],[12,34,56],[77,77]])
print(np_arr)
print()
print(np_arr.shape)

[list([11, 23, 32]) list([12, 34, 56]) list([77, 77])]

(3,)


In [None]:
np_arr = np.array([[11,23,32],[12,34,56],[77,77,None]])
print(np_arr)
print()
print(np_arr.shape)

[[11 23 32]
 [12 34 56]
 [77 77 None]]

(3, 3)


### `np.zeros() and np.ones()`

In [None]:
np_zeros = np.zeros([2,4])
np_zeros

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
np_ones = np.ones([2,4])
np_ones

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

### `np.full()`

In [None]:
np.full((3,2),4)

array([[4, 4],
       [4, 4],
       [4, 4]])

### `np.arange()`


In [None]:
np.arange(12)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

### `np.linspace()`

the `linspace()` function generates an array with evenly spaced values between specified start, end values, using a specified number of elements

In [None]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [None]:
np.linspace(0, 1, 20)
# 20 points equally spaced between 0 and 1

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [None]:
np.linspace(0, 1, 20, False)
# 20 points Unequally spaced between 0 and 1

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

---
### `identity`

In [None]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### `np.random()` 

In [None]:
np.random.random(size=2)
# Return random floats in the half-open interval [0.0, 1.0).
# Results are from the “continuous uniform” distribution over the stated interval.

array([0.66018062, 0.61165426])

In [None]:
np.random.normal(size=2)
# creates an array of specified shape and fills it with random values which 
# is actually a part of Normal(Gaussian)Distribution. 
# This is Distribution is also known as Bell Curve because of its characteristics shape.

# https://www.geeksforgeeks.org/rand-vs-normal-numpy-random-python/

array([-0.50271929, -0.89775109])

In [None]:
np.random.randint(2,10, size =(5,5) )  # 25 random numbers between 2 and 10 in a 5x5 matrix

array([[3, 5, 6, 7, 2],
       [5, 8, 9, 3, 5],
       [2, 4, 2, 5, 5],
       [2, 6, 5, 6, 9],
       [5, 4, 5, 5, 8]])

In [None]:
np.random.rand(2, 4)

array([[0.03095338, 0.94749641, 0.37644835, 0.53019146],
       [0.62435959, 0.41807392, 0.60394879, 0.62182976]])

## <font color = blue> Reshaping the array

### `np.arange()`

In [None]:
np.arange(12).reshape(3,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
np.arange(12).reshape(4,3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [None]:
np.arange(12).reshape(12,1)

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11]])

`arr.flatten()`

In [14]:
arr = np.array([[10,20,30],[40,50,60],[18,18,18]])
arr.flatten()

array([10, 20, 30, 40, 50, 60, 18, 18, 18])

In [15]:
arr.flatten().shape

(9,)

There are two ways to flatten a matrix depending on the data type. For Numpy arrays, we use np.array.flatten() command; for non-array matrices, we use matrix.ravel(). Please try it out.

The numpy module of Python provides a function called numpy.ravel, which is used to change a 2-dimensional array or a multi-dimensional array into a contiguous flattened array. The returned array has the same data type as the source array or input array. If the input array is a masked array, the returned array will also be a masked array.

- Syntax: numpy.ravel(x, order='C')  
- Parameters: x: array_like

This parameter defines the input array, which we want to change in a contiguous flattened array. The array elements are read in the order specified by the order parameter and packed as a 1-D array.

order: {'C','F', 'A', 'K'}(optional)

If we set the order parameter to 'C', it means that the array gets flattened in row-major order. If 'F' is set, the array gets flattened in column-major order. The array is flattened in column-major order only when 'A' is Fortran contiguous in memory, and when we set the order parameter to 'A'. The last order is 'K', which flatten the array in same order in which the elements occurred in the memory. By default, this parameter is set to 'C'.

Returns:
This function returns a contiguous flatten array with the same data type as an input array and has shape equal to (x.size).

In [None]:
arr

array([[ 3,  4,  5,  6],
       [ 7,  8,  9, 10],
       [11, 12, 13, 14],
       [15, 16, 17, 18]])

In [None]:
arr_ravel = np.ravel(arr)  
print(arr_ravel)

[ 3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]


In [None]:
 np.ravel(arr, order='C')

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18])

In [None]:
 np.ravel(arr, order='F')

array([ 3,  7, 11, 15,  4,  8, 12, 16,  5,  9, 13, 17,  6, 10, 14, 18])

## <font color = blue> Basic Slicing and Indexing

### `Location based indexing`

In [17]:
arr = np.array([[10,20,30],[40,50,60],[70,80,90]])

In [18]:
print(arr[0])

[10 20 30]


In [19]:
print(arr[0][0])

10


In [20]:
print(arr[0,0])

10


In [21]:
print(arr[-1])

[70 80 90]


In [22]:
print(arr[:,1:3])

[[20 30]
 [50 60]
 [80 90]]


In [23]:
print(arr[[0,1,2],[1,0,0]])

[20 40 70]


### `Boolean Indexing`

### `np.where(  )`

- Returns the index based on the boolean

### `np.isnan()` 
- Returns the boolean of data == np.nan

In [74]:
location = np.array(["Mumbai","Mumbai","Delhi","Chennai","Bangalore"]) #2 branches in Mumbai
sales_data = np.random.randint(1,10,size=(5,5))
sales_data = sales_data.astype(float)
sales_data[[[1,4],[2,3]]] = np.nan
print(sales_data)

[[ 2.  7.  8.  7.  5.]
 [ 1.  7. nan  1.  7.]
 [ 6.  4.  2.  7.  1.]
 [ 3.  3.  3.  6.  1.]
 [ 1.  8.  6. nan  7.]]


In [75]:
sales_data[location =="Mumbai"]
# depending on which index is True, it prints that row
# In this case, mumbai is at index 2, therefore row no. 2 will be printed

array([[ 2.,  7.,  8.,  7.,  5.],
       [ 1.,  7., nan,  1.,  7.]])

In [76]:
(location =="Mumbai")

array([ True,  True, False, False, False])

In [77]:
np.where(location == "Mumbai")

(array([0, 1], dtype=int32),)

In [78]:
# if the match does not happen , then all values are false.
sales_data[location =="Kolkata"]
# the o/p in such a case is indicating shape as(0 rows, 3 features)

array([], shape=(0, 5), dtype=float64)

We can also use Boolean indexing for selecting some elements of an array that satisfy a
particular condition. For example, in the previous array suppose we want to only select
non-zero elements.
We can do that easily using the following code.

In [79]:
sales_data >4

array([[False,  True,  True,  True,  True],
       [False,  True, False, False,  True],
       [ True, False, False,  True, False],
       [False, False, False,  True, False],
       [False,  True,  True, False,  True]])

`np.nan==np.nan`

In [84]:
np.isnan(sales_data)

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False]])

In [80]:
np.where(np.isnan(sales_data))

(array([1, 4], dtype=int32), array([2, 3], dtype=int32))

In [81]:
sales_data[np.isnan(sales_data)] = 0.0 # all nan values will be reset to 0
sales_data

array([[2., 7., 8., 7., 5.],
       [1., 7., 0., 1., 7.],
       [6., 4., 2., 7., 1.],
       [3., 3., 3., 6., 1.],
       [1., 8., 6., 0., 7.]])

###  `np.ndenumerate()`

ndenumerate return the co-ordinates and corresponding values in the co-ordinates

In [86]:
A = np.array([[11, 22, 23], [33, 43, 55]])

In [87]:
np.ndenumerate(A)

<numpy.ndenumerate at 0xafecdc0>

In [88]:
for index, x in np.ndenumerate(A):
    print(index, x)

(0, 0) 11
(0, 1) 22
(0, 2) 23
(1, 0) 33
(1, 1) 43
(1, 2) 55


## <font color=blue> Operations on Arrays


In [91]:
arr1 = np.arange(15).reshape(5,3)
arr1

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [92]:
arr1 + 5 

array([[ 5,  6,  7],
       [ 8,  9, 10],
       [11, 12, 13],
       [14, 15, 16],
       [17, 18, 19]])

In [93]:
arr1 * 2

array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22],
       [24, 26, 28]])

In [94]:
arr2 = np.arange(5).reshape(5,1) 
print(arr2)


[[0]
 [1]
 [2]
 [3]
 [4]]


In [95]:
print(arr2 + arr1 ) # arr2[i] + arr1's entire row

[[ 0  1  2]
 [ 4  5  6]
 [ 8  9 10]
 [12 13 14]
 [16 17 18]]


## <font color=blue>  Joining and Stacking

- Vertical stacking (row wise) using `vstack()`
- Horizontal stacking (column wise) using `hstack()`
- Depth wise stacking (along third axis) using `dstack()`
- `concatenate()` function creates a new array by appending arrays after each other, along a given axis
- `append()` function appends an element to an array and creates a new copy of the array

In [97]:
import numpy as np


array_1 = np.arange(10).reshape(2,5)
print(array_1)

print("-"*30)

array_2 = np.arange(13,23,1).reshape(2,5)
print(array_2)

[[0 1 2 3 4]
 [5 6 7 8 9]]
------------------------------
[[13 14 15 16 17]
 [18 19 20 21 22]]


In [102]:
array_vstack = np.vstack([array_1,array_2])
print(array_vstack)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [13 14 15 16 17]
 [18 19 20 21 22]]


In [103]:
print(array_vstack.shape)

(4, 5)


In [104]:
array_hstack = np.hstack([array_1,array_2])
print(array_hstack)

[[ 0  1  2  3  4 13 14 15 16 17]
 [ 5  6  7  8  9 18 19 20 21 22]]


In [105]:
print(array_hstack.shape)

(2, 10)


In [106]:
array_dstack = np.dstack([array_1,array_2])
print(array_dstack)

[[[ 0 13]
  [ 1 14]
  [ 2 15]
  [ 3 16]
  [ 4 17]]

 [[ 5 18]
  [ 6 19]
  [ 7 20]
  [ 8 21]
  [ 9 22]]]


In [107]:
print(array_dstack.shape) # two sets of 5x2 matrices

(2, 5, 2)


In [108]:
array_concatenate = np.concatenate([array_1,array_2])
print(array_concatenate)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [13 14 15 16 17]
 [18 19 20 21 22]]


In [109]:
print(array_concatenate.shape) # same as vstack

(4, 5)


In [110]:
array_append = np.append(array_1,array_2)
print(array_append)

[ 0  1  2  3  4  5  6  7  8  9 13 14 15 16 17 18 19 20 21 22]


In [111]:
print(array_append.shape)

(20,)


In [112]:
array_append = np.append(array_1,array_2,axis =1)
print(array_append)

[[ 0  1  2  3  4 13 14 15 16 17]
 [ 5  6  7  8  9 18 19 20 21 22]]


In [113]:
print(array_append.shape)

(2, 10)


In [114]:
array_append = np.append(array_1,array_2,axis = 0)
print(array_append)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [13 14 15 16 17]
 [18 19 20 21 22]]


In [115]:
print(array_append.shape)

(4, 5)


## <font color=blue> Size of objects in Memory


In [116]:
import sys

In [117]:
# An integer in Python is > 24bytes
sys.getsizeof(1)

14

In [118]:
sys.getsizeof(10)

14

In [119]:
# Longs are even larger
sys.getsizeof(10**100)

58

In [120]:
# Numpy size is much smaller
np.dtype(int).itemsize

4

In [121]:
# Numpy size is much smaller
np.dtype(np.int8).itemsize

1

In [130]:
np.dtype(float).itemsize

8

#### Lists are even larger

In [124]:
# A one-element list
sys.getsizeof([1])

32

In [125]:
# An array of one element in numpy
np.array([1]).nbytes

4

#### And performance is also important


In [126]:
l = list(range(100000))
l

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [127]:
import numpy as np
a = np.arange(100000)
a

array([    0,     1,     2, ..., 99997, 99998, 99999])

In [128]:
%time np.sum(a ** 2) # Numpy is much faster

Wall time: 997 µs


216474736

In [129]:
%time sum([x ** 2 for x in l])

Wall time: 46.9 ms


333328333350000

## <font color = blue> Sorting</font>

### `np.sort()`

In [None]:
arr = np.random.randint(1,7, size=[8, 4])
arr

array([[2, 2, 5, 6],
       [4, 2, 4, 2],
       [2, 4, 1, 2],
       [4, 6, 5, 3],
       [2, 1, 3, 1],
       [5, 5, 4, 4],
       [2, 6, 6, 5],
       [6, 5, 3, 1]])

We have a random array of 8 rows and 4 columns.

If you use the np.sort function with axis=0, all the columns will be sorted in ascending order independent of eachother, effectively compromising the integrity of the row items. In simple terms, the values in each row gets corrupted with values from other rows.

In [None]:
# Sort each columns of arr
np.sort(arr, axis=0)

array([[2, 1, 1, 1],
       [2, 2, 3, 1],
       [2, 2, 3, 2],
       [2, 4, 4, 2],
       [4, 5, 4, 3],
       [4, 5, 5, 4],
       [5, 6, 5, 5],
       [6, 6, 6, 6]])

Since I don’t want the content of rows to be disturbed, I resort to an indirect method using np.argsort.

### `np.argsort()`

Let’s first understand what np.argsort does.

np.argsort returns the index positions of that would make a given 1d array sorted.



In [132]:
# Get the index positions that would sort the array
x = np.array([1, 10, 5, 2, 8, 9])
sort_index = np.argsort(x)
print(sort_index)

[0 3 2 4 5 1]


How to interpret this?

In array ‘x’, the 0th item is the smallest, 3rd item is the second smallest and so on.

In [133]:
x[sort_index]

array([ 1,  2,  5,  8,  9, 10])

Now, in order to sort the original arr, I am going to do an argsort on the 1st column and use the resulting index positions to sort arr. See the code.

In [134]:
# Argsort the first column
sorted_index_1stcol = arr[:, 0].argsort()

# Sort 'arr' by first column without disturbing the integrity of rows
arr[sorted_index_1stcol]


array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

To sort it in decreasing order, simply reverse the argsorted index.

In [136]:
# Descending sort
arr[sorted_index_1stcol[::-1]]

array([[70, 80, 90],
       [40, 50, 60],
       [10, 20, 30]])

### `np.lexsort()`

In [None]:
# Sort by column 0, then by column 1
lexsorted_index = np.lexsort((arr[:, 1], arr[:, 0])) 
arr[lexsorted_index]

array([[2, 1, 3, 1],
       [2, 2, 5, 6],
       [2, 4, 1, 2],
       [2, 6, 6, 5],
       [4, 2, 4, 2],
       [4, 6, 5, 3],
       [5, 5, 4, 4],
       [6, 5, 3, 1]])