# Numpy is the Python library used to work with numerical data.

* A numpy array is represented as numpy.ndarray, where 'nd' stands for N-dimensional.
The data can be in any dimension: 1D - vector, 2D - matrix, 3D - tensors.
* Numpy array vs Python list - A numpy array contains homogeneous data, making it faster compared to a Python list. Also, a Python list is stored in a 'pass by reference' format in memory, which means the memory doesn't store the actual data present inside the list; it actually stores the location reference of the data present in a list stored somewhere else. This is the reason why lists are heterogeneous.
* One of the main reason of numpy array being fast is that the are written in C language so there is no overhead as we compare to python list. 

In [1]:
import numpy as np
import random 

# Terminology we should know before starting
* np.shape() - used to know the shape before we start working on the data 
* np.size() - this function return total number of elements inside our array
* np.ndim() - to know dimensions of the array
* type(arr_name) - python datatype. like list,tuple,set,dict,ndarray
* arr_name.dtype - type of data stored inside the python type like. int,float,str etc

In [2]:
a = np.array([10,9,8,7,6,5,4,3,2,1]).reshape(5,2)

print(a)

[[10  9]
 [ 8  7]
 [ 6  5]
 [ 4  3]
 [ 2  1]]


In [3]:
print('Shape of the array:',np.shape(a))
print('No of elements inside the array:',np.size(a))
print('Dimensions of array:',np.ndim(a))
print('Python data type',type(a))
print('Type of data store inside and array',a.dtype)


Shape of the array: (5, 2)
No of elements inside the array: 10
Dimensions of array: 2
Python data type <class 'numpy.ndarray'>
Type of data store inside and array int32


# While creating a ndarray we can explicitely assign a datatype 
* This has a great advantage in memory consumption and speed of the data retrival 
* To achieve the above task we have to use a function called sys.getsizeof(arr_name) and import sys module
* We can also do the same of ndarray which we already created by using a function- arr_name.astype('type')

In [4]:
import sys

arr = np.arange(100)
print('initial dtype:',arr.dtype)
print('initial memory utilization:',sys.getsizeof(arr))

# lets create the same array where we can implicitly tell the dtype

arr1 = np.arange(100,dtype='int16')
print('memory utilization after conversion:',sys.getsizeof(arr1))

# This would give a very big impact on the larger data

initial dtype: int32
initial memory utilization: 512
memory utilization after conversion: 312


In [5]:
arr = np.arange(100)
print('initial memory utilization:',sys.getsizeof(arr))
arr1 = arr.astype('int16')
print('memory utilization after dtype conversion:',sys.getsizeof(arr1))

# this code also have the same effect as we seen above

initial memory utilization: 512
memory utilization after dtype conversion: 312


# Creating numpy array
* np.array(["array"])
* np.arange()
* np.zeros()
* np.ones()
* np.empty()
* np.random()
* np.linspace()
* np.identity()

In [6]:
np.array([1,2,3,4,5,6])

array([1, 2, 3, 4, 5, 6])

In [36]:
np.arange((25)).reshape(5,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [8]:
np.zeros((5,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [9]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [10]:
np.empty((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [40]:
# for this we need to import a module called random

np.random.randint(10,20,10).reshape(5,2)

array([[15, 12],
       [17, 11],
       [15, 18],
       [16, 19],
       [18, 12]])

In [12]:
# this function used to get a linearly spaced array between a range

np.linspace(5,25)

array([ 5.        ,  5.40816327,  5.81632653,  6.2244898 ,  6.63265306,
        7.04081633,  7.44897959,  7.85714286,  8.26530612,  8.67346939,
        9.08163265,  9.48979592,  9.89795918, 10.30612245, 10.71428571,
       11.12244898, 11.53061224, 11.93877551, 12.34693878, 12.75510204,
       13.16326531, 13.57142857, 13.97959184, 14.3877551 , 14.79591837,
       15.20408163, 15.6122449 , 16.02040816, 16.42857143, 16.83673469,
       17.24489796, 17.65306122, 18.06122449, 18.46938776, 18.87755102,
       19.28571429, 19.69387755, 20.10204082, 20.51020408, 20.91836735,
       21.32653061, 21.73469388, 22.14285714, 22.55102041, 22.95918367,
       23.36734694, 23.7755102 , 24.18367347, 24.59183673, 25.        ])

In [13]:
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

# Reshaping / Tranpose / Flip
* while reshaping an array we have to make sure that the number of elements inside the array and the new array are same otherwise python will through an error
* transpose will change rows to columns and vice versa
* flip function will flip all the elements upside dowm

In [14]:
arr = np.linspace(5,20,10)

new_shape = arr.reshape(5,2)

new_shape

array([[ 5.        ,  6.66666667],
       [ 8.33333333, 10.        ],
       [11.66666667, 13.33333333],
       [15.        , 16.66666667],
       [18.33333333, 20.        ]])

In [15]:
arr = np.arange(30).reshape(3,10)

print('without tranpose:',arr)

print('after transpose',np.transpose(arr))



without tranpose: [[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]]
after transpose [[ 0 10 20]
 [ 1 11 21]
 [ 2 12 22]
 [ 3 13 23]
 [ 4 14 24]
 [ 5 15 25]
 [ 6 16 26]
 [ 7 17 27]
 [ 8 18 28]
 [ 9 19 29]]


* flip()

In [16]:
arr = np.arange(30).reshape(3,10)

print(arr)

np.flip(arr)

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]]


array([[29, 28, 27, 26, 25, 24, 23, 22, 21, 20],
       [19, 18, 17, 16, 15, 14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5,  4,  3,  2,  1,  0]])

#  Sort() & Concat()

In [17]:
a = np.random.randint(10,20,10)

print('before sorting:',a)

print('after sorting',np.sort(a))

before sorting: [19 11 16 17 13 10 17 16 18 16]
after sorting [10 11 13 16 16 16 17 17 18 19]


* Concatenating means joining at the end 

In [18]:
a = np.array([1,1,1,1,1])
b = np.arange(5,15,2)

c = np.concatenate((b,a))

print(c)


[ 5  7  9 11 13  1  1  1  1  1]


# Creating array from existing data
* hstack()
* vstack()
* hsplit()

In [19]:
a = np.array([10,20,30])
b = np.array([40,50,60])

c = np.hstack((a,b))
print('horizontal stack:',c)

c = np.vstack((a,b))
print('vertical stack:',c)

horizontal stack: [10 20 30 40 50 60]
vertical stack: [[10 20 30]
 [40 50 60]]


In [20]:
# lets split the array we have created above
# syntax -> np.split(arr_name,parts)
np.split(c,2)


[array([[10, 20, 30]]), array([[40, 50, 60]])]

# Introducing new dimension to an existing array
* np.newaxis()
* np.expand_dims

In [21]:
a = np.arange(20).reshape(5,4)

print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]]


In [22]:
# introdung new dimension along columns

r = a[np.newaxis,:]
print(r)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]
  [12 13 14 15]
  [16 17 18 19]]]


In [23]:
# introdung new dimension along rows. 

c = a[:,np.newaxis]

print(c)

[[[ 0  1  2  3]]

 [[ 4  5  6  7]]

 [[ 8  9 10 11]]

 [[12 13 14 15]]

 [[16 17 18 19]]]


In [24]:
a = np.identity(5)

b = np.expand_dims(a,axis=0)

print(b)

[[[1. 0. 0. 0. 0.]
  [0. 1. 0. 0. 0.]
  [0. 0. 1. 0. 0.]
  [0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 1.]]]


In [25]:
c = np.expand_dims(a,axis=1)

print(c)

[[[1. 0. 0. 0. 0.]]

 [[0. 1. 0. 0. 0.]]

 [[0. 0. 1. 0. 0.]]

 [[0. 0. 0. 1. 0.]]

 [[0. 0. 0. 0. 1.]]]


# flatten() / ravel()

* These function's are used to flatted a multi-dimensional array to 1D. Both the function's work exactly the same the only difference is that when we use the flatten and make chages in the new array the changes we made wouldn't be reflected on new array. But opposite of this is true

In [26]:
c = np.expand_dims(a,axis=1)

print(c.flatten())
print(c.ravel())

[1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 1.]
[1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 1.]


# Basis arithmetic operation's on ndarray

In [27]:
a = np.array([50,60,70,80])
b = np.array([5,6,7,8])

# functions which can be performed btw two ndarrays
c2 = np.sum((a,b),axis=0)     # if we don't define the axis it will give us the sum of all elements from both the array
s = np.subtract(a,b)
m = np.multiply(a,b)
d = np.divide(a,b)



# these function should be performed on a array
prod = a.prod()
cumprod = a.cumprod()
cumsum = a.cumsum()
percentile = np.percentile(a,70) # syntax np.percentile(ndarray,value)



print('Sum:        ',c)
print('Subtraction:',s)
print('multiply:   ',m)
print('divide:     ',d)
print('product:    ',prod)
print('cumulative sum:    ',cumsum)
print('cumulative product:',cumprod)
print('percentile',percentile)



Sum:         [[[1. 0. 0. 0. 0.]]

 [[0. 1. 0. 0. 0.]]

 [[0. 0. 1. 0. 0.]]

 [[0. 0. 0. 1. 0.]]

 [[0. 0. 0. 0. 1.]]]
Subtraction: [45 54 63 72]
multiply:    [250 360 490 640]
divide:      [10. 10. 10. 10.]
product:     16800000
cumulative sum:     [ 50 110 180 260]
cumulative product: [      50     3000   210000 16800000]
percentile 71.0


# Indexing / Slicing / Fancy Indexing

* This is used to access the elements
* indexing-- arr[postion]
* slicing-- arr[rows:columns]
* fancy-- indexing arr[position]
* `Note`: We can always combine all the above three to get the desired result

In [28]:
arr = np.arange(30).reshape(6,5)

arr[2,3]

13

In [29]:
#slicing

arr = np.arange(30).reshape(6,5)

print(arr)

arr[::2,::2] # to extract every alternate elemnts btw rows and cols

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]
 [25 26 27 28 29]]


array([[ 0,  2,  4],
       [10, 12, 14],
       [20, 22, 24]])

In [30]:
#fancy indexing is used when we can't cut out the elements just by using slicing and indexing
arr[[2,5,4],[2,0,-1]]

array([12, 25, 24])

In [31]:
arr[:,[2,4]]

array([[ 2,  4],
       [ 7,  9],
       [12, 14],
       [17, 19],
       [22, 24],
       [27, 29]])

# Some other useful functions

Both these functions return the index value
* np.argmax(arr)
* np.argmin(arr)


In [32]:
arr = np.random.randint(100,150,50)

new_arr = arr.reshape(10,5)

np.expand_dims(new_arr,axis=1)


array([[[143, 135, 118, 106, 146]],

       [[136, 132, 108, 134, 149]],

       [[115, 119, 108, 129, 139]],

       [[122, 119, 143, 123, 141]],

       [[129, 112, 101, 100, 128]],

       [[109, 123, 147, 113, 138]],

       [[127, 105, 134, 117, 105]],

       [[125, 121, 119, 127, 142]],

       [[133, 113, 123, 117, 107]],

       [[123, 102, 148, 103, 116]]])

In [33]:
print(np.argmax(new_arr,axis=1)) # axis=1 means rowwise
print(np.argmin(new_arr,axis=0)) # axis=0 means columnwise

[4 4 4 2 0 2 2 4 0 2]
[5 9 4 4 6]


# Boolena masking

In [34]:
# Get all the elemenst for the above array between 120 and 140

print(new_arr[new_arr > 120])
print(new_arr[new_arr%2==0])

[143 135 146 136 132 134 149 129 139 122 143 123 141 129 128 123 147 138
 127 134 125 121 127 142 133 123 123 148]
[118 106 146 136 132 108 134 108 122 112 100 128 138 134 142 102 148 116]
