#  Numpy
* Numerical Python
* Numpy is the fundamental package for scientific computing with Python
* Provides much more efficient data storage and operations than built-in Python lists
* The entire ecosystem of Python data science tools depends on Numpy

In [3]:
import numpy as np
# Numpy is the linear algebra library for python
np.__version__


'1.15.0'

## 1. Creating Numpy Arrays

### 1.1 Creating Arrays from Scratch

**np.arange(start,stop,step)**

In [None]:
# Explicit start, stop and step: every 3rd integer starting at 2, stopping before 20 (stop is exclusive)
np.arange(2, 20, 3)

In [5]:
np.arange(10) # Creates numbers 0 to 9

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

**np.zeros()**

In [6]:
np.zeros(5) # creates 5 zeroes

array([0., 0., 0., 0., 0.])

**np.ones()**

In [7]:
np.ones(8) # creates an array of 8 ones

array([1., 1., 1., 1., 1., 1., 1., 1.])

**np.eye()**

In [8]:
np.eye(3) # creates identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

**np.full()**

In [9]:
np.full((2,3),1) # creates a 2 x 3 array filled with 1.  np.full((nrows,ncols),value)

array([[1, 1, 1],
       [1, 1, 1]])

**np.random.randint()**

In [10]:
np.random.randint(0,10,(3,4)) # creates a 3 x 4 array with random integers from 0 to 9 (the upper bound 10 is exclusive)

array([[7, 7, 1, 9],
       [7, 6, 5, 9],
       [3, 9, 5, 4]])

In [11]:
np.random.randint(9,20,5) # for just 1d array

array([12, 11, 14, 15, 12])

**np.random.random()**

In [12]:
np.random.random((3,4)) # creates a 3 x 4 array with random uniform values between 0 and 1

array([[0.92882957, 0.67336219, 0.32307924, 0.19268043],
       [0.44635483, 0.33053322, 0.99815933, 0.90653095],
       [0.09495885, 0.43409757, 0.77104847, 0.70614611]])

In [13]:
np.random.rand(3,4) # np.random.rand() serves the same purpose, but takes the dimensions as separate arguments instead of a tuple

array([[0.41962414, 0.42400997, 0.52546897, 0.06361125],
       [0.60339934, 0.76525656, 0.86530705, 0.13479277],
       [0.93129459, 0.74087314, 0.79736279, 0.27092658]])

**np.random.normal()**

In [14]:
np.random.normal(0,1,(4,5)) # creates a 4 x 5 array with normally distributed random values having mean 0 and SD 1

array([[-1.19045363,  0.5471272 , -0.73288118, -0.02459531,  0.08870488],
       [ 1.22349752,  0.49983734, -0.9955641 ,  1.19164288, -1.8032659 ],
       [ 0.40261807, -1.76404928,  0.5152742 ,  1.52955426,  0.65952126],
       [-1.79563038,  0.7901053 , -2.56262727, -0.20086904,  0.39578291]])

In [15]:
np.random.randn(4,5) # here mean is by default 0 and SD is 1. Similar to the above method

array([[-0.4051464 , -0.11967574, -1.48332591, -1.24761793, -1.00725691],
       [ 0.29347998, -0.86638806, -1.17871732,  0.67644113, -0.51407245],
       [ 1.47788573,  0.87793598, -0.15030283,  1.36410632,  0.56642379],
       [-1.44531466,  0.03221279,  0.13260727, -0.34565319,  1.24431504]])

**np.linspace()**

In [16]:
np.linspace(0,10,5) # creates an array from 0 to 10 with 5 equally spaced values

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

### 1.2 Creating numpy arrays from lists or tuples

In [17]:
np.array([4,5,6])

array([4, 5, 6])

In [18]:
alist = [9, 7, 5, 6]
np.array(alist)

array([9, 7, 5, 6])

In [38]:
atuple = (4, 5, 6, 7)
np.array(atuple)

array([4, 5, 6, 7])

In [20]:
list_list = [[i, i+2] for i in range(0,5)] # List of Lists
list_list

[[0, 2], [1, 3], [2, 4], [3, 5], [4, 6]]

In [21]:
my_arr = np.array(list_list) # creating a 2D numpy array from lists of lists.
my_arr

array([[0, 2],
       [1, 3],
       [2, 4],
       [3, 5],
       [4, 6]])

In [22]:
my_arr2 = np.arange(10)
my_arr2

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [23]:
list(my_arr2) # convert array back to list

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [24]:
tuple(my_arr2)

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

## 2. Indexing, Modifying Arrays

### 2.1 Attributes of Arrays

**.shape** gives the dimensions of an array

In [25]:
my_arr

array([[0, 2],
       [1, 3],
       [2, 4],
       [3, 5],
       [4, 6]])

In [26]:
my_arr.shape # .shape gives the dimensions of the array.

(5, 2)

**.reshape()**

In [27]:
my_arr.reshape(2,5)

array([[0, 2, 1, 3, 2],
       [4, 3, 5, 4, 6]])

In [39]:
my_arr_new = my_arr.reshape(1,10) # doesn't happen inplace. Original array remains the same.
my_arr_new

array([[0, 2, 1, 3, 2, 4, 3, 5, 4, 6]])

In [40]:
my_arr_new.shape

(1, 10)

In [41]:
my_arr

array([[0, 2],
       [1, 3],
       [2, 4],
       [3, 5],
       [4, 6]])

### 2.2 Indexing of numpy arrays (similar to lists) npa[start:stop:step]

In [42]:
my_arr2 = np.array([31, 89, 94, 56, 34, 69, 98, 41, 53, 83, 77])
my_arr2

array([31, 89, 94, 56, 34, 69, 98, 41, 53, 83, 77])

***Q. how to get 69***

69

***Q. How to get all the numbers in reverse order***

array([77, 83, 53, 41, 98, 69, 34, 56, 94, 89, 31])

***Q. Get numbers from 98 to 89 in reverse order***

array([98, 69, 34, 56, 94, 89])

**indexing 2D arrays, [start:stop:step, start:stop:step]**

In [50]:
my_arr = np.random.rand(4,5)
my_arr

array([[0.14977238, 0.48540151, 0.18861952, 0.56394028, 0.65155677],
       [0.06483871, 0.85172372, 0.07036724, 0.18779297, 0.20139519],
       [0.14114466, 0.70637476, 0.11831724, 0.95818229, 0.10999727],
       [0.17866609, 0.51638712, 0.11017563, 0.95823295, 0.54229687]])

In [51]:
my_arr[:,:] # npa[start:stop:step, start:stop:step]

array([[0.14977238, 0.48540151, 0.18861952, 0.56394028, 0.65155677],
       [0.06483871, 0.85172372, 0.07036724, 0.18779297, 0.20139519],
       [0.14114466, 0.70637476, 0.11831724, 0.95818229, 0.10999727],
       [0.17866609, 0.51638712, 0.11017563, 0.95823295, 0.54229687]])

In [52]:
my_arr[0:2,0:2] # get rows 0,1 and columns 0,1

array([[0.14977238, 0.48540151],
       [0.06483871, 0.85172372]])

In [53]:
# in Python, we generally use ':' after ',' to get all rows or columns. in R just ',' is enough

In [54]:
my_arr[1,:] #get row 1, all columns. In R, just my_arr[2,] would have worked.

array([0.06483871, 0.85172372, 0.07036724, 0.18779297, 0.20139519])

In [55]:
my_arr[:,1] # Get all rows of column 1 (the second column, since indexing starts at 0)

array([0.48540151, 0.85172372, 0.70637476, 0.51638712])

In [56]:
my_arr[1,2] # to get a particular element

0.0703672425125621

In [57]:
my_arr[1,1:3] # row 1 and columns from 1 to 2

array([0.85172372, 0.07036724])

In [58]:
my_arr

array([[0.14977238, 0.48540151, 0.18861952, 0.56394028, 0.65155677],
       [0.06483871, 0.85172372, 0.07036724, 0.18779297, 0.20139519],
       [0.14114466, 0.70637476, 0.11831724, 0.95818229, 0.10999727],
       [0.17866609, 0.51638712, 0.11017563, 0.95823295, 0.54229687]])

***Q. Get columns 0, 2, 4 in reverse order***

array([[0.65155677, 0.18861952, 0.14977238],
       [0.20139519, 0.07036724, 0.06483871],
       [0.10999727, 0.11831724, 0.14114466],
       [0.54229687, 0.11017563, 0.17866609]])

### 2.3 Boolean Masking

**Using conditionals in slicing**

In [None]:
my_arr = np.random.rand(10)
my_arr

In [None]:
my_arr > 0.5

In [None]:
my_arr[my_arr > 0.5] # get only those array elements whose value is greater than 0.5

In [None]:
my_arr2D = np.random.rand(3,5)
my_arr2D

In [None]:
my_arr2D > 0.5

In [None]:
my_arr2D[my_arr2D > 0.5]

In [None]:
my_arr2D[:,2] > 0.2

In [None]:
my_arr2D[(my_arr2D[:,2] > 0.2), :] # Get all rows for which the column 2 value is greater than 0.2

In [None]:
# Two independent 1-D arrays, each holding 6 random integers from 0 to 9
my_arr1 = np.random.randint(0, 10, 6)
my_arr2 = np.random.randint(0, 10, 6)
# Print both arrays so the element-wise comparisons that follow can be checked by eye
print("my_arr1 is {}".format(my_arr1))
print("my_arr2 is {}".format(my_arr2))

**Important to remember: `and` / `or` evaluate the truth value of the entire object, which raises a ValueError for arrays with more than one element. If we need element-wise logic, use `&` / `|`.**

In [None]:
(my_arr1 > 5) & (my_arr2 > 5) 

In [None]:
# `and` needs a single True/False from each operand, but an array with more than one
# element has no unambiguous truth value, so NumPy raises ValueError here.
# The error is the point of this cell: catch and display it so the notebook still
# runs from top to bottom. Use `&` for element-wise logic instead.
try:
    (my_arr1 > -1) and (my_arr2 > -1)
except ValueError as err:
    print("ValueError: " + str(err))

### 2.4 Modifying arrays

In [None]:
my_arr = np.random.rand(4,5)
my_arr

In [None]:
my_arr[1,2] = 0.9
my_arr

In [None]:
my_arr[:,4] = np.array([8, 6, 9, 22]) # replacing all rows of column 4 (the fifth and last column) with new values
my_arr

**Concatenation and splitting**

In [None]:
my_arr1 = np.array([1,2,3,4])
my_arr2 = np.array([9, 9, 9, 9])

In [None]:
np.vstack([my_arr1,my_arr2]) # vertical stacking

In [None]:
np.hstack([my_arr1,my_arr2]) # horizontal stacking; np.concatenate also does something similar

In [None]:
my_arr = np.random.randint(0,10,(4,5))
my_arr

In [None]:
usplit, lsplit = np.vsplit(my_arr,[2]) # row index, where the split should happen. also try np.hsplit()

In [None]:
usplit

In [None]:
lsplit

**Sub-arrays obtained by slicing are views (aliases) of the original array. If we mutate a sub-array, the original is mutated too. This is different from lists, where slicing creates a copy.**

In [None]:
sub_arr = my_arr[:,1:2]
sub_arr

In [None]:
sub_arr[:,0] = np.array([9,9,9,9])

In [None]:
my_arr

### 2.5 UFuncs and Algebraic operations

In [None]:
al = [1,2,3,4] # With lists elementwise operations need for loops, 
# in R however, we can do algebraic operations on vectors, matrices, data frame directly.

In [None]:
al*4

In [None]:
al + [1,2,3,4]

In [None]:
[4*i for i in al] # this is more tedious. (Of course, list comprehensions are better than traditional for loops)

**See the power of numpy**

In [None]:
my_arr = np.arange(10)
my_arr

In [None]:
my_arr*4

In [None]:
my_arr-500

In [None]:
my_arr/5

In [None]:
my_arr*5+3 # we can even do operations in an expression. 

**All these operators are basically wrappers for built-in NumPy ufuncs**

In [None]:
np.multiply(my_arr,4) # this is the same as my_arr*4; similarly we have np.add, np.subtract, np.divide, etc.

**Some more functions: max, min, mean, sum, sqrt, exp, sin, cos, log, argmin, argmax, std, var — just use np.function. (sqrt, exp, sin, cos and log are element-wise ufuncs; the others are aggregation functions.)**
#### Go to this link https://docs.scipy.org/doc/numpy/reference/ufuncs.html for more functions.


In [None]:
np.mean(my_arr)

In [None]:
np.prod(my_arr)

In [None]:
np.sum(my_arr) # for several of these aggregating functions, we can also use methods of the numpy array object itself (see below)

In [None]:
my_arr.sum()

In [None]:
np.sqrt(my_arr)

In [None]:
np.exp(my_arr)

**multi-dimensional aggregation**

In [None]:
my_arr = np.random.randn(3,5)
my_arr

In [None]:
np.mean(my_arr,0) # the second argument is the axis: 0 aggregates down the rows (one result per column), 1 aggregates across the columns (one result per row)

In [None]:
np.sum(my_arr,1)

**Few useful numpy methods**

In [None]:
# np.sum, np.any, np.all, np.prod, np.bincount, np.where
import numpy as np

In [None]:
my_arr = np.random.randint(0,10,20)
my_arr

**np.bincount**

In [None]:
np.bincount(my_arr) # bincount gives count (frequency) of each value starting from 0 to the largest value.

**np.where**

In [None]:
np.where(my_arr == 5) # gives the index position where value = 5

**np.sort**

In [None]:
np.sort(my_arr) # this doesn't happen inplace. To mutate the original use my_arr.sort()

In [None]:
np.argsort(my_arr) # this will give indexes after sorting the array

In [None]:
my_arr2D = np.random.rand(4,5)
my_arr2D

In [None]:
np.sort(my_arr2D, 0) # sort across each column

In [None]:
my_arr.max() # similar to Ufuncs, numpy objects themselves have several methods

In [None]:
my_arr.sum()

In [None]:
my_arr.argmax() # index position where maximum value occurs.