<a href="https://colab.research.google.com/github/iamvarada/Python/blob/master/numpy_cheatsheet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## Numpy cheatsheet and workbook

In [2]:
import numpy as np

my_list = [1,2,3,4,5]
arr = np.array(my_list)
type(arr)

numpy.ndarray

In [7]:
# Efficiency comparison with list

# memory check
S = range(1000) # 1000 elements
D = np.arange(1000) # 1000 elements from similar function from numpy

import sys
sys.getsizeof(5)*len(S),'bytes' # num bytes occupied by a variable


(28000, 'bytes')

In [8]:
D.itemsize*D.size,'bytes' # returns size occupied by single element in the array

# memory efficiency very high for numpy compared to built-in structures like list

(8000, 'bytes')

In [11]:
# time efficiency check
import time

SIZE = 1000000

L1 = range(SIZE)
L2 = range(SIZE)

A1 = np.arange(SIZE)
A2 = np.arange(SIZE)

start_time = time.time()
result = [(x+y) for x,y in zip(L1,L2)] # list comprehensiion, zip() performs elementwise computation
end_time = time.time()

print('Time taken by list to add values ', (end_time - start_time)*1000, 'ms')

start_time = time.time()
result = A1+A2
end_time = time.time()

print("Time taken by numpy to add values ", (end_time - start_time)*1000, "ms")

Time taken by list to add values  66.49637222290039 ms
Time taken by numpy to add values  16.24774932861328 ms


In [17]:
# 1D array
a = np.array([1,2,3,4,5])
print(a, a.shape, a.ndim)

[1 2 3 4 5] (5,) 1


In [20]:
# 2D array
b = np.array([[1,2,3],
              [4,5,6]
              ])
print(b, '\n', b.shape, b.ndim)

[[1 2 3]
 [4 5 6]] 
 (2, 3) 2


In [21]:
# 3D array
c = np.array([
             [[1,2,3], [4,5,6]],
             [[7,8,9], [10,11,12]]
              ])

print(c, '\n', c.shape, c.ndim)

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]] 
 (2, 2, 3) 3


In [25]:
# Re-shaping the arrays

e = np.array([[1,2,3], [4,5,6]])
print(e, '\n' , e.shape)

[[1 2 3]
 [4 5 6]] 
 (2, 3)


In [26]:
e.reshape(3,2)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [30]:
# Flatten the array to 1D array

print(e.flatten())
print(e.flatten().shape)

[1 2 3 4 5 6]
(6,)


In [33]:
# Stacking -- Merging two nd arrays

f = np.array([1,2,3])
g = np.array([4,5,6])

# Horizontal stacking
print('Horizontal stacking \n', np.hstack((f,g))) # passing both arrays as tuple (aka (a,b))

# Vertical stacking
print('Vertical stacking results in 2D array \n', np.vstack((f,g))) # passing both arrays as tuple (aka (a,b))

Horizontal stacking 
 [1 2 3 4 5 6]
Vertical stacking results in 2D array 
 [[1 2 3]
 [4 5 6]]


In [36]:
# Slicing

e = np.array([(1,2,3), (4,5,6)])
print(e)

[[1 2 3]
 [4 5 6]]


In [39]:
# Indexing -- a form of basic slicing
print(e[0])
print(e[1])

[1 2 3]
[4 5 6]


In [43]:
# Slicing
print(e[:,1]) # all rows and first column (starts from 0)
print(e[1,:2]) # 1st row, 2 columns starting from 0

[2 5]
[4 5]


In [44]:
# Intializing float arrays
list_ex = [[0,1,2], [3,4,5]]
arr = np.array(list_ex, dtype = 'float')
print(arr)

[[0. 1. 2.]
 [3. 4. 5.]]


In [45]:
# Converting to another datatype after assiging the array

print(arr.astype('int'))

[[0 1 2]
 [3 4 5]]


In [47]:
# Convering array to list

print(arr.tolist(), type(arr.tolist()))

[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]] <class 'list'>


In [50]:
# Manipulating arrays

# Missing values
arr = np.array([[1,2,3], [4,5,6], [7,8,9]])
print(arr, arr.shape)

[[1 2 3]
 [4 5 6]
 [7 8 9]] (3, 3)


In [51]:
arr[1,1] = np.nan
# gives error as nd arrays can only of one datatype, nan is float, arr is integer right

ValueError: ignored

In [55]:
arr = arr.astype('float')
arr

array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]])

In [58]:
arr[1,1] = np.nan
arr[2,1] = np.inf
arr

array([[ 1.,  2.,  3.],
       [ 4., nan,  6.],
       [ 7., inf,  9.]])

In [59]:
# Statistics

# Computing mean, min, max on mdarray
arr[1,1] = 10
arr[2,1]  = 20
arr


array([[ 1.,  2.,  3.],
       [ 4., 10.,  6.],
       [ 7., 20.,  9.]])

In [60]:
print('Mean: ', arr.mean())
print('Min: ', arr.min())
print('Max: ', arr.max())

Mean:  6.888888888888889
Min:  1.0
Max:  20.0


In [62]:
# Column-wise stats
print('Column-wise min', np.amin(arr, axis = 0)) # axis = 0 means column-wise

# Row-wise stats
print('Row-wise min', np.amin(arr, axis=1)) # axis = 1 means row-wise

Column-wise min [1. 2. 3.]
Row-wise min [1. 4. 7.]


In [63]:
# Filtering the data using numpy expressions

a = np.array([[1,2,3], [2,3,4], [7,8,9]])
print(a, a.shape, a.ndim)

[[1 2 3]
 [2 3 4]
 [7 8 9]] (3, 3) 2


In [67]:
# Find values in the array greater than 2
bool_idx = (a > 2)
print(bool_idx)

print('\n\nValues at all indices where the boolean expression is true')
a[bool_idx] 

[[False False  True]
 [False  True  True]
 [ True  True  True]]


Values at all indices where the boolean expression is true


array([3, 3, 4, 7, 8, 9])

In [73]:
# Math operations on nd array

x = np.array([[1,2], [3,4]], dtype = 'float')
y = np.array([[5,6], [7,8]], dtype = np.float64) # another way to specify the data type

print(x)
print(y)

# Element wise

# subract 
print(x-y)

# same as
np.subtract(x,y)

# square root
np.sqrt(x)

[[1. 2.]
 [3. 4.]]
[[5. 6.]
 [7. 8.]]
[[-4. -4.]
 [-4. -4.]]


array([[1.        , 1.41421356],
       [1.73205081, 2.        ]])

In [75]:
# Column-wise and row-wise math operations
print(np.sum(x)) # all elements of array

print(np.sum(x, axis = 0)) # column-wise
print(np.sum(x, axis = 1)) # row-wise

10.0
[4. 6.]
[3. 7.]


In [76]:
# Basic word problem

# distance travelled by riders
dist = [181, 222, 445, 467]
# time taken by each rider
time = [2, 5, 6, 7]

# speed of each rider
speed = dist/time # error as list is not divisible element wise

TypeError: ignored

In [79]:
# Convert to np array and it makes life easier
dist = np.array(dist)
time = np.array(time)

speed = dist/time
print(speed)

[90.5        44.4        74.16666667 66.71428571]


In [80]:
# Another word problem

hour_wage = np.array([12,45,677]) # in INR

# conver to dollars (x80 for example)

hour_wage*80

array([  960,  3600, 54160])

In [81]:
# Another problem

weekly_hrs = np.array([40,506, 69076, 33])
weekly_hrs[weekly_hrs >= 40]

array([   40,   506, 69076])

In [82]:
# Same thing using numpy logical operators
weekly_hrs[np.logical_not(weekly_hrs < 40)]

array([   40,   506, 69076])

In [84]:
np.logical_and(weekly_hrs > 30, weekly_hrs <50)

array([ True, False, False,  True])

In [87]:
# Create arrays of ones

print(np.ones((3,4))) # 3 rows, 4 columns
print('\n\n')
print(np.zeros((1,2)))

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]



[[0. 0.]]


In [88]:
# Random values
np.random.random((2,2))

array([[0.36653132, 0.27288394],
       [0.47035967, 0.42114506]])

In [89]:
# Empty array
np.empty((3,5))

array([[2.56032802e-316, 0.00000000e+000, 0.00000000e+000,
        4.79243676e-322, 6.92997366e-310],
       [3.53407133e-316, 4.31174539e-096, 9.80058441e+252,
        1.23971686e+224, 1.05235720e-153],
       [9.03292329e+271, 9.08366793e+223, 1.06244660e-153,
        3.44981369e+175, 7.11454530e-322]])

In [92]:
# Full array

np.full((2,2), 7)

array([[7, 7],
       [7, 7]])

In [94]:
# Evenly spaced array
print(np.arange(10,25,5)) # start from 10, space of 5, end at 25
print(np.linspace(0,2,9)) # min = 0, max = 2, total elements = 9

[10 15 20]
[0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.  ]
