# Numpy Intro

In [None]:
!pip install matplotlib
%matplotlib inline 
%run mplimp.py

In [None]:
import numpy as np

In [None]:
np.random.seed(1337)

### Basics of Matrices

In [None]:
x = np.array(  [1, 4, 3]  )
x

In [None]:
y = np.array([ [1, 4, 3], 
               [9, 2, 7] ] )
y

In [None]:
x.shape

In [None]:
y.shape

In [None]:
z = np.array(  [ [1, 4, 3] ]  )   # What's the difference?

In [None]:
z.shape

In [None]:
z = np.arange(1, 2000, 1)  # start, end, step
z[:10]

In [None]:
z.shape

In [None]:
np.arange(0.5, 3, 0.5)

In [None]:
np.arange(0.5, 10, 1).shape

In [None]:
np.arange(0.5, 10, 1).reshape(5, 2).shape

In [None]:
np.arange(0.5, 10, 1).reshape(5, 3).shape  # !!

In [None]:
# Evenly spaced but we don't know the step 
np.linspace(3, 9, 10)

In [None]:
print(x)
print(x[1]) 
print(x[1:])

In [None]:
print(y)
y[0, 1]   # this is extra in numpy

In [None]:
y[:, 1]

In [None]:
y[:, [1, 2]]

### Matrix Operations

In [None]:
np.zeros((3, 5))

In [None]:
np.ones((5, 3))

In [None]:
a = np.arange(1, 7)
a

In [None]:
a.shape

In [None]:
a[3] = 7
a

In [None]:
a[:3] = 1   # Assign to multiple locations 
a

In [None]:
a[1:4] = [9, 8, 7]
a

In [None]:
# This is most useful 
b = np.zeros((2, 2))
b[0, 0] = 1
b[0, 1] = 2
b[1, 1] = 4
b

In [None]:
b.shape

### Array Operations

In [None]:
print(b)

In [None]:
b + 2

In [None]:
2 * b

In [None]:
b ** 2

In [None]:
sum(b)

In [None]:
b

In [None]:
b.sum(axis=0).shape

In [None]:
b.sum(axis=1).shape

In [None]:
b = np.array([[1, 2], [3, 4]])
d = np.array([[3, 4], [5, 6]])

In [None]:
print(b) 
print(d)

In [None]:
b + d

In [None]:
b * d     # what operation is this?

In [None]:
b.dot(d)  # and this?

In [None]:
b ** d

In [None]:
b.T

In [None]:
a

In [None]:
a.shape

In [None]:
a.T

In [None]:
a.T.shape

In [None]:
a.reshape(6,1).T.shape

In [None]:
# np.sqrt is applied element-wise (NumPy calls such functions "universal functions")
print(np.sqrt(36))

# works on both scalars and array-like inputs (here a plain Python list)
x = [1, 4, 9, 16]
np.sqrt(x)

In [None]:
# Checking conditions 
x = np.array([1, 2, 4, 5, 9, 3])
y = np.array([0, 2, 3, 1, 2, 3])

In [None]:
x > 3

In [None]:
x > y

## Misc Operations with Numpy

In [None]:
import math
def basic_sigmoid(x):
    """
    Evaluate the logistic sigmoid 1 / (1 + e^(-x)).

    Arguments:
    x -- A scalar, or any object that supports unary minus and can be the
         exponent of a float (a NumPy array works element-wise; a plain
         Python list raises TypeError on the negation)

    Return:
    s -- sigmoid(x)
    """

    # Built on math.e rather than a NumPy call, so array support comes
    # entirely from the operators NumPy arrays define.
    exp_neg_x = math.e ** (-x)
    denominator = 1. + exp_neg_x
    return 1. / denominator

In [None]:
basic_sigmoid(-1)

In [None]:
basic_sigmoid(0)

In [None]:
x = [-1, 0, 3]
basic_sigmoid(x)   # Why

In [None]:
import numpy as np

x = [-1, 0, 3]
x = np.array(x)
basic_sigmoid(x)

### Broadcasting

In [None]:
import numpy as np

In [None]:
# What is broadcasting? 
x = np.array([1, 2, 3])
x * 3  # This makes sense

In [None]:
x + 3   # Does this?

General rule: shapes are compared dimension by dimension, starting from the trailing (rightmost) dimension. Two dimensions are compatible when
- they are equal, or
- one of them is 1

In [None]:
x = np.arange(4)
xx = x.reshape(4, 1)
y = np.ones(5)
z = np.ones((3,4))

print("x =  ", x)
print("xx = ", xx) 
print("y =  ", y)

print("Shapes: ") 
print(x.shape)
print(xx.shape)
print(y.shape)

In [None]:
x + y    # does not work

In [None]:
xx.shape, x.shape

In [None]:
print(y)
print(xx)

In [None]:
out = xx + y       # xx is broadcast over the columns of y

In [None]:
out

In [None]:
out.shape

In [None]:
np.array([1]) + y      # element is broadcast over y

In [None]:
print(z)
z.shape

In [None]:
x.shape

In [None]:
x

In [None]:
z + x

In [None]:
a = np.array([[ 0.0 , 0.0,  0.0  ],
              [ 10.0, 10.0, 10.0 ],
              [ 20.0, 20.0, 20.0 ],
              [ 30.0, 30.0, 30.0 ]])

b = np.array( [ 1.0,  2.0,  3.0  ] )

In [None]:
a + b

In [None]:
a = np.array([[ 0.0 , 0.0,  0.0  , 0.0  ],
              [ 10.0, 10.0, 10.0 , 10.0 ],
              [ 20.0, 20.0, 20.0 , 20.0 ],
              [ 30.0, 30.0, 30.0 , 30.0 ]])

b = np.array( [ 1.0,  2.0] )

In [None]:
a + b

In [None]:
a = np.array( [ 0.0, 10.0, 20.0, 30.0 ] )
b = np.array( [ 1.0, 2.0,  3.0 ]        )

In [None]:
a + b    # can't broadcast

In [None]:
print(a.shape, b.shape)

In [None]:
a1 = a.reshape(4, 1)

In [None]:
a1 + b

In [None]:
print(a1.shape, b.shape)

In [None]:
print(a1)

### Normalizing Rows and Columns

Often, we wish to normalize the values of a row so that they fall within a specific range.

In [None]:
x = np.array([ [ 0, 3, 4 ],
               [ 1, 6, 4 ] ])

In [None]:
x_norm = np.amax(x, axis=1)   # get row-wise max 
print(x_norm)

In [None]:
x / x_norm     # this does not work. Why?

In [None]:
print(x.shape)
print(x_norm.shape)

In [None]:
x_norm = x_norm.reshape(2, 1)

In [None]:
x_norm

In [None]:
x / x_norm

In [None]:
# We can also use another normalization method 
x = np.array([ [ 0, 3, 4 ],
               [ 1, 6, 4 ] ])

# no need to reshape again 
x_norm = np.linalg.norm(x, ord = 2, axis = 1, keepdims = True) 
x / x_norm

In [None]:
x_norm.shape

In [None]:
help(np.linalg.norm)

### Reshaping Revisited

Pay attention to the tensor below!

In [None]:
i = np.array([
    [  [ 0.1,  0.1,  0.9 ], [ 0.2,  0.1,  0.9 ], [ 0.3,  0.1,  0.9 ]  ],
    
    [  [ 0.1,  0.2,  0.9 ], [ 0.2,  0.2,  0.9 ], [ 0.3,  0.2,  0.9 ]  ],

    [  [ 0.1,  0.3,  0.9 ], [ 0.2,  0.3,  0.9 ], [ 0.3,  0.3,  0.9 ]  ], 
    
    [  [ 0.1,  0.4,  0.9 ], [ 0.2,  0.4,  0.9 ], [ 0.3,  0.4,  0.9 ]  ]

])

NameError: name 'np' is not defined

In [None]:
i.shape

In [None]:
%matplotlib inline 
import matplotlib.pyplot as plt
import numpy as np

In [None]:
_ = plt.imshow(i)

In [None]:
i.reshape(36)    # 1-D array of shape (36,) -- this is "flattening", not a column vector

In [None]:
print(i.shape)
i_sh = i.shape
i.reshape(i_sh[0] * i_sh[1] * i_sh[2], 1)  # column vector: shape (N, 1), N = product of the original dimensions

### Vectorization


In [None]:
dim = 100    # try increasing this after running experiment

In [None]:
A = np.random.rand(dim, dim)
B = np.random.rand(dim, dim)

In [None]:
A.shape   # (rows, columns); A[0].size and A[1].size are both just the length of a row

In [None]:
def add_arrays(A, B): 
    """
    Add two 2-D arrays element by element using explicit Python loops.

    This is the loop-based counterpart of the vectorized `A + B`.

    Arguments:
    A -- 2-D NumPy array of shape (rows, cols)
    B -- 2-D NumPy array that can be indexed at every position of A

    Return:
    C -- float array of shape (rows, cols) with C[i, j] = A[i, j] + B[i, j]
    """
    # Take both extents from A.shape. A[0].size and A[1].size are both the
    # length of a row, which is only correct for square matrices and fails
    # outright when A has a single row.
    n_rows, n_cols = A.shape
    C = np.zeros((n_rows, n_cols))

    for i in range(n_rows):
        for j in range(n_cols):
            C[i, j] = A[i, j] + B[i, j]
    return C

In [None]:
%time C = add_arrays(A, B)

In [None]:
%time C = A + B

In [None]:
import time 

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals. time.time() is a wall clock: it can be too
# coarse to register a fast operation (yielding 0.0) and may be adjusted
# while the measurement is running.

# Non-vectorized time 
start = time.perf_counter()

C = add_arrays(A, B)

end = time.perf_counter()
non_vec_time = end - start 


## Vectorized time 
start = time.perf_counter()

C = A + B 

end = time.perf_counter()
vec_time = end - start

In [None]:
vec_time / non_vec_time * 100

In [None]:
%%time
total = 0
for i in np.arange(100_000_000): 
    total += i 
print(total)

In [None]:
%time sum(np.arange(100_000_000))

Moral of the story: when performance is an issue, prefer NumPy's built-in vectorized operations over hand-written Python loops.