## Numpy Basics

In [1]:
import numpy as np
import time

In [3]:
# Build the 1-D test input 0, 1, ..., num_elements - 1 and show it with its shape.
num_elements = 1_000_000
input_array = np.arange(0, num_elements)
print(input_array, input_array.shape, sep='\n')

[     0      1      2 ... 999997 999998 999999]
(1000000,)


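<b>np.arange</b> is one of NumPy's array-creation routines. It returns an array (an instance of <code>ndarray</code>) of evenly spaced values in the half-open interval [start, stop): the start value is included and the stop value is not.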

In [8]:
print(np.arange(5)) # values start at 0 by default; the stop value (5) is excluded

[0 1 2 3 4]


In [9]:
print(np.arange(2,10)) # (start, stop): start is included, stop is excluded

[2 3 4 5 6 7 8 9]


In [16]:
# Baseline: square every element with a pure-Python loop and time it.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can jump
# if the system clock is adjusted.
start_time = time.perf_counter()
return_array = [0] * len(input_array) # preallocate a zero-filled list of the right length
for k, v in enumerate(input_array): # overwrite each slot with the square of the matching element
    return_array[k] = v * v
print(time.perf_counter() - start_time) # elapsed seconds

0.363400936126709


In [17]:
print([0]*5) # a list of five zeros (preallocated, not empty)

[0, 0, 0, 0, 0]


In [19]:
print(return_array[2]) # spot-check one element of the populated list

4


In [21]:
# Vectorized version of the squaring loop, timed the same way.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is wall-clock time and can jump
# if the system clock is adjusted.
start_time = time.perf_counter()
return_array_vectorized = np.power(input_array,2) # (base, exponent), applied elementwise
print(time.perf_counter() - start_time) # elapsed seconds

0.021133899688720703


### Array operations

In [28]:
np.zeros(2) # creates an array of two zeros (float by default)

array([0., 0.])

In [26]:
np.identity(3) # 3x3 identity matrix (ones on the diagonal, zeros elsewhere)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [27]:
np.ones((2,3), dtype=np.int16) # 2x3 array of ones; dtype sets the element type

array([[1, 1, 1],
       [1, 1, 1]], dtype=int16)

In [32]:
array_1 = np.array([[1,2], [3,4], [5,6]]) # creates a 3x2 array from a nested list (3 rows, 2 columns)
array_1

array([[1, 2],
       [3, 4],
       [5, 6]])

In [35]:
print(array_1.size) # number of total elements
print(array_1.shape)
print(array_1.ndim) # number of axes or dimensions (i.e. x and y) 
print(array_1.itemsize) # number of bytes of each element (e.g. int is 8)
print(array_1.dtype)
print(array_1.nbytes) # 48 = 6 (elements in array) X 8 (itemsize)

6
(3, 2)
2
8
int64
48


In [36]:
array_1[1,1] # element at row 1, column 1 (indices are 0-based)

4

In [39]:
array_1[:,1] # all rows (:), column 1 -> 1-D array

array([2, 4, 6])

In [41]:
array_1[1] # row 1 as a 1-D array

array([3, 4])

In [42]:
array_1[0][1] # chained indexing: take row 0, then element 1 of that row

2

In [44]:
array_1[:, 0:1] # all rows, columns 0:1 (the stop index 1 is excluded); slicing keeps the result 2-D

array([[1],
       [3],
       [5]])

In [46]:
array_2 = np.add(array_1, 1) # broadcasting: the scalar 1 is added to every element (same as array_1 + 1)
array_2

array([[2, 3],
       [4, 5],
       [6, 7]])

In [47]:
np.multiply(array_1, 2) # same as array_1 * 2: every element is multiplied by the scalar

array([[ 2,  4],
       [ 6,  8],
       [10, 12]])

In [48]:
array_1 # unchanged: the np.add / np.multiply calls above returned new arrays

array([[1, 2],
       [3, 4],
       [5, 6]])

In [49]:
array_1 / 2 # true division: the result is a float array even though array_1 holds integers

array([[0.5, 1. ],
       [1.5, 2. ],
       [2.5, 3. ]])

In [50]:
array_1 + array_2 # elementwise addition of two arrays with the same shape

array([[ 3,  5],
       [ 7,  9],
       [11, 13]])

In [52]:
np.add(array_1, array_2) # same as the previous cell (array_1 + array_2)

array([[ 3,  5],
       [ 7,  9],
       [11, 13]])

In [53]:
np.multiply(array_1, array_1) # elementwise product (squares each element), not a matrix product

array([[ 1,  4],
       [ 9, 16],
       [25, 36]])

In [54]:
array_3 = np.array([ [1,2]]) # shape (1,2)
array_4 = np.array([ [3], [4]]) # shape (2,1)
np.dot(array_3, array_4) # (1,2) x (2,1) matrix product -> shape (1,1): 1*3 + 2*4 = 11

array([[11]])

In [55]:
array_5 = np.dot(array_4, array_3) # (2,1) x (1,2) matrix product -> shape (2,2)
array_5

array([[3, 6],
       [4, 8]])

In [57]:
array_5.sum(axis=1) # axis=1: sum along each row (collapses the columns) -> one value per row

array([ 9, 12])

In [59]:
array_5.mean(axis=1) # axis=1: mean of each row (collapses the columns)

array([4.5, 6. ])

In [60]:
array_5.std(axis=0) # axis=0: standard deviation of each column (collapses the rows)

array([0.5, 1. ])

In [61]:
array_5.max(axis=0) # axis=0: maximum of each column (collapses the rows)

array([4, 8])

In [65]:
print(array_1)
print(array_1.min()) # smallest element of the whole array
print(array_1.max()) # largest element of the whole array
print(array_1.sum()) # sum of all the elements
print(array_1.mean()) # mean of all the elements
print(array_1.var()) # variance of all the elements
print(array_1.std()) # standard deviation = sqrt(variance)
print(array_1.sum(axis=1)) # one sum per row
print(array_1.min(axis=0)) # one minimum per column
print(array_1.argmin()) # index of the min element in the flattened array
print(array_1.argmax()) # index of the max element in the flattened array
print(array_5.argsort()) # note: array_5, not array_1; indices that would sort each row

[[1 2]
 [3 4]
 [5 6]]
1
6
21
3.5
2.9166666666666665
1.707825127659933
[ 3  7 11]
[1 2]
0
5
[[0 1]
 [0 1]]


In [66]:
array_5.shape # (rows, columns)

(2, 2)

In [67]:
array_5.reshape(4,1) # same 4 elements, read row by row, arranged as 4 rows x 1 column

array([[3],
       [6],
       [4],
       [8]])

In [69]:
array_5.flatten() # returns a 1-D copy of the array

array([3, 6, 4, 8])

### Linear algebra

In [73]:
print(array_1)
M1 = array_1.T # transpose: shape (3,2) -> (2,3)
M1

[[1 2]
 [3 4]
 [5 6]]


array([[1, 3, 5],
       [2, 4, 6]])

In [74]:
np.power(M1, 3) # elementwise cube of each entry (not a matrix power)

array([[  1,  27, 125],
       [  8,  64, 216]])

In [75]:
np.matmul(M1.T, M1) # (3,2) x (2,3) -> (3,3); np.dot(M1.T, M1) gives the same result for 2-D arrays

array([[ 5, 11, 17],
       [11, 25, 39],
       [17, 39, 61]])

In [77]:
print(np.matmul(M1, M1.T)) # matrix multiplication is NOT commutative: (2,3) x (3,2) -> (2,2)
print(np.matmul(M1.T, M1)) # whereas (3,2) x (2,3) -> (3,3)

[[35 44]
 [44 56]]
[[ 5 11 17]
 [11 25 39]
 [17 39 61]]


In [78]:
np.linalg.inv(np.array([[2,0],[0,2]])) # matrix inverse: the inverse of 2*I is 0.5*I

array([[0.5, 0. ],
       [0. , 0.5]])

In [79]:
# M1 is 2x3, so M1.T @ M1 is a 3x3 matrix of rank at most 2: it is singular
# (determinant zero) and has no inverse. The error is the point of this cell,
# so it is caught and displayed instead of aborting "Restart & Run All".
try:
    print(np.linalg.inv(np.matmul(M1.T, M1)))
except np.linalg.LinAlgError as err:
    print('LinAlgError: %s' % err)

LinAlgError: Singular matrix

In [82]:
# Forward problem for the linear system A x = b: given A and x, compute b.
A1 = np.array([[1, 1],
               [0, 1]])
x1 = np.array([2, 2]).reshape(2, 1)  # column vector, shape (2,1)
b1 = A1.dot(x1)
b1

array([[4],
       [2]])

In [83]:
x1_verify = np.linalg.solve(A1, b1) # Solving linear systems Ax = b, for x
# solve() returns floats, so compare within a tolerance rather than with ==.
# np.allclose also reduces over every element of the 2-D result, whereas the
# builtin all() iterates over rows and only works when each row has one element.
np.allclose(x1, x1_verify)

True

In [84]:
# If A is singular, the linear system Ax = b has either no solution (it is inconsistent) or infinitely many, depending on b
A2 = np.array([[1,1], [2,2]]) # Note that the rank of this matrix is 1: the second row is twice the first
x2 = np.array([[2], [3]])
potential_sol = np.matmul(A2, x2) # a right-hand side that is reachable by construction
potential_sol

array([[ 5],
       [10]])

In [85]:
b2 = np.array(([5], [11]))
# A2 maps every x to a multiple of (1, 2), and b2 = (5, 11) is not one, so
# there is no x that solves this linear system for A2 and b2.
# The error is the point of this cell, so it is caught and displayed instead
# of aborting "Restart & Run All".
try:
    x2_verify = np.linalg.solve(A2, b2)
except np.linalg.LinAlgError as err:
    print('LinAlgError: %s' % err)

LinAlgError: Singular matrix

In [86]:
# potential_sol was computed as A2 times x2, so an x solving this system does
# exist, but np.linalg.solve still rejects the singular coefficient matrix.
# The error is the point of this cell, so it is caught and displayed instead
# of aborting "Restart & Run All".
try:
    x2_verify = np.linalg.solve(A2, potential_sol)
except np.linalg.LinAlgError as err:
    print('LinAlgError: %s' % err)

LinAlgError: Singular matrix

In [87]:
# Another linear system, this one underdetermined (fewer equations than unknowns, so many solutions) when solving for x
A3 = np.array([[1,0,0], [0,1,1]]) # shape (2,3): not square
x3 = np.array([[1], [1], [1]])
b3 = np.matmul(A3, x3)
b3

array([[1],
       [2]])

In [89]:
# np.linalg.solve requires a square coefficient matrix; A3 is 2x3, so the call
# is rejected. The error is the point of this cell, so it is caught and
# displayed instead of aborting "Restart & Run All".
try:
    np.linalg.solve(A3, b3)
except np.linalg.LinAlgError as err:
    print('LinAlgError: %s' % err)

LinAlgError: Last 2 dimensions of the array must be square

## K-nearest neighbors (KNN)

In [2]:
import numpy as np
import pylab as plt
import scipy.spatial.distance as scidist
np.random.seed(1234)

In [4]:
import seaborn as sns
sns.set_palette('bright') # set the matplotlib color cycle

In [10]:
def gen_data(N1=100, N2=100):
    """Generates 2D samples from two Gaussians centered at (1, 1) and (-1, -1).

    Samples are drawn from NumPy's global random state, so call
    np.random.seed(...) beforehand for reproducible data.

    Parameters:
    N1 (int): Number of samples from Gaussian centered at (1, 1) labeled as 0
    N2 (int): Number of samples from Gaussian centered at (-1, -1) labeled as 1

    Returns:
    X: Sample data (D, N) with D = 2 and N = N1 + N2; each column is one sample,
       the first N1 columns come from the first Gaussian
    Y: Labels (N,), 0 for the first N1 samples and 1 for the remaining N2
    """
    # Standard-normal draws of shape (2, N); adding a (2, 1) column vector
    # broadcasts across the columns and shifts every sample to the new center.
    X1 = np.random.normal(0, 1, size=(2, N1)) + np.array([[1], [1]])
    X2 = np.random.normal(0, 1, size=(2, N2)) + np.array([[-1], [-1]])
    Y1 = np.zeros((N1,))
    # The second class is labeled 1; using zeros here would give every sample
    # the same label and make the two classes indistinguishable.
    Y2 = np.ones((N2,))
    # Stack side by side so column i of X corresponds to label Y[i].
    X = np.hstack((X1, X2))
    Y = np.hstack((Y1, Y2))
    print('X contains %d Examples of Class 1 and %d Examples of Class 2 in %d dimensions.' % (X1.shape[1], X2.shape[1], X.shape[0]))
    print('Shape of X: %s' % (X.shape,))
    print('Shape of Y: %s' % (Y.shape,))
    return X, Y

In [11]:
print('Generate Train data ...')
Xtrain, Ytrain = gen_data() # defaults: 100 samples from each Gaussian

Generate Train data ...
X contains 100 Examples of Class 1 and 100 Examples of Class 2 in 2 dimensions.
Shape of X: (2, 200)
Shape of Y: (200,)


In [12]:
# Example: a 2x4 sample from a normal distribution with loc (mean) 3 and scale (standard deviation) 2.5
np.random.normal(3, 2.5, size=(2,4))

array([[ 2.43341927,  0.6904232 ,  3.88959749, -0.1751587 ],
       [ 2.51131897,  1.8414515 ,  5.47353712,  6.47161845]])

In [17]:
A = np.random.normal(3, 2.5, size=(2,4))
print(A)
print(A + np.array([ [1], [1]])) # the (2,1) column vector broadcasts across the columns, shifting every sample by (+1, +1); gen_data uses this to move zero-mean samples to the center (1, 1)
print(A + np.array([ [-1], [-1]])) # likewise shifts every sample by (-1, -1)

[[1.28967918 4.49990521 2.45700219 3.8288163 ]
 [2.04158855 1.04545978 3.66559352 3.69857006]]
[[2.28967918 5.49990521 3.45700219 4.8288163 ]
 [3.04158855 2.04545978 4.66559352 4.69857006]]
[[0.28967918 3.49990521 1.45700219 2.8288163 ]
 [1.04158855 0.04545978 2.66559352 2.69857006]]
