### NumPy Basics: Arrays and Vectorized Computation

In [None]:
import numpy as np

In [18]:
# Create a 2x3 array of random values. `randn` samples the normal
# distribution with mean 0 and standard deviation 1.
data = np.random.randn(2, 3)


In [3]:
# Echo the generated 2x3 array so its values are rendered below.
data

array([[ 0.80798958,  0.27310563, -1.65341462],
       [-0.0405285 , -0.83230578, -0.24842189]])

In [4]:
# Basic arithmetic on the array: multiplying by a scalar scales every element.

data * 20

array([[ 16.15979161,   5.46211251, -33.06829239],
       [ -0.81056991, -16.64611563,  -4.96843776]])

In [5]:
# Adding two arrays of the same shape works element by element
# (here every value is doubled).
data + data

array([[ 1.61597916,  0.54621125, -3.30682924],
       [-0.08105699, -1.66461156, -0.49684378]])

In [6]:
# `shape` is a tuple holding the size of each dimension.
data.shape

(2, 3)

In [7]:
# `dtype` reports the element type of the array.
data.dtype

dtype('float64')

In [15]:
# A list of equal-length lists is converted into a two-dimensional array:
# each inner list becomes one row.
data1 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]
arr1 = np.array(data1)


In [16]:
# Echo the array built from the nested list (2 rows, 4 columns).
arr1

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [18]:
# np.identity(n) returns an n x n identity matrix: ones on the main diagonal, zeros everywhere else
np.identity(15)


array([[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
         0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
       

In [19]:
# Other built-in array constructors: np.zeros(n) returns a 1-D array of n zeros
np.zeros(10)


array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [21]:
# Passing a tuple sets the shape: 3 rows by 6 columns of zeros.
np.zeros((3,6))


array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [25]:
# Vectorization: arithmetic between equal-size arrays is applied element
# by element, without writing an explicit loop.
hello = np.array([
    [1., 5., 7.],
    [4., 8., 10.],
])
hello

array([[  1.,   5.,   7.],
       [  4.,   8.,  10.]])

In [26]:
# `*` multiplies matching elements (an element-wise square here),
# it is not a matrix product.
hello * hello

array([[   1.,   25.,   49.],
       [  16.,   64.,  100.]])

In [27]:
# A scalar on the left works too: this takes the reciprocal of every element.
1/hello

array([[ 1.        ,  0.2       ,  0.14285714],
       [ 0.25      ,  0.125     ,  0.1       ]])

In [31]:
# Indexing example.
# `arange` works like Python's `range` but returns an ndarray instead of
# a list; here it holds the integers 0 through 19.
world = np.arange(0, 20)


In [32]:
# Echo the full array before any element is modified.
world

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [33]:
# A single integer index returns the element at that (zero-based) position.
world[5]


5

In [34]:
# A slice selects positions 5 through 8; the stop index 9 is excluded.
world[5:9]

array([5, 6, 7, 8])

In [38]:
# Slice assignment: a scalar assigned to a slice is written into every
# selected position (indices 5 through 8), changing `world` in place.

world[5:9] = 12

In [39]:
# Keep a handle on the slice. Later in this notebook, writing through
# `new` also changes `world`, so the slice shares data with the original
# array rather than copying it.
new = world[5:9]


In [40]:
# The slice reflects the value assigned to world[5:9] above.
new

array([12, 12, 12, 12])

In [46]:
# A bare ':' slice selects every element, so this overwrites all values
# held by `new` in place (it does not rebind the name `new`).
new[:] = 54


In [47]:
# `world` now shows 54 at positions 5-8: the write through `new` changed
# the original array as well.
world

array([ 0,  1,  2,  3,  4, 54, 54, 54, 54,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [3]:
# Boolean indexing - select rows of one array with a True/False mask
# built from another array. `names` supplies one label per row of `data`.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(len(names), 4)  # 7x4: one row per entry in `names`
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'],
      dtype='<U4')

In [4]:
# Echo the 7x4 random array that the boolean mask will be applied to.
data

array([[ 1.3117209 , -0.98539538,  1.9719839 ,  0.2391028 ],
       [-0.0388171 , -0.58923085,  0.03288173,  0.30075586],
       [-0.43513001,  1.11685136, -0.63064229,  0.88883894],
       [ 0.26537849,  0.60699237,  0.98315819, -1.42003413],
       [ 1.43710888,  1.53274153,  0.37481552,  0.41049196],
       [-0.67927821,  0.46740291, -0.17100153,  0.08298102],
       [ 0.15665615, -0.60000675, -1.49626897, -0.73117449]])

In [5]:
# Comparing the array with a string tests every element and yields a
# boolean array of the same length.
names == 'Bob'

array([ True, False, False,  True, False, False, False], dtype=bool)

In [7]:
# Index with the boolean mask: only the rows where the mask is True
# (rows 0 and 3, the 'Bob' entries) are returned.
data[names == 'Bob']


array([[ 1.3117209 , -0.98539538,  1.9719839 ,  0.2391028 ],
       [ 0.26537849,  0.60699237,  0.98315819, -1.42003413]])

In [13]:
# Fancy indexing - using integer arrays
# Set up a 5x3 array whose row i is filled with the value i, so rows picked
# with a list of integers (e.g. arr[[4, 3, 0]]) are easy to recognise.

arr = np.empty((5, 3))

# Assigning a scalar to arr[i] writes it into every column of row i.
for i in range(5):
    arr[i] = i

# Must be the last top-level expression of the cell to be displayed;
# indented inside the loop it was evaluated and discarded on every pass.
arr

In [14]:
# Transposing arrays.
# Lay the integers 0..15 out as a 4x4 grid, filled row by row.
arr2 = np.arange(16).reshape(4, 4)
arr2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [15]:
# `.T` gives the transpose: rows of arr2 become columns.
arr2.T

array([[ 0,  4,  8, 12],
       [ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15]])

In [17]:
# Mathematical and statistical methods on arrays. These are aggregations
# (also called reductions): they collapse many values into one result.
fun = np.random.randn(5, 4)
fun

array([[-1.59964906, -1.02962146,  0.86821413,  0.32409465],
       [-1.01403879, -0.58043194, -0.26275176,  1.47885202],
       [-1.32972372, -0.06553889,  0.52742578, -0.64670116],
       [ 0.7785618 ,  0.54960465, -2.14393627, -0.60372329],
       [-0.89332724, -0.1840842 , -1.66946919, -1.25470453]])

In [18]:
# With no axis argument, the mean is taken over all 20 elements.
fun.mean()


-0.43754742408048442

In [19]:
# The top-level function form returns the same value as the method.
np.mean(fun)


-0.43754742408048442

In [20]:
# Sum of every element in the array (again, no axis given).
fun.sum()


-8.7509484816096883

In [7]:
# Sorting: the array `sort` method orders values ascending by default.
# Start from seven random samples to sort.
arrayz = np.random.randn(7)


In [8]:
# Values in their original (unsorted) order.
arrayz

array([-0.08337157, -0.82160816,  1.80325979,  0.28243602,  0.99845945,
        0.68857274,  1.7385634 ])

In [9]:
# The method sorts the array in place and returns None, so this cell
# shows no output.
arrayz.sort()


In [10]:
# The same array object, now in ascending order.
arrayz

array([-0.82160816, -0.08337157,  0.28243602,  0.68857274,  0.99845945,
        1.7385634 ,  1.80325979])

In [11]:
# np.unique drops duplicates and returns the distinct values in sorted order.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Joe', 'Joe'])
np.unique(names)


array(['Bob', 'Joe', 'Will'],
      dtype='<U4')

In [12]:
# The same works for integers: repeated values collapse to one entry each,
# and the result comes back sorted.
intz = np.array([3, 3, 4, 4, 4, 4, 5, 5, 6, 7, 8, 6])
np.unique(intz)


array([3, 4, 5, 6, 7, 8])

In [14]:
# Test membership: for each element of `values`, is it one of 2, 3 or 6?
# `np.isin` is used instead of `np.in1d`, which is deprecated as of
# NumPy 2.0; for a 1-D input like this the boolean result is the same.

values = np.array([6, 0, 0, 3, 2, 5, 6])
np.isin(values, [2, 3, 6])


array([ True, False, False,  True,  True, False,  True], dtype=bool)

In [16]:
# np.random.normal with its default parameters draws from the standard
# normal distribution; `size` sets the shape of the result (4x4 here).
samples = np.random.normal(size=(4, 4))
samples

array([[ 2.20473493, -0.63616445,  1.0247614 , -0.20296435],
       [-1.10828716,  1.10745515, -0.11339146,  1.00276138],
       [ 0.14327598,  1.28420753, -0.85451446,  0.0888386 ],
       [ 0.60355473,  0.81215104, -0.16565535,  1.80291749]])