In [1]:
# Ch04 NumPy Basics: Array and Vectorized Computation

# NumPy, short for Numerical Python, 
# is one of the most important foundational packages for numerical computing in Python. 
# Numpy is more efficiency on large arrays of data, generally 10 to 100 times fast

import numpy as np

my_array = np.arange(1000000)

my_list = list(range(1000000))

%time for _ in range(10): my_array2 = my_array*2

%time for _ in range(10): my_list2 = my_list*2

Wall time: 26.1 ms
Wall time: 409 ms


In [None]:
# 4.1 The NumPy ndarry: A Multidimensional Array Object

In [2]:
# Page 88

import numpy as np

data = np.random.randn(2,3)

In [3]:
data

array([[-0.18943887, -0.45165516, -0.28524931],
       [ 2.63580682, -0.70136635,  0.66278067]])

In [4]:
data*10

array([[-1.89438866, -4.51655163, -2.85249308],
       [26.35806817, -7.01366351,  6.62780668]])

In [5]:
data + data

array([[-0.37887773, -0.90331033, -0.57049862],
       [ 5.27161363, -1.4027327 ,  1.32556134]])

In [6]:
data.shape

(2, 3)

In [7]:
data.dtype

dtype('float64')

In [8]:

# 
list = [6, 7.5, 8, 0, 1]    # <- data type is list
list

[6, 7.5, 8, 0, 1]

In [9]:
list.dtype    # Error, only np array has dtype function

AttributeError: 'list' object has no attribute 'dtype'

In [10]:
list.shape          # Error, only np array has shape function

AttributeError: 'list' object has no attribute 'shape'

In [13]:
# convert list to array
my_array = np.array(list)
my_array 

array([6. , 7.5, 8. , 0. , 1. ])

In [14]:
list

[6, 7.5, 8, 0, 1]

In [16]:
# only show last output
my_array
my_array.shape

(5,)

In [17]:
my_array

array([6. , 7.5, 8. , 0. , 1. ])

In [18]:
my_array.shape

(5,)

In [19]:
# Use print to show all
print(my_array)
print(my_array.shape)

[6.  7.5 8.  0.  1. ]
(5,)


In [22]:
data = [[1,2,3],
        [4,5,6]]
print(data)
#print(data.shape)          # Error, only np array has shape function

[[1, 2, 3], [4, 5, 6]]


In [23]:
# convert list to array
array2 = np.array(data)
array2
  

array([[1, 2, 3],
       [4, 5, 6]])

In [25]:
# Use tab to show all possible functions
array2.shape

(2, 3)

In [26]:
# test list
myList =[1, 3, 'a']
myList

[1, 3, 'a']

In [27]:
#np.array tries to infer a good data type for the array that it creates
data = [1,2,3]
array3 = np.array(data)

data = [1, 2, 3.0]
array4 = np.array(data)

data = [1,2,3, 'A']
array5 = np.array(data)

print(array3.dtype)
print(array4.dtype)
print(array5.dtype)

int32
float64
<U11


In [29]:
# Page 89
# Use zeros, ones , empty to creates arrays
#  zeros and ones create arrays of 0s or 1s, 
# pass a tuple for the shape:

my_array = np.zeros(10)
print(my_array)
print(my_array.shape)


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
(10,)


In [30]:
my_array = np.zeros((2,4))
print(my_array)
print(my_array.shape)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
(2, 4)


In [31]:
my_array = np.zeros((2,3,2))
print(my_array)
print(my_array.shape)

[[[0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]]]
(2, 3, 2)


In [34]:
my_array = np.ones(10)
print(my_array)
print(my_array.shape)


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
(10,)


In [35]:
my_array = np.ones((2,5))
print(my_array)
print(my_array.shape)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
(2, 5)


In [36]:
my_array = np.ones((2,3,2))
print(my_array)
print(my_array.shape)

[[[1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]]]
(2, 3, 2)


In [37]:
# empty creates an array without initializing its values to any particular value
print('\n----------------')
my_array = np.empty(10)
print(my_array)
print(my_array.shape)

my_array = np.empty((2,5))
print(my_array)
print(my_array.shape)


----------------
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
(10,)
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
(2, 5)


In [40]:
# arange is an array- value
my_list = range(10)     # < - this is list
print(my_list)

#my_list.shape      # <-Error
my_list

range(0, 10)


range(0, 10)

In [41]:
#vs.
my_array = np.arange(10)      # < - this is array
print(my_array)
my_array.shape

[0 1 2 3 4 5 6 7 8 9]


(10,)

In [None]:
# Page 90 Table 4-1 Array creation functions
# array, arange, ones, ones_like, zeros


In [43]:
# Page 95 Arithmetic with NumPy Array
# list
my_list = [1, 2, 3, 4, 5]
print(my_list)
# np array
my_array = np.array([1,2,3,5])
print(my_array)



----------------
[1, 2, 3, 4, 5]
[1 2 3 5]


In [44]:
# list vs. array
# + , *
print(my_list)
list5 = my_list + 1       # Error, + is not work for list

[1, 2, 3, 4, 5]


TypeError: can only concatenate list (not "int") to list

In [45]:
print(my_array)
array5 = my_array + 1       # OK
array5

[1 2 3 5]


array([2, 3, 4, 6])

In [47]:
# list vs. array
print(my_list)
list = my_list + my_list         # list, concatation
print(list)
print('\n----------------')
print(my_array)
arr = my_array + my_array       # add value
print(arr)

[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

----------------
[1 2 3 5]
[ 2  4  6 10]


In [50]:
print('\n----------------')
# list vs. array
#res =  1/ my_list         # list, not work

print(my_array)
res = 1/ my_array       # add value
print(res)



----------------
[1 2 3 5]
[1.         0.5        0.33333333 0.2       ]


In [53]:
res = my_array > 0.3
res

array([ True,  True,  True,  True])

In [64]:
# Page 96
# Basic Indexing and Slicing

my_array = np.arange(10)

print (my_array)
print (my_array[5])
print (my_array[5:8])



[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]


In [65]:
print (my_array[2:3])

[2]


In [66]:
my_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [67]:
# copy a reference
my_array[5:8] = 0
print (my_array)

print('----------------')
arry_slice = my_array[5:8]
print (arry_slice)

print('----------------')
arry_slice[0] = 99
print (arry_slice)
print (my_array)        # notice that my_array is changed too, copy a reference

[0 1 2 3 4 0 0 0 8 9]
----------------
[0 0 0]
----------------
[99  0  0]
[ 0  1  2  3  4 99  0  0  8  9]


In [70]:
# copy values
print (arry_slice)
print (my_array) 
arry_slice = my_array[5:8].copy()
arry_slice[1] = -23
print (arry_slice)
print (my_array)        # notice that my_array is not changed, copy values

[ 99 -99   0]
[ 0  1  2  3  4 99  0  0  8  9]
[ 99 -23   0]
[ 0  1  2  3  4 99  0  0  8  9]


In [79]:
print('\n----------------')
# Page 97 -  two dimentional array
# Page 99
array_2d = np.array([[1,2,3,4],
                     [5,6,7,8],
                     [6,7,8,9]])
print(array_2d)
print(array_2d[1][3])


----------------
[[1 2 3 4]
 [5 6 7 8]
 [6 7 8 9]]
8


In [80]:
# same
print(array_2d[1, 2])
print(array_2d[1])      # The second row 
print(array_2d[:,1])    # The second col

7
[5 6 7 8]
[2 6 7]


In [82]:
# page 101
# Boolean Indexing

names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
print(names)
print(names.dtype)

# data = np.random.randn(7,4)
# data = np.random.randn(7,4)
data = np.array( [ [0, 0, 0, 0],
                 [1, -1, 1, 2],
                 [2, -2, 2, 2],
                 [3, -3, 3, 3],
                 [4, -4, 4, 4],
                 [5, -5, 5, 5] ])
print(data)

print(data.shape)


['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
<U4
[[ 0  0  0  0]
 [ 1 -1  1  2]
 [ 2 -2  2  2]
 [ 3 -3  3  3]
 [ 4 -4  4  4]
 [ 5 -5  5  5]]
(6, 4)


In [83]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [84]:
data

array([[ 0,  0,  0,  0],
       [ 1, -1,  1,  2],
       [ 2, -2,  2,  2],
       [ 3, -3,  3,  3],
       [ 4, -4,  4,  4],
       [ 5, -5,  5,  5]])

In [85]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [88]:
# The boolean array must be of the same length the data array
res = (names =='Bob')     
print(res)

[ True False False  True False False False]


In [None]:
print('\n----------------')
print(data[names =='Bob', 2:] )

In [None]:
print('\n----------------')
print(data[names =='Bob', 3] )

In [None]:
print('\n----------------')
print( data[names !='Bob'] )


print('\n----------------')
mask = (names == 'Bob') | (names == 'Will')     # Or: |, and : &
print(mask)

print(data)
print('\n----------------')
print(data[mask])


print('\n----------------')
data[ data<0 ] = 0
print(data)

print('\n----------------')
data[ names =='Joe'] = 7
print(data)

In [None]:
# Page 104
# Fancy Indexing

arr = np.empty((8,4))

for i in range(8):
    for j in range (4):
        arr[i][j] = i                     #arr[i,j] is same as arr[i][j]
print( arr)

print('\n----------------')
# Same
for i in range(8):
    arr[i]=i
print(arr)

In [None]:
# Page 105
# Transposing Array and Swapping Axes

arr = np.arange(15)
print(arr)

print('\n----------------')
arr = arr.reshape([3,5])
print(arr)

print('\n----------------')
# Transposing Array: used quiet often: subtotal by product vs. subtotal by Quartly
new_arr = arr.T
print(new_arr)

print('\n----------------')
print(arr)   # origianl arry doesn't change

new_arr = np.dot(arr.T, arr)
print(new_arr)

#
arr = np.arange(16).reshape([2,2,4])
print(arr)
# data = np.random.randn(7,4)
data = np.array( [ [0, 0, 0, 0],
                 [1, -1, 1, 2],
                 [2, -2, 2, 2],
                 [3, -3, 3, 3],
                 [4, -4, 4, 4],
                 [5, -5, 5, 5] ])
print(data.T)

In [None]:
# page 107
# Universal Functions
# Page 109 Table 4-3 Unary ufuncs: 
# abs, fabs, sqrt,square, exp, log,log10, sign, ceil, floor, rint, modf, isnun,

arr = np.arange(5)
print(arr)

print( np.sqrt(arr))

print( np.exp(arr))

x = np.random.randn(8)
print(x)

y = np.rint(x)
print(y)

z = np.floor (x *10)
print(z)

# Page 109 Binary universal functions
# add, subtract, multiply, divide

print('\n----------------')
w = y + z
print(w)

# use : add
w = np.add(y,z)
print(w)

print('\n----------------')
w = y / z
print(w)

# use : divide
w = np.divide(y,z)
print(w)

In [None]:
# page 110
# 4.3 Array-Oriented Programming with Arrays : replace to use loop

x = np.arange(-5,5, 0.01)  # 1000 
y = np.arange(-5,5, 0.01)  # 1000 

xs, ys = np.meshgrid(x, y)

z = np.sin(xs**2 + ys**2) / (xs**2 + ys**2)

#print(help(np.meshgrid))     # <- Use help

import matplotlib.pyplot as plt
plt.imshow(z)
#plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
#h = plt.contourf(x,y,z)
#plt.show()

In [None]:
# page 111
# Expressing ConditionalLogic as Array Operations

x_arry = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
y_arry = np.array([2.1, 2.2, 2.3, 2.4, 2.5])

print( x_arry > y_arry)
cond_array = np.array([True, False, True, False, True])

result = np.where(cond_array, x_arry, y_arry)
print(result)

print('\n----------------')
a = np.random.randn(4,4)
print(a)

b = np.where(a>0,2,-2)
print(b)

c = np.where(a>0,2,a)
print(c)

print('\n----------------')
# more example (from numpy doc)
a = np.arange(10)
print(a)

b = np.where(a < 5, a, 10*a)
print(b)


In [None]:
# page 113 Methematical and Statistical Method
# Basic array staticstical method: sum, mean, std, var, min, max, argmin, argmax,cumsum,cumprod

a = np.arange(1,10)
print(a)

print(np.sum(a))
print(np.mean(a))
print(np.std(a))
print(np.var(a))
print(np.min(a))
print(np.max(a))
print(np.argmin(a))
print(np.argmax(a))

b = np.cumsum(a)
print(b)

b = np.cumprod(a)
print(b)


print('\n----------------')
a = np.arange(10).reshape(2,5)
print(a)


b = np.cumsum(a)
print(b)

c = a.cumsum(axis=0)
print(c)

d = a.cumsum(axis=1)
print(d)

In [None]:
# page 115 Methods for Boolean Arrays

a = np.random.randn(10)
print(a)

b = np.rint(a)
print(b)

print(b>0.5)
c = (a>=0).sum()   # count number of positive value
print(c)

d = np.array([False, False,  True, False,  True])
print(d.any())
print(d.all())


In [None]:
# page 115 Methods for sort

a = np.random.randn(5)
print(a)

b = a.sort()
print(b)    # nothing
print(a)

print('\n----------------')

a = np.random.randn(6).reshape(2,3)
a = np.floor( a*10)
print(a)

a.sort(0)    # for col
print(a)

a.sort(1)    # for col
print(a)

In [None]:
# Page 116 Unique and set logic

names = np.array(['Joe', 'Bill', 'Bob','Joe', 'Will','Bob'])
print(names)

print('\n----------------')
some_names= np.unique(names)   # Compute the sorted, unique element
print(some_names)              # already sorted


print('\n----------------')
# vs. pure Python
print(set(names))
print(names)    # notice names no change

name_list = sorted (set(names))   # numpy    name_list.sort()
print(name_list)

# Page 117: Array set operations: unique(x), intersect1d(x,y), union1d(x, y), in1d(x,y)
# vs.
# page 66 Paython set operations a.add(x), a.union(b), a.intersection(b), a.difference(b)

In [None]:
# Page 117
# 4.4 File Input and Output with Arrays
# np.save, np.load

arry = np.arange(10)
print(arry)

np.save('array_file', arry)

b_array = np.load('array_file.npy')
print(b_array)

In [None]:
x = np.array([1, 2,3])
y = np.array([4, 5,6])


z1 = x *y
print(z1)

z2 = x.dot(y)
print(z2)

z3 = np.dot(x,y)
print(z3)

In [None]:
# page 118
# 4.5 Linear Algebra

x = np.array([[1, 2,3],[4,5,6]])
y = np.array([[1,2],[3,4],[5,6]])
print(x)
print(y)

# dot operation
z = x.dot(y)
print(z)
# vs.
z = np.dot(x, y)
print(z)

In [None]:
# Page 120
# 4.6 numpy.random vs. Paython random, more efficient
# Table 4-8 : seed, permutation, shuffle, rand, randint, randn, binomial, normal

sample = np.random.normal(size=(4,4))
print(sample)

In [1]:
# Page 121
# 4.7 Example: Random walks
# Use pure Python

import random
position = 0
walk = [position]
steps =1000
for i in range(steps):
    step = 1 if random.randint(0,1) else -1
    position +=step
    walk.append(position)

plt.plot(walk[:200])

# vs. np.rand
nsteps = 100
draws = np.random.randint(0, 2, size = nsteps)
print(draws)

steps = np.where(draws>0, 1, -1)
print(steps)

NameError: name 'plt' is not defined