### NumPy

In [1]:
%matplotlib inline
from IPython.core.display import HTML
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

# Locate the two stylesheets expected in the current working directory.
path1 = os.path.join(os.getcwd(),'style-table.css')
path2 = os.path.join(os.getcwd(),'style-notebook.css')

# Read both files inside a context manager so the handles are closed
# deterministically instead of being left for the garbage collector.
with open(path1) as table_css, open(path2) as notebook_css:
    css = table_css.read() + notebook_css.read()

# Last expression of the cell: inject the combined CSS into the notebook.
HTML('<style>{}</style>'.format(css))

NumPy universal functions:

http://docs.scipy.org/doc/numpy-1.10.0/reference/ufuncs.html#available-ufuncs

More on NumPy:

http://cs231n.github.io/python-numpy-tutorial/

### NumPy - arrays

In [2]:
my_list = [1,2,3,4]
my_array = np.array(my_list)
my_array

array([1, 2, 3, 4])

In [3]:
my_list2 = [11,22,33,44]
my_lists = [my_list,my_list2]

my_array2 = np.array(my_lists)
my_array2

array([[ 1,  2,  3,  4],
       [11, 22, 33, 44]])

In [4]:
my_array2.shape #like in pandas, returns (rows, columns)

(2, 4)

In [5]:
my_array2.dtype

dtype('int32')

In [6]:
np.zeros(5)  #array of straight 0s

array([ 0.,  0.,  0.,  0.,  0.])

In [7]:
my_zeros_array = np.zeros(5)
my_zeros_array.dtype

dtype('float64')

In [8]:
np.ones([5,5]) #5x5 array of 1s

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [9]:
np.empty([5,5]) #5x5 UNINITIALIZED array: contents are arbitrary (zeros below only by chance); use np.zeros for guaranteed 0s

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [10]:
np.eye(5)  #identity matrix (diagonal of 1)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [11]:
np.arange(5) #only stop given: start defaults to 0 and step to 1 (stop is exclusive)

array([0, 1, 2, 3, 4])

In [12]:
np.arange(5,50,2) #start, stop, step

array([ 5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37,
       39, 41, 43, 45, 47, 49])

### NumPy - arrays with scalars

In [13]:
5/2 #python 3 divides properly, unlike python 2

2.5

In [14]:
arr1 = np.array([[1,2,3,4],[8,9,10,11]])
arr1

array([[ 1,  2,  3,  4],
       [ 8,  9, 10, 11]])

In [15]:
arr1*arr1 #multiply array by itself, each value * itself

array([[  1,   4,   9,  16],
       [ 64,  81, 100, 121]])

In [16]:
arr1-arr1 #array minus itself, will be 0

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [17]:
1/arr1 # reciprocal: 1 divided by every value in the array (element-wise)

array([[ 1.        ,  0.5       ,  0.33333333,  0.25      ],
       [ 0.125     ,  0.11111111,  0.1       ,  0.09090909]])

In [18]:
arr1 ** 3 #every value cubed

array([[   1,    8,   27,   64],
       [ 512,  729, 1000, 1331]], dtype=int32)

### NumPy - indexing and slicing arrays

In [19]:
arr2 = np.arange(0,11) # 11 values, from 0
arr2

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [20]:
arr2[8]

8

In [21]:
arr2[1:5]

array([1, 2, 3, 4])

In [22]:
arr2[0:5]

array([0, 1, 2, 3, 4])

In [23]:
arr2[0:5] = 100 #set first 5 values to 100
arr2

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [24]:
arr3 = np.arange(0,11) # 11 values, from 0
arr3

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [25]:
slice_of_arr3 = arr3[0:6] #slice of array
slice_of_arr3

array([0, 1, 2, 3, 4, 5])

In [26]:
slice_of_arr3[:] = 99 # all elements set to 99
slice_of_arr3

array([99, 99, 99, 99, 99, 99])

In [27]:
arr3 # numpy slices are only a view of original array, not copied

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [28]:
arr_copy = arr3.copy() # real copies need to be defined explicitly

In [29]:
arr2d = np.array([[5,10,15],[20,25,30],[35,40,45]])
arr2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [30]:
arr2d[1] #show just row 2 of array

array([20, 25, 30])

In [31]:
arr2d[2] #show just row 3 of array

array([35, 40, 45])

In [32]:
arr2d[1, 0] # show row 2, column 1 -- one tuple index instead of chained [1][0], which builds a temporary row first

20

In [33]:
arr2d[:2,1:] #slice off the top right 2x2 of arr2d

array([[10, 15],
       [25, 30]])

In [34]:
# len() of a 2-D array is its number of rows, i.e. shape[0] (shape[1] is the column count).
# Uses arr2d, which already exists at this point in the notebook, so the cell runs on a fresh kernel.
len(arr2d) == arr2d.shape[0] # 2 ways of getting the number of rows

True

### NumPy - filling arrays using for loop

In [None]:
arr2d2 = np.zeros([10,10]) #make new 10x10 array of 0s
arr2d2

In [None]:
# fill arr2d2 with 1s
for i in range(len(arr2d2)):
    arr2d2[i] = 1
    
arr2d2

In [None]:
# fill arr2d2 with 0-9s
for i in range(len(arr2d2)):
    arr2d2[i] = i
    
arr2d2

### NumPy - fancy indexing

In [None]:
arr2d2[[2,4,6,8]]  # get only the 2,4,6,8th rows

In [None]:
arr2d2[[6,8,1]]  # get the rows at indices 6, 8 and 1, returned in that order

### NumPy - transposing arrays

In [None]:
arr = np.arange(50).reshape([10,5]) #50 points, 10x5 matrix
arr

In [None]:
arr.T #transpose the matrix

In [None]:
np.dot(arr.T,arr) #dot product of original with transposed

### NumPy - 3d matrices and transposing

In [None]:
arr3d = np.arange(50).reshape(5,5,2)
arr3d

In [None]:
arr3d.transpose([1,0,2])

### NumPy - universal array functions

NumPy universal functions:

http://docs.scipy.org/doc/numpy-1.10.0/reference/ufuncs.html#available-ufuncs

More on NumPy:

http://cs231n.github.io/python-numpy-tutorial/

In [None]:
arr = np.arange(11)
arr

In [None]:
np.sqrt(arr)

In [None]:
np.exp(arr)

In [None]:
# Seed the global NumPy generator so this and the following random cells
# give the same numbers on every Restart & Run All.
np.random.seed(0)
A = np.random.randn(5) #draw 5 samples from the standard normal distribution
A

In [None]:
B = np.random.randn(5)
B

In [None]:
np.add(A,B)

In [None]:
A + B

In [None]:
np.maximum(A,B)

In [None]:
np.minimum(A,B)

In [None]:
np.maximum(A,B) ** np.minimum(A,B)

### NumPy - processing arrays

In [None]:
points = np.arange(-5 , 5 , 0.1)

In [None]:
dx,dy = np.meshgrid(points,points)
dx

In [None]:
dy

In [None]:
z = np.sin(dx) + np.sin(dy)
z

In [None]:
plt.imshow(z)

### NumPy - np.where

In [None]:
#np.where is useful for cleaning data when using pandas

In [None]:
a = np.array([1,2,3,4])
b = np.array([100,200,300,400])
condition = np.array([True,True,False,False])

In [None]:
# About zip() function:

# When you zip() together three lists containing 20 elements
# each, the result has 20 elements. Each element is a 
# three-tuple.

In [None]:
#without np.where, we would have to use a list comprehension:
answer = [(i if k else j ) for i,j,k in zip(a,b,condition)]
answer

In [None]:
answer2 = np.where(condition,a,b)
answer2

In [None]:
np.where(False,a,b)

In [None]:
arr5 = np.random.randn(3,3)
arr5

In [None]:
np.where(arr5<0,0,arr5) #if <0 fill with 0s

### NumPy - more array properties

In [None]:
arr6 = np.array([[1,2,3],[4,5,6]])
arr6

In [None]:
arr6.sum()

In [None]:
arr6.sum(0) #sum along axis 0, i.e. down each column -> one total per column

In [None]:
arr6.mean()

In [None]:
arr6.std()

In [None]:
arr6.var()  #variance

In [None]:
bool_arr = np.array([True,True,False])

In [None]:
bool_arr.any() #if any values are True, return True

In [None]:
bool_arr.all()  #if all values are True, return True

In [None]:
arr = np.random.randn(5)
arr

In [None]:
arr.sort() # re-sort values, lowest to highest
arr

In [35]:
countries = np.array(['France','Germany','USA','Russia','Mexico','USA','Germany'])
countries

array(['France', 'Germany', 'USA', 'Russia', 'Mexico', 'USA', 'Germany'], 
      dtype='<U7')

In [37]:
np.unique(countries) # get unique values

array(['France', 'Germany', 'Mexico', 'Russia', 'USA'], 
      dtype='<U7')

In [40]:
# test whether each of the listed names occurs in countries; returns one boolean per listed name
# NOTE(review): np.in1d is deprecated in recent NumPy releases in favour of np.isin -- confirm the target NumPy version
np.in1d(['France','USA','Sweden'],countries)

array([ True,  True, False], dtype=bool)

### NumPy - array input and output (as npy, npz, txt)

In [41]:
arr8 = np.arange(5)
arr8

array([0, 1, 2, 3, 4])

In [42]:
np.save('myarray',arr8)  # save arr8 as myarray

In [45]:
arr8 = np.arange(10) #make new array with same name
arr8  

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [47]:
np.load('myarray.npy') # load myarray

array([0, 1, 2, 3, 4])

In [50]:
arr1 = np.arange(11)
arr2 = np.arange(12)
np.savez('ziparray.npz',x=arr1,y=arr2) #save 2 arrays to zip

In [53]:
archive_array = np.load('ziparray.npz') # load zip
archive_array['x']

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [54]:
archive_array['y']

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [55]:
mat = np.array([[1,2,3],[4,5,6]])
mat

array([[1, 2, 3],
       [4, 5, 6]])

In [56]:
np.savetxt('mytextarray.txt',mat,delimiter=',') # save to txt file

In [58]:
arr = np.loadtxt('mytextarray.txt',delimiter=',') #load from text
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])