# Boolean Indexing

## Array Indexing for changing elements:

In [2]:
import numpy as np
# Build the 3x2 integer array that the boolean-indexing examples operate on.
an_array = np.array([
    [11, 12],
    [21, 22],
    [31, 32],
])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [3]:
# An element-wise comparison yields a boolean mask with the same shape as
# an_array.
# NOTE: the name `filter` shadows Python's built-in filter(); it is kept
# because a later cell indexes an_array with this variable.
filter = an_array > 15
filter

array([[False, False],
       [ True,  True],
       [ True,  True]], dtype=bool)

Notice that the filter is an ndarray of the same shape as an_array, filled with True for each element of an_array that is greater than 15 and False for each element that is not (i.e. less than or equal to 15).


In [4]:
# Indexing with the boolean mask keeps only the elements whose mask entry is
# True; the selected values come back as a flat 1-D array.
print(an_array[filter])

[21 22 31 32]


In [7]:
# The mask can also be built inline, without an intermediate variable.
an_array[an_array > 15]

array([21, 22, 31, 32])

In [8]:
# Combine conditions element-wise with & (logical and). Each comparison must
# be parenthesised because & binds more tightly than < and >.
an_array[(an_array > 20) & (an_array < 30)]

array([21, 22])

In [9]:
# Select the even elements (remainder 0 when divided by 2).
an_array[(an_array % 2 == 0)]

array([12, 22, 32])

What is particularly useful is that we can also change elements in the array by applying a similar logical filter. Let's add 100 to all the even values.

In [10]:
# Assigning through a boolean mask modifies an_array in place: every even
# element gets 100 added.
# NOTE: this cell is not idempotent -- the updated values are still even, so
# re-running it adds another 100 each time.
an_array[an_array % 2 == 0] +=100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


# Datatypes and Array Operations

## Datatypes:

In [11]:
# With no dtype given, NumPy infers an integer dtype from the Python ints.
# The default integer width is platform dependent: the recorded output shows
# int32, while many 64-bit builds report int64.
ex1 = np.array([11, 12])
print(ex1.dtype)

int32


In [12]:
# Python floats are stored as float64 when no dtype is given.
ex2 = np.array([11.0, 12.0])
print(ex2.dtype)

float64


In [13]:
# The dtype argument tells NumPy explicitly which element type to use,
# instead of letting it infer one from the values.
ex3 = np.array([11, 21], dtype=np.int64)
print(ex3.dtype)

int64


In [14]:
# Requesting an integer dtype forces floats into integers. The fractional part
# is simply discarded (truncation toward zero) -- the values are neither
# rounded nor floored, so 12.7 becomes 12 (and -12.7 would become -12).
ex4 = np.array([11.1,12.7], dtype=np.int64)
print(ex4.dtype)
print()
print(ex4)

int64

[11 12]


In [15]:
# Requesting a float dtype stores integer inputs as floats, which is handy
# when later computations are expected to produce fractional values.
ex5 = np.array([11, 21], dtype=np.float64)
# One print call; the "\n\n" separator leaves a blank line between the two.
print(ex5.dtype, ex5, sep="\n\n")

float64

[ 11.  21.]


## Arithmetic Array Operations:

In [16]:
# Operands for the element-wise arithmetic examples: an integer array and a
# float array of the same 2x2 shape.
# The builtin `int` is used instead of `np.int`: that alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where `np.int` raises AttributeError.
# It was only ever an alias for the builtin, so the resulting dtype is the same.
x = np.array([[111, 112], [121, 122]], dtype=int)
y = np.array([[211.1, 212.1], [221.1, 222.1]], dtype=np.float64)

print(x)
print()
print(y)

[[111 112]
 [121 122]]

[[ 211.1  212.1]
 [ 221.1  222.1]]


In [17]:
# The + operator and np.add perform the same element-wise addition; the
# integer operand is converted to float to match y.
print(x + y, np.add(x, y), sep="\n\n")

[[ 322.1  324.1]
 [ 342.1  344.1]]

[[ 322.1  324.1]
 [ 342.1  344.1]]


In [18]:
# The - operator and np.subtract perform the same element-wise subtraction.
print(x - y, np.subtract(x, y), sep="\n\n")

[[-100.1 -100.1]
 [-100.1 -100.1]]

[[-100.1 -100.1]
 [-100.1 -100.1]]


In [19]:
# The * operator and np.multiply multiply element by element (this is not a
# matrix product).
print(x * y, np.multiply(x, y), sep="\n\n")

[[ 23432.1  23755.2]
 [ 26753.1  27096.2]]

[[ 23432.1  23755.2]
 [ 26753.1  27096.2]]


In [20]:
# The / operator and np.divide perform the same element-wise true division.
print(x / y, np.divide(x, y), sep="\n\n")

[[ 0.52581715  0.52805281]
 [ 0.54726368  0.54930212]]

[[ 0.52581715  0.52805281]
 [ 0.54726368  0.54930212]]


In [21]:
# Element-wise square root; the result is a float array.
print(np.sqrt(x))

[[ 10.53565375  10.58300524]
 [ 11.          11.04536102]]


In [22]:
# Element-wise exponential, e raised to each element of x. The inputs here
# are above 100, so the results are very large floats.
print(np.exp(x)) 

[[  1.60948707e+48   4.37503945e+48]
 [  3.54513118e+52   9.63666567e+52]]


# Statistical Methods, Sorting, and Set Operations

## Basic Statistical Operations

In [24]:
# 2x5 array of standard-normal samples scaled by 10.
# NOTE(review): no random seed is set, so these values -- and every statistic
# printed from `arr` in the cells that follow -- differ on each run and will
# not match the recorded outputs.
arr = 10 * np.random.randn(2,5)
print(arr)

[[ -7.3661169    7.82519303  -4.76431506 -27.88027302  -7.88223129]
 [ -5.36836847  -4.11274388  12.38577656  -9.15517253   8.92184029]]


In [25]:
# Mean over all elements of the array (no axis given).
print(arr.mean())

-3.73964112767


In [28]:
# Mean of each row: axis=1 collapses the columns, giving one value per row.
print(arr.mean(axis = 1))

[-8.01354865  0.53426639]


In [29]:
# Mean of each column: axis=0 collapses the rows, giving one value per column.
print(arr.mean(axis = 0))

[ -6.36724268   1.85622458   3.81073075 -18.51772278   0.5198045 ]


In [30]:
# Sum of all elements of the array (no axis given).
print(arr.sum())

-37.3964112767


In [31]:
# Median of each row (axis=1), giving one value per row.
print(np.median(arr, axis =1))

[-7.3661169  -4.11274388]


## Sorting

In [32]:
# Ten standard-normal samples used as input for the sorting examples.
# NOTE(review): unseeded, so the values differ on every run.
unsorted = np.random.randn(10)

print(unsorted)

[ 0.45494301 -0.10357306 -0.79752311  0.12306193 -0.5473404  -0.37336707
 -0.06261632  0.83495504  0.12424889  0.53325217]


In [33]:
# np.sort returns a sorted copy and leaves its argument untouched, so
# `unsorted` keeps its original order (the second print shows this).
# The result is named `sorted_copy` rather than `sorted` so that Python's
# built-in sorted() is not shadowed for the rest of the kernel session.
sorted_copy = np.sort(unsorted)

print(sorted_copy)
print()
print(unsorted)

[-0.79752311 -0.5473404  -0.37336707 -0.10357306 -0.06261632  0.12306193
  0.12424889  0.45494301  0.53325217  0.83495504]

[ 0.45494301 -0.10357306 -0.79752311  0.12306193 -0.5473404  -0.37336707
 -0.06261632  0.83495504  0.12424889  0.53325217]


In [34]:
# In-place sorting: ndarray.sort() reorders the array itself and returns
# None, so `unsorted` is sorted from this cell onwards.
unsorted.sort()

print(unsorted)

[-0.79752311 -0.5473404  -0.37336707 -0.10357306 -0.06261632  0.12306193
  0.12424889  0.45494301  0.53325217  0.83495504]


## Finding Unique elements

In [35]:
# np.unique returns the distinct values of the input in sorted order.
array = np.array([1,2,1,4,2,1,4,2])

print(np.unique(array))

[1 2 4]


## Set operations with np.array data type:

In [36]:
# Two string arrays that share 'chair' and 'bulb'; they are used as "sets"
# in the set-operation examples.
s1 = np.array(['desk','chair','bulb'])
s2 = np.array(['lamp', 'bulb', 'chair'])
print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [37]:
# Elements present in both s1 and s2, returned sorted and without duplicates.
print( np.intersect1d(s1, s2))

['bulb' 'chair']


In [38]:
# Elements present in s1 or s2, returned sorted and without duplicates.
print( np.union1d(s1, s2))

['bulb' 'chair' 'desk' 'lamp']


In [39]:
# Set difference: the elements of s1 that do not appear in s2.
print( np.setdiff1d(s1, s2)) # elements in s1 that are not in s2


['desk']


In [40]:
# Membership test: for each element of s1, is it also present in s2?
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0; for 1-D
# inputs such as these both return the same boolean mask.
print(np.isin(s1, s2))

[False  True  True]


## Broadcasting:

A short introduction to broadcasting: how NumPy combines arrays of different shapes in arithmetic operations.
For more details, see:
http://docs.scipy.org/doc/numpy-1.10.1/user/basics.broadcasting.html

In [41]:
import numpy as np

# 4x3 array of float zeros; the base array for the broadcasting examples.
start = np.zeros((4,3))
print(start)

[[ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]


In [42]:
# 1-D array with 3 elements, matching the number of columns in `start`.
add_rows = np.array([1, 0, 2])
print(add_rows)

[1 0 2]


In [44]:
# Broadcasting: the length-3 array is added to every row of the 4x3 array.
# NOTE: this rebinds `y`, which held the float operand in the arithmetic
# section, so those earlier cells give different results if re-run after
# this one.
y = start + add_rows
print(y)

[[ 1.  0.  2.]
 [ 1.  0.  2.]
 [ 1.  0.  2.]
 [ 1.  0.  2.]]


In [47]:
# Column vector of shape (4, 1): build the 1x4 row and transpose it in a
# single expression.
add_cols = np.array([[0, 1, 2, 3]]).T

print(add_cols)

[[0]
 [1]
 [2]
 [3]]


In [48]:
# Broadcasting: the (4, 1) column is added to every column of the 4x3 array,
# so each row receives its own constant.
y = start + add_cols
print(y)

[[ 0.  0.  0.]
 [ 1.  1.  1.]
 [ 2.  2.  2.]
 [ 3.  3.  3.]]


In [49]:
# A one-element array is broadcast to every position of the 4x3 array, just
# as a plain scalar would be.
add_scalar = np.array([1])
print(start+add_scalar)

[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]


# Speedtest: ndarrays vs Lists

First, set up the parameters for the speed test. We'll be timing how long it takes to sum the elements of an ndarray versus a list.

In [50]:
from numpy import arange
from timeit import Timer

# Number of elements in the ndarray and in the list being summed.
size = 1000000
# Number of times each sum is repeated; reported timings are averages over
# this many runs.
timeits = 1000

In [52]:
# ndarray holding the integers 0 .. size-1.
nd_array = arange(size)
print( type(nd_array) )

<class 'numpy.ndarray'>


In [56]:
# Time `timeits` calls of ndarray.sum() and report the average per call.
# timeit runs the statement in its own namespace, so the setup string imports
# nd_array from the notebook's __main__ module.
timer_numpy = Timer("nd_array.sum()", "from __main__ import nd_array")
print("Time taken by numpy ndarray: %f seconds" %
     (timer_numpy.timeit(timeits)/timeits))

Time taken by numpy ndarray: 0.000591 seconds


In [57]:
# Plain Python list holding the same integers 0 .. size-1, for comparison.
a_list = list(range(size))
print (type(a_list))

<class 'list'>


In [None]:
# Time `timeits` calls of the built-in sum() on the list and report the
# average per call, for comparison with the ndarray timing.
# NOTE(review): this cell has no execution count or recorded output, so the
# comparison is incomplete as saved.
timer_list = Timer("sum(a_list)", "from __main__ import a_list")

print(" Time taken by list: %f seconds" %
     (timer_list.timeit(timeits)/timeits))