# Chapter 4 - NumPy Basics: Arrays and Vectorized Computation

## 4.3 Array-Oriented Programming with Arrays

In [1]:
import numpy as np

### Expressing Conditional Logic as Array Operations

Suppose there are 3 arrays. Take a value from `i` if the entry for `is_valid` is `True`, take from `j` if the entry for `is_valid` is `False`.

In [2]:
i = np.array([309, 230, 143, 447, 231, 123, 198, 496])
j = np.array([25, 25, 35, 20, 30, 35, 45, 20])
is_valid = np.array([False, False, False,  True,  True, False,  True, False])

In [3]:
# Method using list comprehension. This is not so fast.
result = [entry_i if entry_valid else entry_j for entry_i, entry_j, entry_valid in zip(i, j, is_valid)]
result

[25, 25, 35, 447, 231, 35, 198, 20]

Use `np.where` to create a condition and select the element based on the element.

In [4]:
result2 = np.where(is_valid, i, j)
result2

array([ 25,  25,  35, 447, 231,  35, 198,  20])

Use `np.where()` on a condition with a 2D array.

In [5]:
# k = np.random.randint(-5, 3, (3,3))
k = np.array([[ 3,  2, -1],
              [ 2,  0,  1],
              [-1,  0, -3]])
# Swap all values that are negative to 0
k_is_negative = k <= 0
display(k_is_negative)
k_positive = np.where(k_is_negative, 0, k)
display(k_positive)

array([[False, False,  True],
       [False,  True, False],
       [ True,  True,  True]])

array([[3, 2, 0],
       [2, 0, 1],
       [0, 0, 0]])

<hr>
### Mathematical and Statistical Methods

**`np.sum()`** function for arrays

In [6]:
# s = np.random.randint(1, 6, (3,4))
s = np.array([[3, 2, 2, 2],
              [1, 5, 5, 1],
              [2, 2, 3, 5]])
# np.sum(arr) gets the sum of all values.
#
# np.sum(arr, 0) and np.sum(arr, 1) gets the sum of all columns, and rows respectively
display(np.sum(s, 1))
display(np.sum(s, 0))

array([ 9, 12, 12])

array([ 6,  9, 10,  8])

**`np.mean()`** function for arrays

In [7]:
t = np.random.randint(5, 10, (2,5))
t = np.array([[9, 9, 6, 5, 7],
              [8, 7, 7, 7, 7]])
# Similarly, np.mean(arr) gets the sum of all values.
print(np.mean(t))
print()
# np.mean(arr, 0) and np.mean(arr, 1) gets the mean of all columns, and rows respectively
display(np.mean(t, 0))
# Going one step further, this is equivalent
print(np.sum(t, 0)/np.size(t, 0))
print()

# Same concept applies for calculation for each row
display(np.mean(t, 1))
print(np.sum(t, 1)/np.size(t, 1))

7.2



array([8.5, 8. , 6.5, 6. , 7. ])

[8.5 8.  6.5 6.  7. ]



array([7.2, 7.2])

[7.2 7.2]


**`np.cumsum()`** function for arrays

In [8]:
t = np.array([[9, 9, 6, 5, 7],
              [8, 7, 7, 7, 7]])
print(np.cumsum(t))
print()
# Returns the cumsum of all terms from left to right, then downwards
# In the form of an array the size of (1 x (m*n)) for m rows and n columns

# Keeps the dimensions of the array, calculates cumsum traversing each column (operates column-wise)
display(np.cumsum(t, axis=0)) 
print()
# Keeps the dimensions of the array, calculates cumsum traversing each row (operates row-wise)
display(np.cumsum(t, axis=1)) 

[ 9 18 24 29 36 44 51 58 65 72]



array([[ 9,  9,  6,  5,  7],
       [17, 16, 13, 12, 14]])




array([[ 9, 18, 24, 29, 36],
       [ 8, 15, 22, 29, 36]])

<hr>
### Methods for Boolean Arrays
For boolean arrays, `True` is treated as `1` and `False` is treated as `0`.

In [9]:
# nums = np.random.randint(5, 13, (3,5))
nums = np.array([[ 7,  6,  6,  7, 10],
                 [10, 11, 11,  9, 11],
                 [10, 10, 10,  9,  9]])
is_odd = nums % 2 != 0
print(is_odd)

sum_odd_numbers = (nums % 2 != 0).sum()
# Gets the number of odd values in the array
print(sum_odd_numbers)

# It is the same as the sum of the following array:
is_odd = np.where(nums % 2 != 0, 1, 0)
display(is_odd)


[[ True False False  True False]
 [False  True  True  True  True]
 [False False False  True  True]]
8


array([[1, 0, 0, 1, 0],
       [0, 1, 1, 1, 1],
       [0, 0, 0, 1, 1]])

<hr>
### Sorting

Sort an array inplace using `np.sort(arr)`

In [10]:
# salaries = np.random.randint(1000, 5000, (1, 4))
salaries = np.array([[4275, 4761, 1829, 1217]])
display(np.sort(salaries))
# Here is a neat way to sort in descending order
display(-np.sort(-salaries))

array([[1217, 1829, 4275, 4761]])

array([[4761, 4275, 1829, 1217]])

In [11]:
# sales = np.random.randint(10, 50, (4, 5))
sales = np.array([[31, 43, 22, 17, 40],
                  [47, 19, 16, 41, 11],
                  [18, 39, 30, 24, 46],
                  [47, 41, 35, 17, 12]])

# axis=0 will sort values traversing each column (operates column-wise)
display(np.sort(sales, axis=0))
print()
# axis=1 will sort values traversing each row (operates row-wise)
display(np.sort(sales, axis=1))

array([[18, 19, 16, 17, 11],
       [31, 39, 22, 17, 12],
       [47, 41, 30, 24, 40],
       [47, 43, 35, 41, 46]])




array([[17, 22, 31, 40, 43],
       [11, 16, 19, 41, 47],
       [18, 24, 30, 39, 46],
       [12, 17, 35, 41, 47]])

<hr>
### Unique and Test of Membership
**Use `np.unique()` to find unique values in an array.**

In [12]:
# altitudes = np.random.randint(1, 4, (1,6))*1000
altitudes = np.array([[2000, 3000, 1000, 2000, 3000, 1000]])
altitudes = altitudes.astype(str)
display(altitudes)
display(np.unique(altitudes))

array([['2000', '3000', '1000', '2000', '3000', '1000']], dtype='<U21')

array(['1000', '2000', '3000'], dtype='<U21')

**Use `np.in1d()` to find if a value exists in another array.**

It will traverse each element in the array, and returns `True` if the value exists in the given list, `False` otherwise.

In [13]:
# vols = np.random.randint(1, 6, (1,6))*15
vols = np.array([[60, 45, 75, 45, 60, 45]])
display(np.in1d(vols, [15, 30, 45, 60]))

# This is equivalent
display(np.in1d(vols, np.arange(1,5)*15))

array([ True,  True, False,  True,  True,  True])

array([ True,  True, False,  True,  True,  True])

<hr>

**References:**

Python for Data Analysis, 2nd Edition, McKinney (2017)