##### <b> Array Operations </b></br>

##### <b> Arithmetic operators can be used to perform array operations </b></br> - Array operations are applied via vectorization and broadcasting, which eliminates the need to loop through array elements

In [2]:
import numpy as np
import pandas as pd

In [100]:
# Two rows of sales figures written out as a plain nested list
sales = [
    [0, 5, 155, 0, 518],
    [0, 1827, 616, 317, 325],
]

# Turn the nested list into a 2-D NumPy array (2 rows x 5 columns)
sales_array = np.array(sales)
sales_array

array([[   0,    5,  155,    0,  518],
       [   0, 1827,  616,  317,  325]])

In [4]:
# adding a scalar with + applies it to every element: 2 is added to each value of the array
sales_array + 2

array([[   2,    7,  157,    2,  520],
       [   2, 1829,  618,  319,  327]])

In [5]:
# The first row of the sales array is used as the quantities
quantity = sales_array[0]
# The second row of the sales array is used as the prices
price = sales_array[1]

# Element-wise product: each quantity is multiplied by the price in the same position
quantity * price

array([     0,   9135,  95480,      0, 168350])

In [6]:
# Seeded random number generator so the draws are reproducible on every run
rng = np.random.default_rng(seed=616)

# Draw 10 random integers from 0 up to (but not including) 100
inventory = rng.integers(low=0, high=100, size=10)
inventory

array([39, 39, 93, 86, 48, 46, 48, 30, 11, 57], dtype=int64)

In [7]:
# subtract 24 from each array element
inventory - 24

array([ 15,  15,  69,  62,  24,  22,  24,   6, -13,  33], dtype=int64)

In [8]:
# view half the inventory amounts -> divide each array element by 2 (the result is a float array)
inventory / 2

array([19.5, 19.5, 46.5, 43. , 24. , 23. , 24. , 15. ,  5.5, 28.5])

In [9]:
# Element-wise operations need arrays with compatible shapes. Here 11 prices are
# multiplied against the 10 inventory values, so NumPy raises a ValueError.
# A throwaway generator is used so the main `rng` stream is not advanced and the
# prices drawn from it later are unaffected.
mismatched_price = (np.random.default_rng(616).random(11) * 10).round(2)

try:
    mismatched_price * inventory
except ValueError as err:
    # The failure is the point of this cell: show it without halting the notebook
    print(f'ValueError: {err}')

In [10]:
# create 10 random values (rng.random() returns floats between 0 and 1), multiply them by 10
# and round to 2 decimal places to get prices
# note: this reassigns `price`, which earlier held the second row of sales_array
price = (rng.random(10)*10).round(2)

price

array([0.89, 8.82, 7.32, 7.32, 5.62, 3.4 , 0.63, 3.57, 2.03, 4.31])

In [11]:
# operations can occur between arrays as long as they have the same shape
# price * inventory

# the sum method adds up all element values; the total is then rounded to 2 decimal places
(price * inventory).sum().round(2)

2520.47

In [12]:
# np arrays can be converted to a plain Python list.
# .tolist() also converts every element to a native Python int, whereas
# list(inventory) would keep the elements as NumPy scalars (np.int64).
inventory_list = inventory.tolist()
inventory_list

[39, 39, 93, 86, 48, 46, 48, 30, 11, 57]

##### <b> Filtering Arrays </b></br> You can filter arrays by indexing them with a logical test </br> - Only the array elements in positions where the logical test returns True are returned

In [13]:
# display sales_array, the array that the filtering examples operate on
sales_array

array([[   0,    5,  155,    0,  518],
       [   0, 1827,  616,  317,  325]])

In [14]:
# performing a logical test on a NumPy array returns a Boolean array holding the result of the test for each element

# logical test to see which array elements do not equal zero
sales_array != 0

array([[False,  True,  True, False,  True],
       [False,  True,  True,  True,  True]])

In [19]:
# index the array with the Boolean test to keep only the elements where it is True (here: values that are not zero)
# the selected values come back as a flat 1-D array
actual_sales_array = sales_array[sales_array != 0]
actual_sales_array

array([   5,  155,  518, 1827,  616,  317,  325])

##### Can be filtered with multiple logical tests using & (and) | (or)

In [21]:
# Keep values that are below 100 OR exactly 616; | is the element-wise OR,
# and each comparison needs its own parentheses
multiple_logic1 = sales_array[(sales_array < 100) | (sales_array == 616)]
multiple_logic1

array([  0,   5,   0,   0, 616])

In [22]:
# Keep values strictly between 100 and 500; & is the element-wise AND,
# and each comparison needs its own parentheses
multiple_logic2 = sales_array[(sales_array < 500) & (sales_array > 100)]
multiple_logic2

array([155, 317, 325])

##### Multiple logical tests can be added to variable and used as Boolean mask

In [25]:
# Store the combined condition in a variable so it can be reused as a Boolean mask:
# True where a value is exactly 0, or strictly between 500 and 1500
mask1 = (sales_array == 0) | ((sales_array > 500) & (sales_array < 1500))

# Index the array with the mask to pull out the matching values
multiple_logic_mask = sales_array[mask1]
multiple_logic_mask

array([  0,   0, 518,   0, 616])

##### Values from One array can be used to filter another array (if arrays are same shape)

In [33]:
# display multiple_logic_mask (the values selected with mask1)
multiple_logic_mask

array([  0,   0, 518,   0, 616])

In [34]:
# Five product category names stored as a NumPy string array
product_array = np.array([
    'fruits',
    'vegetables',
    'cereal',
    'dairy',
    'eggs',
])
product_array

array(['fruits', 'vegetables', 'cereal', 'dairy', 'eggs'], dtype='<U10')

In [35]:
# array to be filtered[array used as filter with logic]
# keeps the products at the positions where multiple_logic_mask is greater than zero
# (this works because both arrays hold five elements)
filtered_product = product_array[multiple_logic_mask > 0]
filtered_product

array(['cereal', 'eggs'], dtype='<U10')

In [39]:
# Second Boolean-mask example: start from the integers 0 through 19
demo_array = np.arange(20)
demo_array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [41]:
# string array repeating 'even', 'odd' ten times (20 labels) to match the shape of demo_array
even_odd = np.array(['even', 'odd']*10)
even_odd

array(['even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even',
       'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd',
       'even', 'odd'], dtype='<U4')

In [45]:
# keep only the labels that are not 'odd'
even_odd[even_odd != 'odd']

array(['even', 'even', 'even', 'even', 'even', 'even', 'even', 'even',
       'even', 'even'], dtype='<U4')

In [48]:
# assigning through a Boolean mask modifies demo_array in place:
# every position where even_odd != 'odd' is True is set to 0
demo_array[even_odd != 'odd'] = 0
demo_array

array([ 0,  1,  0,  3,  0,  5,  0,  7,  0,  9,  0, 11,  0, 13,  0, 15,  0,
       17,  0, 19])

##### <b> Where Function </b></br> - The NumPy where() function performs a logical test on each element and returns one value where the test is True and another value where the test is False </br> &nbsp;&nbsp; np.where(logical test, Value if True, Value if False)

In [52]:
# create an inventory array of five stock counts (same length as product_array)
inventory_array = np.array([12, 102, 18, 0, 0])
inventory_array

array([ 12, 102,  18,   0,   0])

In [53]:
# display product_array
product_array

array(['fruits', 'vegetables', 'cereal', 'dairy', 'eggs'], dtype='<U10')

In [54]:
# np.where() does not drop elements: it returns an array of the same length,
# choosing between the two values position by position
# np.where(logical test, Value if True, Value if False)

np.where(inventory_array > 0, 'In Stock', 'Out of Stock')

array(['In Stock', 'In Stock', 'In Stock', 'Out of Stock', 'Out of Stock'],
      dtype='<U12')

In [55]:
# the "value if False" can itself be an array: where the test is False,
# the element of product_array at the same position is returned
# np.where(logical test, Value if True, Value if False)

np.where(inventory_array > 0, 'In Stock', product_array)

array(['In Stock', 'In Stock', 'In Stock', 'dairy', 'eggs'], dtype='<U10')

In [57]:
# fresh array of the integers 0-19 for the chained np.where() example
demo_array1 = np.arange(0,20)
demo_array1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [59]:
# chaining np.where() conditions: the inner np.where() supplies the "value if False"
# of the outer one, so the tests are applied in order (even? -> equal to 9? -> odd)
# demo_array1 (the untouched 0-19 range) is used rather than demo_array, whose even
# entries were overwritten with 0 by the earlier mask assignment
# note: the result is a string array, so the integer 9 comes back as the string '9'
np.where(demo_array1 % 2 == 0, 'even', np.where(demo_array1 == 9, 9, 'odd'))

array(['even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even',
       '9', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd',
       'even', 'odd'], dtype='<U11')

##### Modify Array values - not used often as data analyst

In [38]:
# copy the array values to a new array so multiple_logic_mask itself is not modified
modify_array = multiple_logic_mask.copy()
# array([  0,   0, 518,   0, 616])

# values are modified by indexing a position and assigning a new value
modify_array[3] = 3
modify_array
# the element at index 3 (the fourth element) is changed to 3
# array([  0,   0, 518,   3, 616])


array([  0,   0, 518,   3, 616])

##### <b>Array Aggregation Methods </b></br> Let you Calculate Metrics </br> &nbsp;&nbsp; sum, mean, max, min, stddev

In [61]:
# display sales_array
sales_array

array([[   0,    5,  155,    0,  518],
       [   0, 1827,  616,  317,  325]])

In [87]:
# Whole-array aggregations: each call below reduces every value in the array to a single number.
# np.percentile takes the requested percentile (25, 75) as its second argument.
# All lines are emitted by one print call, separated by newlines.
print(
    f'Array Elements Sum: {sales_array.sum()}',
    f'Array Elements Product: {sales_array.prod()}',
    f'Array Elements Minimum: {sales_array.min()}',
    f'Array Elements Maximum: {sales_array.max()}',
    f'Array Elements Mean: {sales_array.mean()}',
    f'Array Elements Median: {np.median(sales_array)}',
    f'Array Elements Standard Deviation: {sales_array.std()}',
    f'Array Elements Variance: {sales_array.var()}',
    f'Array Elements 25th Percentile: {np.percentile(sales_array, 25)}',
    f'Array Elements 75th Percentile: {np.percentile(sales_array, 75)}',
    'Array Elements Square Root:',
    sep='\n',
)
# Unlike the aggregations above, np.sqrt works element by element and returns a full array
np.sqrt(sales_array)

Array Elements Sum: 3763
Array Elements Product: 0
Array Elements Minimum: 0
Array Elements Maximum: 1827
Array Elements Mean: 376.3
Array Elements Median: 236.0
Array Elements Standard Deviation: 529.1366647662965
Array Elements Variance: 279985.61
Array Elements 25th Percentile: 1.25
Array Elements 75th Percentile: 469.75
Array Elements Square Root:


array([[ 0.        ,  2.23606798, 12.4498996 ,  0.        , 22.75961335],
       [ 0.        , 42.74342055, 24.81934729, 17.80449381, 18.02775638]])

##### <b> Array Aggregation Methods across Rows or Columns </b></br>&nbsp;&nbsp;axis=0: Aggregate across rows, giving one result per column .sum(axis=0) </br> &nbsp;&nbsp;axis=1: Aggregate across columns, giving one result per row .sum(axis=1)

In [96]:
# axis=1 aggregates across the columns, giving one sum per row
print('Array Row Elements Sum:')
sales_array.sum(axis=1)

Array Row Elements Sum:


array([ 678, 3085])

In [97]:
# axis=0 aggregates across the rows, giving one sum per column
print('Array Column Elements Sum:')
sales_array.sum(axis=0)

Array Column Elements Sum:


array([   0, 1832,  771,  317,  843])

##### <b> Sorting Array </b></br> - sort() will sort arrays in place </br> &nbsp;&nbsp; use the axis argument to specify the dimension to sort by

In [110]:
# make two independent copies of sales_array, one for row sorting and one for column sorting,
# so that sorting them in place leaves the original untouched
sales_array1 = sales_array.copy()
sales_array2 = sales_array.copy()

In [113]:
# .sort() sorts in place along the last axis by default (axis=-1, which is axis=1 for a 2-D array),
# so each row is sorted independently
sales_array1.sort()
sales_array1

array([[   0,    0,    5,  155,  518],
       [   0,  317,  325,  616, 1827]])

In [112]:
# axis=0 sorts down each column instead, so the values within every column are ordered
sales_array2.sort(axis=0)
sales_array2

array([[   0,    5,  155,    0,  325],
       [   0, 1827,  616,  317,  518]])

In [114]:
# np.sort(array_name) returns a sorted copy and leaves the original array unchanged, unlike the .sort() method
np.sort(sales_array)

array([[   0,    0,    5,  155,  518],
       [   0,  317,  325,  616, 1827]])

In [116]:
# the original order of sales_array is preserved
sales_array

array([[   0,    5,  155,    0,  518],
       [   0, 1827,  616,  317,  325]])