# NumPy array operations

In [2]:
import numpy as np

In [3]:
# Two equally long rows of sample integers used throughout the first examples.
array = np.array([[1, 2, 3, 4, 5, 6], [2, 3, 2, 3, 2, 3]])

# Unpacking iterates over the first axis: x is row 0 and y is row 1.
x, y = array

In [11]:
print("x: ",x, end=" --- ")
print("y: ",y)

x:  [1 2 3 4 5 6] --- y:  [2 3 2 3 2 3]


In [13]:
# array by array multiply -> A1*B1, A2*B2 etc...
x*y

array([ 2,  6,  6, 12, 10, 18])

In [15]:
# Element-wise arithmetic needs arrays of the same length (or otherwise
# broadcast-compatible shapes).
# NOTE(review): x is 1-dimensional, so the two-axis index x[:, :-1] below fails
# with an IndexError before the multiplication is even attempted. To show the
# actual length-mismatch error, x[:-1] * y (5 elements vs 6) is presumably what
# was intended - confirm.
x[:,:-1]*y

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

Broadcasting

In [22]:
# keep just the first half of y (floor division drops any odd leftover element)
temp_y = y[: y.size // 2]
temp_y

array([2, 3, 2])

In [24]:
# reshape x, so each row is same length as temp_y
temp_x = x.reshape(2,3)
temp_x

array([[1, 2, 3],
       [4, 5, 6]])

In [26]:
# Broadcasting: the shapes differ (temp_x is 2x3, temp_y has 3 elements), but
# NumPy can still vectorize the operation by pairing temp_y with each row of
# temp_x in turn.
temp_x+temp_y

array([[3, 5, 5],
       [6, 8, 8]])

In [28]:
temp_x*temp_y

array([[ 2,  6,  6],
       [ 8, 15, 12]])

In [29]:
x*100

array([100, 200, 300, 400, 500, 600])

In [30]:
x-200

array([-199, -198, -197, -196, -195, -194])

In [32]:
r = np.random.rand(10)
r

array([0.40993631, 0.5787516 , 0.89105885, 0.54700041, 0.64470693,
       0.4572792 , 0.19745054, 0.46955405, 0.47664605, 0.96005667])

In [35]:
(r*100).round(2)

array([40.99, 57.88, 89.11, 54.7 , 64.47, 45.73, 19.75, 46.96, 47.66,
       96.01])

Course assignment: array operations

In [36]:
# Six random prices in the range 0-100, rounded to two decimals.
prices = np.round(np.random.rand(6) * 100, 2)
prices

array([82.22, 39.5 , 92.92, 28.81, 50.42,  2.91])

In [37]:
# Add 5 to every price, modifying the existing array in place.
# NOTE: this cell is not idempotent - each re-run adds another 5.
prices += 5

In [41]:
discount_percent = np.random.rand(6)

In [43]:
percent_owed = 1-discount_percent

In [51]:
# Amount owed per item: the price scaled by the share that is NOT discounted,
# rounded to two decimals.
final_owed = (prices * percent_owed).round(2)
# Both operands are float arrays, so the result is already float64; casting it
# again would only create a throwaway copy. Display it directly.
final_owed

array([5.933e+01, 3.224e+01, 4.580e+01, 1.679e+01, 2.290e+00, 1.000e-02])

Filtering arrays

In [100]:
array = np.random.randint(1,101,25)
array

array([94, 36,  5, 58,  8,  1, 88, 56, 91,  7, 36, 75,  6, 20, 87, 19, 18,
       75, 25, 12, 95, 15, 17, 74, 15])

In [101]:
array > 50

array([ True, False, False,  True, False, False,  True,  True,  True,
       False, False,  True, False, False,  True, False, False,  True,
       False, False,  True, False, False,  True, False])

In [102]:
array[array > 50]

array([94, 58, 88, 56, 91, 75, 87, 75, 95, 74])

In [103]:
# Build the boolean mask while the array still holds integers; it selects the
# values strictly between 50 and 90.
mask = (array > 50) & (array < 90)
# Convert the numbers to fixed-width strings (up to 12 characters) so that a
# text label can later be written into the masked positions.
# NOTE: this rebinds `array` to a string array, so the order of these two lines
# matters - the numeric comparison above must run before the cast.
array = array.astype("U12")

In [106]:
array[mask] = "in between"

In [111]:
array

array(['94', '36', '5', 'in between', '8', '1', 'in between',
       'in between', '91', '7', '36', 'in between', '6', '20',
       'in between', '19', '18', 'in between', '25', '12', '95', '15',
       '17', 'in between', '15'], dtype='<U12')

Where

In [112]:
array = np.array([0,11,0,20,2,5,6,0])



In [114]:
# Label every stock level: a positive count is "In stock", anything else is
# "Out of stock" (the label previously contained a typo, "sotck").
np.where(array > 0, "In stock", "Out of stock")

array(['Out of sotck', 'In stock', 'Out of sotck', 'In stock', 'In stock',
       'In stock', 'In stock', 'Out of sotck'], dtype='<U12')

In [118]:
mapping_array = array*100

In [120]:
# Non-zero stock levels show their mapped value; zero stock shows a text label.
# Mixing numbers with a string makes the whole result a string array.
np.where(array != 0, mapping_array, "Out of stock")

array(['Out of stock', '1100', 'Out of stock', '2000', '200', '500',
       '600', 'Out of stock'], dtype='<U12')

In [122]:
#array([ 0, 11,  0, 20,  2,  5,  6,  0])
np.where(array % 2 == 0,"even",np.where(array == 11,"accepted",array))

array(['even', 'accepted', 'even', 'even', 'even', '5', 'even', 'even'],
      dtype='<U11')

Course assignment: filtering arrays

In [6]:
# Two position-aligned arrays: the item names and their prices.
products = np.array([
    "salad",
    "bread",
    "mustard",
    "rare tomato",
    "cola",
    "gourmet ice cream",
])
prices = np.array([5.99, 6.99, 22.49, 99.99, 4.99, 49.99])

In [9]:
products[prices>25]

array(['rare tomato', 'gourmet ice cream'], dtype='<U17')

In [12]:
products[(prices > 25) | (products == "cola")]

array(['rare tomato', 'cola', 'gourmet ice cream'], dtype='<U17')

In [14]:
shipping_cost = np.where(prices > 20,0,5)
shipping_cost

array([5, 5, 0, 0, 5, 0])

Array aggregation

In [16]:
# 4x4x4 cube of random integers drawn from 10..99 (upper bound is exclusive).
matrix = np.random.randint(10, 100, size=(4, 4, 4))
matrix

array([[[15, 91, 23, 70],
        [20, 85, 50, 38],
        [23, 87, 10, 79],
        [27, 56, 23, 30]],

       [[71, 38, 62, 45],
        [49, 79, 36, 74],
        [52, 32, 57, 73],
        [97, 19, 40, 65]],

       [[63, 92, 47, 77],
        [21, 88, 59, 98],
        [96, 18, 68, 69],
        [27, 34, 49, 83]],

       [[64, 58, 61, 42],
        [24, 24, 35, 14],
        [91, 92, 45, 75],
        [44, 74, 31, 65]]])

In [17]:
matrix.sum()

3444

In [18]:
matrix.mean()

53.8125

In [21]:
matrix.sum()/matrix.size

53.8125

In [22]:
matrix.max()

98

In [24]:
matrix.min()

10

In [28]:
temp_matrix = matrix.reshape((2,32))
temp_matrix

array([[15, 91, 23, 70, 20, 85, 50, 38, 23, 87, 10, 79, 27, 56, 23, 30,
        71, 38, 62, 45, 49, 79, 36, 74, 52, 32, 57, 73, 97, 19, 40, 65],
       [63, 92, 47, 77, 21, 88, 59, 98, 96, 18, 68, 69, 27, 34, 49, 83,
        64, 58, 61, 42, 24, 24, 35, 14, 91, 92, 45, 75, 44, 74, 31, 65]])

In [32]:
temp_matrix.sum(axis=0)

array([ 78, 183,  70, 147,  41, 173, 109, 136, 119, 105,  78, 148,  54,
        90,  72, 113, 135,  96, 123,  87,  73, 103,  71,  88, 143, 124,
       102, 148, 141,  93,  71, 130])

In [36]:
temp_matrix.sum(axis=1)

array([1616, 1828])

Other aggregations - using an "np" function call, rather than calling an object method

In [49]:
np.median(matrix)

54.0

In [53]:
print(np.percentile(matrix,100),matrix.max())

98.0 98


In [55]:
np.percentile(matrix,90)

90.10000000000001

In [56]:
np.unique(matrix)

array([10, 14, 15, 18, 19, 20, 21, 23, 24, 27, 30, 31, 32, 34, 35, 36, 38,
       40, 42, 44, 45, 47, 49, 50, 52, 56, 57, 58, 59, 61, 62, 63, 64, 65,
       68, 69, 70, 71, 73, 74, 75, 77, 79, 83, 85, 87, 88, 91, 92, 96, 97,
       98])

In [57]:
np.sqrt(matrix)

array([[[3.87298335, 9.53939201, 4.79583152, 8.36660027],
        [4.47213595, 9.21954446, 7.07106781, 6.164414  ],
        [4.79583152, 9.32737905, 3.16227766, 8.88819442],
        [5.19615242, 7.48331477, 4.79583152, 5.47722558]],

       [[8.42614977, 6.164414  , 7.87400787, 6.70820393],
        [7.        , 8.88819442, 6.        , 8.60232527],
        [7.21110255, 5.65685425, 7.54983444, 8.54400375],
        [9.8488578 , 4.35889894, 6.32455532, 8.06225775]],

       [[7.93725393, 9.59166305, 6.8556546 , 8.77496439],
        [4.58257569, 9.38083152, 7.68114575, 9.89949494],
        [9.79795897, 4.24264069, 8.24621125, 8.30662386],
        [5.19615242, 5.83095189, 7.        , 9.11043358]],

       [[8.        , 7.61577311, 7.81024968, 6.4807407 ],
        [4.89897949, 4.89897949, 5.91607978, 3.74165739],
        [9.53939201, 9.59166305, 6.70820393, 8.66025404],
        [6.63324958, 8.60232527, 5.56776436, 8.06225775]]])

Sorting arrays

In [61]:
# 2x8 grid of random integers drawn from 1..99 (upper bound is exclusive).
matrix = np.random.randint(1, 100, size=(2, 8))
matrix

array([[81, 71, 23, 25, 31, 99,  9, 64],
       [27, 63, 75,  5, 67, 18, 14, 37]])

In [63]:
# Sort along axis 0, i.e. order the values within each column. The method
# sorts the array in place and returns None, so the cell shows no output.
matrix.sort(axis=0)

In [65]:
# With no axis given, sort() works along the last axis: each row is ordered
# left to right. This also mutates matrix in place and returns None.
matrix.sort()

In [66]:
matrix

array([[ 5,  9, 18, 23, 27, 31, 37, 63],
       [14, 25, 64, 67, 71, 75, 81, 99]])

Course assignment: sorting and aggregating

In [68]:
# In-place sort: reorders prices itself and returns None (hence no cell output).
# NOTE(review): products is not reordered, so after this cell prices[i] no
# longer lines up with products[i]; re-running the earlier product filters would
# give wrong matches. Sorting into a new name, e.g. np.sort(prices), would avoid
# this - confirm whether the pairing is still needed.
prices.sort()

In [78]:
# Summary statistics for the three most expensive items.
# Sorting a copy here means the cell gives the right answer whether or not
# prices has already been sorted in place, and the slice is taken only once.
top_three_prices = np.sort(prices)[-3:]
print(top_three_prices.mean())
print(top_three_prices.max())
print(top_three_prices.min())
print(np.median(top_three_prices))


57.49
99.99
22.49
49.99


In [82]:
np.unique(products).size

6

Final assignment

In [87]:
import pandas as pd

# Read a 1000-row window of the retail data: the header row is kept, data
# rows 1..10999 are skipped, then the next 1000 rows are loaded.
retail_df = pd.read_csv(
    "../pandas-course/Pandas Course Resources/retail/retail_2016_2017.csv",
    skiprows=range(1, 11000),
    nrows=1000,
)

# Pull the two columns of interest out of the frame as plain NumPy arrays.
family_array, sales_array = (
    np.array(retail_df[column]) for column in ("family", "sales")
)

In [92]:
produce_sales = sales_array[family_array == "PRODUCE"]
produce_sales

array([1662.394,  447.064, 2423.944,  962.866, 1236.404,  298.441,
       1077.44 , 3404.531,  962.96 ,  279.505, 1852.786, 1089.319,
        726.516, 7860.031,  446.038, 1155.385,  120.202,  862.092,
        473.952,  254.263, 1272.755, 2775.771, 2030.762, 1657.432,
       2339.906,  722.333, 1567.843, 2458.456,  673.885, 8834.15 ])

In [94]:
# Draw a random half of the produce sales.
# np.random.choice samples WITH replacement by default, which could pick the
# same sale several times; replace=False makes the result a true subset in
# which every original entry appears at most once.
produce_sales_random_half = np.random.choice(
    produce_sales, size=produce_sales.size // 2, replace=False
)

In [97]:
print(produce_sales_random_half.mean())
print(np.median(produce_sales_random_half))


1446.7500666666667
726.516


In [101]:
# Compute each reference statistic once instead of re-evaluating it inside
# every condition.
sample_mean = produce_sales_random_half.mean()
sample_median = np.median(produce_sales_random_half)
above_mean = produce_sales_random_half > sample_mean
above_median = produce_sales_random_half > sample_median

# Label every sale relative to the sample mean and median. The innermost branch
# covers values that exceed the mean but not the median (possible when the mean
# is the smaller of the two), which would otherwise be reported as "below both".
np.where(
    above_mean & above_median,
    "above both",
    np.where(
        above_median,
        "above median",
        np.where(above_mean, "above mean", "below both"),
    ),
)

array(['below both', 'above median', 'below both', 'below both',
       'above median', 'above both', 'above both', 'above both',
       'below both', 'below both', 'below both', 'above both',
       'below both', 'above median', 'below both'], dtype='<U12')

In [111]:
type(np.array([1,2]))

numpy.ndarray

In [112]:
type(np.array([[1,2],[1,2]]))

numpy.ndarray

In [114]:
# np.array itself is a factory function, not a class; the objects it returns
# are instances of numpy.ndarray.
type(np.array)

builtin_function_or_method

In [117]:
# Calling the ndarray class directly treats its first argument as a SHAPE, not
# as data: this allocates a 1x2 array without initializing it, so the values
# shown are arbitrary leftovers from memory. Use np.array([1, 2]) to build an
# array from values.
np.ndarray([1,2])

array([[4.24399158e-314, 4.24399158e-314]])