# NumPy
- Good tutorial on numpy: https://www.machinelearningplus.com/python/numpy-tutorial-part1-array-python-examples/ and https://www.machinelearningplus.com/python/numpy-tutorial-python-part2/
- Numpy is the core library for **scientific computing in Python and used for numerical computations**.
- Performing numerical operations on NumPy arrays is significantly faster than performing them on Python lists.
- NumPy's core feature is the **ndarray, a powerful N-dimensional array object that allows for efficient manipulation of large datasets**. An ndarray is a **multidimensional array object**.

### Topics covered:
- creating/accessing/modifying arrays
- np methods to create arrays: zeros, ones, full, eye, random, randint, arange
- array indexing / slicing: integer array indexing, boolean array indexing

## Arrays
- A numpy array is a grid of values, all of the same type, and is indexed by a tuple of nonnegative integers. 
- **The number of dimensions is the rank of the array**; the shape of an array is a tuple of integers giving the size of the array along each dimension.
- A rank 1 array (also known as a 1-dimensional array) is essentially a list of values.


**We can initialize numpy arrays from nested Python lists, and access elements using square brackets:**

In [2]:
import numpy as np

In [3]:
# Create a 1D array of shape (3,) from a flat Python list
a = np.array([5, 3, 7])   # Create a rank 1 (i.e. 1D) array
# a = np.array([5, 3, 7], dtype=np.int16)   # MORE ON dtype later
print(a)

print(type(a), a.ndim)    # <class 'numpy.ndarray'> 1
print(a.shape)            # (3,)  -- a one-element tuple, hence the trailing comma
############################
# Create a 2D array from a list of lists: each inner list becomes one row.
# Each row represents a student, each column represents a subject (Math, Science, English)
scores = np.array([[85, 90, 78],   # Student 1
                   [88, 76, 92]])  # Student 2

print(scores)

print(type(scores), scores.ndim)    # <class 'numpy.ndarray'> 2
print(scores.shape)            # (2, 3)  -- 2 rows (students), 3 columns (subjects)

########OPTIONAL##########################################
# Creating a 3D array with shape (2, 3, 5): 2 blocks, each with 3 rows of 5 values
b = np.array([
    [  # First block (depth index 0)
        [1, 2, 3, 4, 5],      # row1
        [6, 7, 8, 9, 10],     # row2
        [11, 12, 13, 14, 15]  # row3
    ],
    [  # Second block (depth index 1)
        [16, 17, 18, 19, 20], # row1
        [21, 22, 23, 24, 25], # row2
        [26, 27, 28, 29, 30]  # row3
    ]
])
print(b)
print(type(b), b.ndim)    # <class 'numpy.ndarray'> 3
print(b.shape)            # (2, 3, 5)

# Create an array of shape (2, 2, 2): 2 blocks, each a 2x2 grid.
# Note that b is rebound here, so the (2, 3, 5) array above is no longer reachable.
b = np.array([
    [ # First block (depth index 0)
        [1, 2],
        [3, 4]
    ],
    [ # Second block (depth index 1)
        [5, 6],
        [7, 8]
    ]
])
print(b)
print(type(b), b.ndim)    # <class 'numpy.ndarray'> 3
print(b.shape)            # (2, 2, 2)
##################################################

[5 3 7]
<class 'numpy.ndarray'> 1
(3,)
[[85 90 78]
 [88 76 92]]
<class 'numpy.ndarray'> 2
(2, 3)
[[[ 1  2  3  4  5]
  [ 6  7  8  9 10]
  [11 12 13 14 15]]

 [[16 17 18 19 20]
  [21 22 23 24 25]
  [26 27 28 29 30]]]
<class 'numpy.ndarray'> 3
(2, 3, 5)
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
<class 'numpy.ndarray'> 3
(2, 2, 2)


In [4]:
# Access elements and modify a single element

# 1D array: index with a single integer
a = np.array([50, 3, 2])   # Create a rank 1 (i.e. 1D) array
print(a[2])                # Prints "2"
print(a[0], a[1], a[2])    # Prints "50 3 2"
a[0] = 5                   # Change an element of the array in place; a is now [5 3 2]

# 2D array: access and modify elements
# Use: b[row_index][column_index]
# or   b[row_index, column_index]
# Both forms return the same element. b[i, j] is a single indexing operation,
# while b[i][j] first builds the temporary row b[i] and then indexes into it,
# so the comma form is the preferred one.
b = np.array([[1, 2, 3],
              [4, 5, 6]])    # Create a rank 2 array
print(b[0][0])   # Prints "1"
print(b[0, 0])   # Prints "1"
# print(type(b), b.ndim)
# print(b.shape)


print(b[0][0], b[0][1], b[1][0])   # Prints "1 2 4"
print(b[0, 0], b[0, 1], b[1, 0])   # Prints "1 2 4"

b[0, 2] = 9   # Change the element in row 0, column 2 (3 -> 9)
print(b)      # Prints "[[1 2 9]
              #          [4 5 6]]"

# 3D array: access elements
# Use: b[block_index][row_index][column_index]
# or   b[block_index, row_index, column_index]
b = np.array([
    [  # First block (depth index 0)
        [1, 2, 3, 4, 5],      # row1
        [6, 7, 8, 9, 10],     # row2
        [11, 12, 13, 14, 15]  # row3
    ],
    [  # Second block (depth index 1)
        [16, 17, 18, 19, 20], # row1
        [21, 22, 23, 24, 25], # row2
        [26, 27, 28, 29, 30]  # row3
    ]
])
print(type(b), b.ndim)    # <class 'numpy.ndarray'> 3
print(b.shape)            # (2, 3, 5)
print(b[0][2][0])         # Prints "11" (block 0, row 2, column 0)
print(b[0, 2, 0])         # Prints "11"

print(b[0][2][0], b[0][2][1], b[0][2][2])   # Prints "11 12 13"
print(b[0, 2, 0], b[0, 2, 1], b[0, 2, 2])   # Prints "11 12 13"

2
50 3 2
1
1
1 2 4
1 2 4
[[1 2 9]
 [4 5 6]]
<class 'numpy.ndarray'> 3
(2, 3, 5)
11
11
11 12 13
11 12 13


**Numpy also provides many special methods to create arrays**

In [5]:
# np.zeros(shape) returns an array of the given shape filled with 0.0
# (the default dtype is float64, which is why the zeros print as "0.").
# shape is an int for a 1D array or a tuple of ints for more dimensions.
# Note: (10) is just the int 10 in parentheses, not a tuple; a one-element
# tuple needs a trailing comma, i.e. (10,).
a = np.zeros(10)      # 1D array of 10 zeros; same as np.zeros((10,))
print(a)
# print(type(a), a.ndim)
# print(a.shape)


a = np.zeros((2, 2))  # Create a 2x2 array of all zeros
print(a)              # Prints "[[0. 0.]
                      #          [0. 0.]]"

a = np.zeros((3, 3))  # 3x3 array of zeros
print(a)

a = np.zeros((3, 5))  # 3 rows, 5 columns
print(a)


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0.]
 [0. 0.]]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [6]:
# Create arrays of all ones (float64 by default, like np.zeros)
b = np.ones(4)       # 1D array of four ones; same as np.ones((4,))
print(b)             # [1. 1. 1. 1.]

a = np.ones((3, 5))  # Create a 3x5 array of all ones
print(a)
###############################

# Fill an array with some constant value.
# When dtype is not given, np.full takes it from fill_value, so an integer
# fill value produces an integer array (printed without a trailing ".").
a = np.full(3, fill_value=5)        # 1D array: [5 5 5]
print(a)

a = np.full((3, 4), fill_value=9)   # 3x4 array of 9s
print(a)

a = np.full((2, 2), 7)  # fill_value can also be passed positionally
print(a)                # Prints "[[7 7]
                        #          [7 7]]"


[1. 1. 1. 1.]
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[5 5 5]
[[9 9 9 9]
 [9 9 9 9]
 [9 9 9 9]]
[[7 7]
 [7 7]]


In [7]:
# np.eye(N, M) builds a 2D float array with ones on the main diagonal and
# zeros everywhere else. When M is omitted the result is square (N x N),
# i.e. an identity matrix.
d = np.eye(N=2)       # 2x2 identity matrix
print(d)              # Prints "[[1. 0.]
                      #          [0. 1.]]"

d = np.eye(N=3)       # 3x3 identity matrix
print(d)

d = np.eye(N=3, M=5)  # 3 rows, 5 columns: not square, ones only on the diagonal
print(d)

[[1. 0.]
 [0. 1.]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]]


In [8]:
# np.random.random(size) draws floats uniformly from the half-open interval
# [0.0, 1.0). No seed is set in this cell, so the values differ on every run.
e = np.random.random(10)      # Create a 1D array of 10 random values
print(e)
# print(type(e), e.shape)


e = np.random.random((2, 2))  # Create a 2x2 array filled with random values
print(e)                      # Might print "[[0.91940167 0.08143941]
                              #               [0.68744134 0.87236687]]"
# print(type(e), e.shape)

e = np.random.random((2, 4))  # Create a 2x4 array filled with random values
print(e)
# print(type(e), e.shape)

# Generate a 1D array with 3 elements with random integers between -10 (inclusive) and 15 (exclusive)
e = np.random.randint(-10, 15, size=3)
print(e)
# print(type(e), e.shape)


# Generate a 3x3 matrix with random integers between -10 (inclusive) and 15 (exclusive)
e = np.random.randint(-10, 15, size=(3, 3))
print(e)
# print(type(e), e.shape)


[0.35403177 0.71396044 0.36858589 0.78592146 0.2692566  0.04782898
 0.43386793 0.90667118 0.50261776 0.74111145]
[[0.69262319 0.89904044]
 [0.53335934 0.77542707]]
[[0.02912825 0.64526763 0.06540659 0.8784934 ]
 [0.56980707 0.62059308 0.77322786 0.43627029]]
[-6  4 -2]
[[ 11  -4  12]
 [  2  -5   6]
 [ 11 -10   8]]


In [9]:
# arange: evenly spaced values in the half-open interval [start, stop).
# The stop value itself is never included.

# Example 1: Basic usage with stop only (start defaults to 0, step to 1)
arr = np.arange(5)  # Creates an array from 0 to 4
print("Example 1 - Array from 0 to 4:")
print(arr)

# Example 2: Specifying start and stop
arr = np.arange(2, 10)  # Creates an array from 2 to 9
print("\nExample 2 - Array from 2 to 9:")
print(arr)

# Example 3: Specifying start, stop, and step
arr = np.arange(1, 10, 2)  # Creates an array from 1 to 9 with a step of 2
print("\nExample 3 - Array from 1 to 9 with step of 2:")
print(arr)

# Example 4: Using a negative step (start must then be greater than stop)
arr = np.arange(10, 0, -2)  # Creates an array from 10 to 2 with a step of -2; stop value 0 is excluded
print("\nExample 4 - Array from 10 to 2 with step of -2:")
print(arr)

# Example 5: Specifying dtype
# NOTE(review): NumPy's documentation recommends np.linspace for non-integer
# steps, because floating-point rounding can change the number of elements.
arr = np.arange(0, 1, 0.1, dtype=float)  # Creates an array of floats from 0 to 0.9
print("\nExample 5 - Array of floats from 0 to 0.9:")
print(arr)

Example 1 - Array from 0 to 4:
[0 1 2 3 4]

Example 2 - Array from 2 to 9:
[2 3 4 5 6 7 8 9]

Example 3 - Array from 1 to 9 with step of 2:
[1 3 5 7 9]

Example 4 - Array from 10 to 2 with step of -2:
[10  8  6  4  2]

Example 5 - Array of floats from 0 to 0.9:
[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]


## Array indexing

**Slicing**: Similar to Python lists, numpy arrays can be sliced. Since arrays may be multidimensional, you must **specify a slice for each dimension of the array**:

In [12]:
# With seed 42, the code below creates the following rank 2 array with shape (4, 5):
# [[6 3 7 4 6]
#  [9 2 6 7 4]
#  [3 7 7 2 5]
#  [4 1 7 5 1]]

np.random.seed(42) # Set the seed for reproducibility

a = np.random.randint(0, 10, size=(4, 5))  # integers in [0, 10), 4 rows x 5 columns
print(a)


# Use slicing to pull out the subarray consisting of the first 2 rows (i.e. rows 0 and 1)
# and columns 1 and 2; b is the following array of shape (2, 2):
# [[3 7]
#  [2 6]]
b = a[0:2, 1:3] # or b = a[:2, 1:3]
print(b)

# A slice of an array is a view into the same data, so modifying it
# will modify the original array.
print(a[0, 1])   # Prints "3"
b[0, 0] = 77     # b[0, 0] is the same piece of data as a[0, 1]
print(a[0, 1])   # Prints "77"
print(a)         # a now shows 77 in row 0, column 1

[[6 3 7 4 6]
 [9 2 6 7 4]
 [3 7 7 2 5]
 [4 1 7 5 1]]
[[3 7]
 [2 6]]
3
77
[[ 6 77  7  4  6]
 [ 9  2  6  7  4]
 [ 3  7  7  2  5]
 [ 4  1  7  5  1]]


In [None]:
# (OPTIONAL) Integer indexing can be combined with slice indexing. Each
# integer index removes one dimension, so the result has a lower rank than
# the original array; a slice always keeps its dimension.

# Rank 2 array with shape (3, 4)
a = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

# The middle row, selected two ways:
row_r1 = a[1, :]    # integer row index -> rank 1 view, shape (4,)
row_r2 = a[1:2, :]  # length-1 row slice -> rank 2 view, shape (1, 4)
print(f"{row_r1} {row_r1.shape}")  # Prints "[5 6 7 8] (4,)"
print(f"{row_r2} {row_r2.shape}")  # Prints "[[5 6 7 8]] (1, 4)"

# The same distinction applies to columns:
col_r1 = a[:, 1]    # integer column index -> rank 1, shape (3,)
col_r2 = a[:, 1:2]  # length-1 column slice -> rank 2, shape (3, 1)
print(f"{col_r1} {col_r1.shape}")  # Prints "[ 2  6 10] (3,)"
print(f"{col_r2} {col_r2.shape}")  # Prints "[[ 2]
                                   #          [ 6]
                                   #          [10]] (3, 1)"

**Integer array indexing**:
- When you index into numpy arrays using slicing, the resulting array view will always be a subarray of the original array.
- In contrast, integer array indexing allows you to construct arbitrary arrays using the data from another array.

Here is an example:

In [None]:
# (SKIP) One useful trick with integer array indexing is selecting or mutating
# one element from each row of a matrix.

# Create a new array from which we will select elements
a = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9],
              [10, 11, 12]])

print(f"a: \n{a}")

# One row index per row of a: [0 1 2 3]. Taken from a.shape instead of a
# hard-coded 4 so it always matches the number of rows.
row_indices = np.arange(a.shape[0])

# Column to pick in each row: 0, 2, 0, 1 (one entry per row of a)
col_indices = np.array([0, 2, 0, 1])

# The two index arrays are paired element-wise, so this selects
# a[0, 0], a[1, 2], a[2, 0] and a[3, 1].
print(a[row_indices, col_indices])  # Prints "[ 1  6  7 11]"

# Mutate one element from each row of a using the indices in col_indices
a[row_indices, col_indices] += 10

print(a)  # Prints "[[11  2  3]
          #          [ 4  5 16]
          #          [17  8  9]
          #          [10 21 12]]"

**Boolean array indexing**: Boolean array indexing lets you pick out arbitrary elements of an array. Frequently this type of indexing is used to select the elements of an array that satisfy some condition.

Here is an example:

In [None]:

# Sample array with 4 rows and 2 columns; some values repeat so that the
# conditions below match more than one element.
a = np.array([
      [1, 2], # city1
      [3, 4], # city2
      [3, 4],
      [8, 3],
])

# A comparison such as (a == 3) produces a Boolean array of the same shape as a.
# Indexing a with it returns a rank 1 array of the elements where it is True.

# select elements equal to 3:
print(a[(a == 3)])   # [3 3 3]
# print(len(a[(a == 3)])) # gives count


# select elements not equal to 3:
print(a[(a != 3)])   # [1 2 4 4 8]
# print(a[~(a == 3)])

# select all elem > 2 # UNHEALTHY water
print(a[a > 2])     # [3 4 3 4 8 3]

# select all elem > 2 and < 5 # MODERATE
# Combine conditions with & (and) / | (or); each condition is wrapped in parentheses.
print(a[(a > 2) & (a < 5)]) # [3 4 3 4 3]

# select elements less than 2 or greater than 4:
print(a[(a < 2) | (a > 4)]) # [1 8]



#############################
#SKIP
bool_idx = (a > 2)   # Find the elements of a that are bigger than 2;
                     # this returns a numpy array of Booleans of the same
                     # shape as a, where each slot of bool_idx tells
                     # whether that element of a is > 2.

print(bool_idx)      # Prints "[[False False]
                     #          [ True  True]
                     #          [ True  True]
                     #          [ True  True]]"

# We use boolean array indexing to construct a rank 1 array
# consisting of the elements of a corresponding to the True values
# of bool_idx
print(a[bool_idx])  # Prints "[3 4 3 4 8 3]"

[3 3 3]
[1 2 4 4 8]
[3 4 3 4 8 3]
[3 4 3 4 3]
[1 8]
[[False False]
 [ True  True]
 [ True  True]
 [ True  True]]
[3 4 3 4 8 3]


In [23]:
import numpy as np

# Contaminant readings for 4 cities: one row per city,
# columns are [Contaminant A, Contaminant B].
water_data = np.array([
    [1, 2],  # City 1
    [3, 4],  # City 2
    [3, 4],  # City 3
    [8, 3],  # City 4
])

# Split the table into one 1D array per contaminant.
cont_A = water_data[:, 0]  # first column (index 0)
cont_B = water_data[:, 1]  # second column (index 1)

# 1. Contaminant A readings equal to 3 (Warning Level)
is_warning = cont_A == 3
warning_A = cont_A[is_warning]
print("Contaminant A readings equal to 3:", warning_A)

# 2. Number of warning-level readings: True counts as 1 when summed
print("Count of warning-level readings in Contaminant A (== 3):", np.sum(is_warning))

# 3. Contaminant A readings that are NOT at the warning level
print("Contaminant A values not equal to 3:", cont_A[~is_warning])

# 4. Contaminant A readings above 2 (considered UNHEALTHY)
is_unhealthy = cont_A > 2
print("Contaminant A unhealthy levels (>2):", cont_A[is_unhealthy])

# 5. Moderate concern: above 2 but still below 5
is_moderate = is_unhealthy & (cont_A < 5)
print("Contaminant A moderate concern levels (>2 and <5):", cont_A[is_moderate])

# 6. AND condition across columns. A 1D row mask applied to the 2D table
#    keeps whole rows, i.e. both readings of every matching city.
#    The mask could also be written inline inside the brackets.
both_high = (cont_A > 3) & (cont_B > 2)
print("Cities where Contaminant A > 3 and Contaminant B > 2:", water_data[both_high])

# 7. OR condition: Contaminant A > 6 OR Contaminant B < 3
mask = (cont_A > 6) | (cont_B < 3)
print("Cities where Contaminant A > 6 OR Contaminant B < 3:", water_data[mask])

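Contaminant A readings equal to 3: [3 3]
Count of warning-level readings in Contaminant A (== 3): 2
Contaminant A values not equal to 3: [1 8]
Contaminant A unhealthy levels (>2): [3 3 8]
Contaminant A moderate concern levels (>2 and <5): [3 3]
Cities where Contaminant A > 3 and Contaminant B > 2: [[8 3]]
Cities where Contaminant A > 6 OR Contaminant B < 3: [[1 2]
 [8 3]]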
