## Numpy

In [1]:
import numpy as np

In [9]:
# yield_of_apples = w1 * temperature + w2 * rainfall + w3 * humidity
def crop_yield(region, weights):
    """Return the weighted sum of a region's climate measurements.

    Each value in ``region`` is multiplied by the weight at the same
    position in ``weights`` and the products are accumulated. Pairing is
    done with ``zip``, so it stops at the shorter of the two inputs.
    """
    total = 0
    for measurement, weight in zip(region, weights):
        total += measurement * weight
    return total

w1, w2, w3 = 0.3, 0.2, 0.5
weights = [w1, w2, w3]

In [11]:
# Climate readings (temperature, rainfall, humidity) for 5 regions,
# created directly as NumPy arrays.
kanto = np.array([73, 67, 43])
johto = np.array([91, 88, 64])
hoenn = np.array([87, 134, 58])
sinnoh = np.array([102, 43, 37])
unova = np.array([69, 96, 70])

# The weights list is converted too, so it can be used in array operations.
weights = np.array(weights)

In [12]:
type(weights)

numpy.ndarray

In [13]:
# accessing element in numpy array
hoenn[0]

87

## Numpy Arrays Operations

In [14]:
# dot product
np.dot(kanto, weights)

56.8

In [15]:
# equivalent to dot product
(kanto * weights).sum()

56.8

In [17]:
# summation of np arr
weights.sum()

1.0

In [16]:
# multiplication
arr = np.array([1, 2, 3])
arr1 = np.array([4, 5, 6])

arr * arr1

array([ 4, 10, 18])

## Benefits of Using Numpy Arrays
- Ease of use: mathematical expressions are shorter and read closer to the formula
- Performance: vectorized array operations run faster than equivalent Python loops

## Multi-dimensional Numpy Arrays

In [18]:
climate_data = np.array([[73, 67, 43],
                         [91, 88, 64],
                         [87, 134, 58],
                         [102, 43, 37],
                         [69, 96, 70]])

In [20]:
# shape (row, column)
print(climate_data.shape)
print(weights.shape)

(5, 3)
(3,)


In [21]:
# dtype
# all elements must be of the same data type
climate_data.dtype

dtype('int32')

In [22]:
# matrix multiplication
np.matmul(climate_data, weights)

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [23]:
# @ to perform matrix multiplication
climate_data @ weights

array([56.8, 76.9, 81.9, 57.7, 74.9])

## Working with CSV

In [26]:
import os
import urllib.request

# Remote CSV and the local file it is saved to (CSV content kept under a .txt name).
CLIMATE_URL = 'https://gist.github.com/BirajCoder/a4ffcb76fd6fb221d76ac2ee2b8584e9/raw/4054f90adfd361b7aa4255e99c2e874664094cea/climate.csv'
CLIMATE_FILE = 'climate.txt'

# Download only when there is no local copy yet, so re-running the notebook
# does not fetch the same file from the network again.
if not os.path.exists(CLIMATE_FILE):
    urllib.request.urlretrieve(CLIMATE_URL, CLIMATE_FILE)

# csv (stored as txt) -> array
# skip_header=1 skips the first line of the file (presumably the column names).
climate_data = np.genfromtxt(CLIMATE_FILE, delimiter=",", skip_header=1)
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [27]:
climate_data.shape

(10000, 3)

In [28]:
# calculate yields using matrix multiplication
yields = climate_data @ weights
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [29]:
yields.shape

(10000,)

In [31]:
# Append the yields as an extra column next to the climate measurements.

# yields is 1-D, so it is reshaped into a single column first; -1 lets NumPy
# infer the number of rows instead of hard-coding 10000.
# axis=1 joins the arrays along the column dimension (side by side).
climate_results = np.concatenate((climate_data, yields.reshape(-1, 1)), axis=1)
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [32]:
climate_results.shape

(10000, 4)

In [33]:
# array -> csv (stored as txt)
# fmt="%.2f" writes every value with two decimals; comments="" keeps NumPy
# from prefixing the header line with "# ".
np.savetxt("climate_results.txt",
           climate_results,
           fmt="%.2f",
           delimiter=",",
           header="temperature,rainfall,humidity,yield_apples",
           comments="")

## Arithmetic operations, Broadcasting and Comparison
- Arithmetic operations: element-wise arithmetic between two arrays of the same shape, or between an array and a scalar.
- Arithmetic broadcasting: arithmetic operations between two arrays with different numbers of dimensions but compatible shapes.

## Arithmetic operations

In [34]:
arr2 = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 1, 2, 3]])

arr3 = np.array([[11, 12, 13, 14], 
                 [15, 16, 17, 18], 
                 [19, 11, 12, 13]])

In [35]:
# adding a scalar
arr2 + 3

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12,  4,  5,  6]])

In [36]:
# element-wise subtraction
arr3 - arr2

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

In [38]:
# division by scalar
arr2 /2

array([[0.5, 1. , 1.5, 2. ],
       [2.5, 3. , 3.5, 4. ],
       [4.5, 0.5, 1. , 1.5]])

In [39]:
# modulus with scalar
arr2 % 4

array([[1, 2, 3, 0],
       [1, 2, 3, 0],
       [1, 1, 2, 3]], dtype=int32)

## Arithmetic Broadcasting

In [40]:
arr2 = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 1, 2, 3]])
# shape(3, 4)

arr4 = np.array([4, 5, 6, 7])
# shape(4,)

In [43]:
# arr4 is conceptually replicated 3 times to match the shape (3, 4)
# (the replication happens without actually creating 3 copies in memory)
# replication only works if one of the arrays can be replicated to match the other array's shape.
arr2 + arr4

array([[ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13,  6,  8, 10]])

In [44]:
arr5 = np.array([7, 8])  # shape (2,)

# Shapes (3, 4) and (2,) cannot be broadcast: the trailing dimensions (4 vs 2)
# differ and neither of them is 1. The error is the point of this cell, so it
# is caught and printed instead of stopping a full notebook run.
try:
    arr2 + arr5
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,4) (2,) 

## Comparison

In [46]:
arr1 = np.array([[1, 2, 3], [3, 4, 5]])
arr2 = np.array([[2, 2, 3], [1, 2, 5]])

In [47]:
arr1 == arr2

array([[False,  True,  True],
       [False, False,  True]])

In [48]:
arr1 != arr2

array([[ True, False, False],
       [ True,  True, False]])

In [49]:
arr1 >= arr2

array([[False,  True,  True],
       [ True,  True,  True]])

In [50]:
arr1 < arr2

array([[ True, False, False],
       [False, False, False]])

In [52]:
# True is 1, False is 0
(arr1 == arr2).sum()

3

## Array Indexing & Slicing

In [53]:
arr3 = np.array([
    [[11, 12, 13, 14], 
     [13, 14, 15, 19]], 
    
    [[15, 16, 17, 21], 
     [63, 92, 36, 18]], 
    
    [[98, 32, 81, 23],      
     [17, 18, 19.5, 43]]])

In [54]:
arr3.shape

(3, 2, 4)

In [56]:
# accessing a single element
arr3[1, 1, 2]

36.0

In [57]:
# ranges (inclusive:exclusive)
arr3[1:, 0:1, :2]

array([[[15., 16.]],

       [[98., 32.]]])

In [58]:
arr3[1:, 1, 3]

array([18., 43.])

In [59]:
arr3[1]

array([[15., 16., 17., 21.],
       [63., 92., 36., 18.]])

## Other Ways of Creating Numpy Arrays

In [60]:
#all zeros
np.zeros((3,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [61]:
np.ones([2, 3, 1])

array([[[1.],
        [1.],
        [1.]],

       [[1.],
        [1.],
        [1.]]])

In [62]:
# identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [63]:
# random vector: 5 samples drawn uniformly from [0, 1) (values differ between runs unless a seed is set)
np.random.rand(5)

array([0.2902183 , 0.78680012, 0.82292097, 0.55569391, 0.17632439])

In [64]:
# random matrix: 2x3 samples drawn from the standard normal distribution
np.random.randn(2, 3)

array([[-0.04494393,  0.99513403, -0.50500257],
       [ 0.24473675, -0.09295877, -0.76093303]])

In [65]:
# fixed value
np.full([2, 3], 42)

array([[42, 42, 42],
       [42, 42, 42]])

In [69]:
# np.arange(start, end, step) -- end is exclusive
np.arange(10, 90, 3)

array([10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58,
       61, 64, 67, 70, 73, 76, 79, 82, 85, 88])

In [70]:
# equally spaced numbers in a range (both endpoints included)
# (start, end, #numbers)
np.linspace(3, 27, 9)

array([ 3.,  6.,  9., 12., 15., 18., 21., 24., 27.])