# Part 3 of 6 - Numerical Computing with NumPy

In [1]:
# Showing zip(x,y) method

a = [1,2,3,4]
b = [5,6,7,8]
for i,j in zip(a,b):
    print(i, j)

1 5
2 6
3 7
4 8


In [8]:
# yield of apples = w1 * temperature + w2 * rainfall + w3 * humidity

# an example of set of values :

w1, w2, w3 = 0.3, 0.2, 0.5

In [10]:
kanto_temp = 73
kanto_rainfall = 67
kanto_humidity = 43

In [12]:
# Substituting into equation, we get:

kanto_yield_apples = kanto_temp * w1 + kanto_rainfall * w2 + kanto_humidity * w3
kanto_yield_apples

56.8

In [13]:
print(f'Expected yield of apples in Kanto region : {kanto_yield_apples}')

Expected yield of apples in Kanto region : 56.8


In [None]:
# Making it slightly easier :

kanto = [73,67,43]
johto = [91,88,64]
hoenn = [87,134,58]
sinnoh = [102,43,37]
unova = [69,96,70]

# Now the variables are in a vector form.

In [None]:
# Same can be done to weights variables.

weights = [w1,w2,w3]

In [32]:
# crop_yield function with the help of zip() method :

kanto = [73,67,43]
johto = [91,88,64]
hoenn = [87,134,58]
sinnoh = [102,43,37]
unova = [69,96,70]

weights = [w1,w2,w3]

def crop_yield(a, b):
    result = 0
    for x, w in zip(a, b):
        result += x * w
        return result

crop_yield(kanto, weights)

# Codes should be in one cell. Bruh. Why ?
    

21.9

# Going from Python lists to Numpy arrays

In [33]:
# The calculation performed inside the crop_yield function is also known as dot product of two vectors.

import numpy as np

Numpy arrays can be created by using the np.array function

In [57]:
kanto = np.array([73,67,43])
kanto


array([73, 67, 43])

In [41]:
weights = np.array([w1,w2,w3])
weights

array([0.3, 0.2, 0.5])

In [46]:
type(kanto)

numpy.ndarray

In [48]:
type(weights)

numpy.ndarray

In [49]:
# NumPy arrays support indexing

weights[0]

0.3

In [51]:
kanto[2]

43

# Operating on NumPy arrays

In [55]:
# Dot product of two vectors can be calculated by np.dot function

np.dot(kanto,weights)

56.8

In [56]:
np.dot(sinnoh,weights)

57.699999999999996

In [61]:
(kanto*weights).sum()

56.8

We can achieve same result with lower level operations supported by NumPy arrays :
performing an element-wise multiplication and calculating the sum of resulting numbers

The * operator performs an element-wise multiplication of two arrays(assuming they are same size) and sum() calculates the summation of these values.

In [63]:
arr1 = np.array([1,2,3])
arr2 = np.array([4,5,6])
help(np.dot)

Help on _ArrayFunctionDispatcher in module numpy:

dot(...)
    dot(a, b, out=None)
    
    Dot product of two arrays. Specifically,
    
    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
      (without complex conjugation).
    
    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
      but using :func:`matmul` or ``a @ b`` is preferred.
    
    - If either `a` or `b` is 0-D (scalar), it is equivalent to
      :func:`multiply` and using ``numpy.multiply(a, b)`` or ``a * b`` is
      preferred.
    
    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
      the last axis of `a` and `b`.
    
    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
      sum product over the last axis of `a` and the second-to-last axis of
      `b`::
    
        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
    
    It uses an optimized BLAS library when possible (see `numpy.linalg`).
    
    Parameters
    -----

In [64]:
arr1 * arr2

array([ 4, 10, 18])

In [65]:
np.dot(arr1,arr2)

32

In [66]:
arr2.sum()

15

# Benefits of using NumPy arrays

In [71]:
# Python lists

arr1 = list(range(10**6))
arr2 = list(range(10**6, 2*10**6))

# NumPy arrays

arr1_np = np.array(arr1)
arr2_np = np.array(arr2)



In [72]:
%%time
result = 0
for x1,x2 in zip(arr1,arr2):
    result += x1*x2

result

CPU times: total: 172 ms
Wall time: 522 ms


833332333333500000

In [75]:
%%time
np.dot(arr1_np,arr2_np)

CPU times: total: 0 ns
Wall time: 1.57 ms


-1942957984

In [77]:
# As you can see, using np.dot is much faster than using python for loop. That is why numpy is beneficial.

# Multi-Dimensional NumPy Arrays

In [80]:
climate_data = np.array([[73,67,43],
                         [91,88,64],
                         [87,134,58],
                         [102,43,47],
                         [69,96,70]])
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  47],
       [ 69,  96,  70]])

In [82]:
# Data above is a 2-D (2 dimensional) array with 5 rows that show regions and 3 columns that show temperature, rainfall and humidity.

In [85]:
# shape of given data

climate_data.shape

# 2-D array (matrix)

(5, 3)

In [86]:
weights.shape

# 1-D array (vector)

(3,)

In [94]:
# 3-D array (tensor)

arr3 = np.array([
    [[11,12,13],
     [14,15,16,]],
    [[17,18,19],
     [20,21,22]]])
arr3

array([[[11, 12, 13],
        [14, 15, 16]],

       [[17, 18, 19],
        [20, 21, 22]]])

In [95]:
arr3.shape

(2, 2, 3)

In [96]:
# All elements in numpy array have the same data structure. You can check the data type by  .dtype  attribute.

In [106]:
weights.dtype

# If a numpy array contains float data, it will be considered as a float data structure.

dtype('float64')

In [107]:
climate_data.dtype

dtype('int32')

In [108]:
arr3.dtype

dtype('int32')

In [109]:
# 'int32' means that 32 bites of storage will be used to store the value.

In [111]:
# To perform matrix multiplication, we can use  np.matmul(a, b)  method

np.matmul(climate_data, weights)

array([56.8, 76.9, 81.9, 62.7, 74.9])

In [113]:
# @ can also be used for matrix multiplication

climate_data @ weights

array([56.8, 76.9, 81.9, 62.7, 74.9])

# Working with CSV data files

Numpy also has helper functions reading from & writing to files. Let's download a file climate.txt, which contains 10,000 climate measurements (temp,rainfall & humidity).

CSV - comma-separated values. It is a file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas. A CSV file typically stores tabular data (numbers and text) in plain text, in which case each line will have the same number of fields. (Wikipedia)

To read this file into a numpy array, we can use the   genfromtxt   function.

In [115]:
import urllib.request

urllib.request.urlretrieve('https://gist.github.com/BirajCoder/a4ffcb76fd6fb221d76ac2ee2b8584e9/raw/4054f90adfd361b7aa4255e99c2e874664094cea','climate.txt')


('climate.txt', <http.client.HTTPMessage at 0x2a17e7ddca0>)

In [116]:
climate_data = np.genfromtxt('climate.txt', delimiter = ',', skip_header = 1)

In [117]:
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [118]:
climate_data.shape

(10000, 3)

In [119]:
weights

array([0.3, 0.2, 0.5])

In [120]:
weights = np.array([0.3,0.2,0.5])
weights

array([0.3, 0.2, 0.5])

In [121]:
yields = climate_data @ weights
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [122]:
yields.shape

(10000,)

In [123]:
# We can add the  'yields'  back to  'climate_data'  as a fourth column using 
# np.concatenate   function.

In [126]:
climate_results = np.concatenate((climate_data, yields.reshape(10000,1)), axis = 1)                

In [127]:
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [129]:
np.savetxt('climate_results.txt',
           climate_results,
           fmt = '%.2f',
           header = 'temperatue,rainfall,humidity,yield_apples',
           comments = '')


In [133]:
# Common functions in numpy :

# Math : np.sum , np.exp , np.round
# Array manipulation : np.reshape , np.stack , np.concatenate , np.split
# Linear Algebra : np.matmul , np.dot , np.transpose , np.eigvals
# Statistics : np.mean , np.median , np.std , np.max

# Arithmetic operations and broadcasting

In [137]:
arr2 = np.array([[1,2,3,4],
                 [5,6,7,8],
                 [9,10,11,12]])

# Creating a random array.  np.random.randint(lower,upper,(size of matrix))

arr2 = np.random.randint(10,20,(4,4))
arr2

array([[11, 16, 17, 19],
       [15, 11, 14, 19],
       [11, 12, 10, 18],
       [12, 19, 16, 18]])

In [135]:
arr3 = np.random.randint(1,10,(4,4))
arr3

array([[2, 9, 9, 6],
       [1, 9, 5, 2],
       [2, 1, 4, 6],
       [9, 4, 2, 5]])

In [138]:
# Adding a scalar

arr2 + 3

array([[14, 19, 20, 22],
       [18, 14, 17, 22],
       [14, 15, 13, 21],
       [15, 22, 19, 21]])

In [139]:
# Element - wise substraction

arr2 - arr3

array([[ 9,  7,  8, 13],
       [14,  2,  9, 17],
       [ 9, 11,  6, 12],
       [ 3, 15, 14, 13]])

In [140]:
# Modulus with scalar

arr2 % 4

array([[3, 0, 1, 3],
       [3, 3, 2, 3],
       [3, 0, 2, 2],
       [0, 3, 0, 2]], dtype=int32)

In [141]:
arr2 // 4

array([[2, 4, 4, 4],
       [3, 2, 3, 4],
       [2, 3, 2, 4],
       [3, 4, 4, 4]], dtype=int32)

In [142]:
arr2 / 4

array([[2.75, 4.  , 4.25, 4.75],
       [3.75, 2.75, 3.5 , 4.75],
       [2.75, 3.  , 2.5 , 4.5 ],
       [3.  , 4.75, 4.  , 4.5 ]])

In [143]:
arr2 * 10

array([[110, 160, 170, 190],
       [150, 110, 140, 190],
       [110, 120, 100, 180],
       [120, 190, 160, 180]])

In [144]:
arr2 = np.random.randint(1,10,(3,3))
arr2

array([[6, 2, 2],
       [5, 6, 2],
       [9, 1, 5]])

In [145]:
arr2.shape

(3, 3)

In [152]:
arr3 = np.random.randint(1,10,(1,3))
arr3

array([[3, 8, 1]])

In [154]:
arr2 + arr3

# arr2 has 3,3 shape , arr3 has 1,3 shape. When adding, numpy duplicates arr3's first
# row 2 times and it becomes a 3,3 matrix which then can be added to arr2.

array([[ 9, 10,  3],
       [ 8, 14,  3],
       [12,  9,  6]])

In [159]:
arr3 = np.random.randint(1,10,(3,3))
arr3

array([[3, 1, 7],
       [3, 6, 8],
       [1, 6, 8]])

In [161]:
arr2 + arr3

array([[ 9,  3,  9],
       [ 8, 12, 10],
       [10,  7, 13]])

In [166]:
arr2 = np.random.randint(1,10,(1,3
                              ))
arr2

array([[1, 9, 7]])

In [163]:
arr3 = np.random.randint(1,10,(4,3))
arr3

array([[4, 1, 7],
       [9, 2, 4],
       [8, 3, 2],
       [6, 4, 4]])

In [167]:
arr2 + arr3

array([[ 5, 10, 14],
       [10, 11, 11],
       [ 9, 12,  9],
       [ 7, 13, 11]])

In [169]:
# Arrays also support comparison operations

In [170]:
arr2 > arr3

array([[False,  True, False],
       [False,  True,  True],
       [False,  True,  True],
       [False,  True,  True]])

In [171]:
arr2 == arr3

array([[False, False,  True],
       [False, False, False],
       [False, False, False],
       [False, False, False]])

In [173]:
arr3 != arr2

array([[ True,  True, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [175]:
# A common use case for this is to count the number of equal elements in two arrays
# using the  'sum'  method. True evaluates to 1, False evaluates to 0 when booleans 
# are used in arithmetic operations.

In [176]:
(arr2 == arr3).sum()

1

# Array indexing and slicing

In [178]:
arr3 = np.array([[[1,2,3],
               [4,5,6]],
               [[7,8,9],
               [10,11,12]],
               [[13,14,15],
               [16,17,18]]])
arr3

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]],

       [[13, 14, 15],
        [16, 17, 18]]])

In [179]:
arr3.shape

(3, 2, 3)

In [180]:
# Single element

arr3[1,1,2]

12

In [182]:
# Same as previous line

arr3[1][1][2]

12

In [183]:
# Subarray using ranges

arr3[1:, :1, :2]

array([[[ 7,  8]],

       [[13, 14]]])

In [186]:
# What does this do?

arr3[1:][:1][:2]

array([[[ 7,  8,  9],
        [10, 11, 12]]])

In [188]:
# Mixing indices and ranges

arr3[1:, 1, 2]

array([12, 18])

In [191]:
# Using fewer indices

arr3[1]

array([[ 7,  8,  9],
       [10, 11, 12]])

In [192]:
# Using fewer indices

arr3[:2, 1]

array([[ 4,  5,  6],
       [10, 11, 12]])

In [195]:
# Using too many indices

arr3[1,1,2]

12

# Other ways of creating NumPy arrays

In [2]:
import numpy as np
# All zeros
np.zeros((3,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [3]:
# All ones

np.ones((4,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [4]:
# Identity matrix

np.eye(5,5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [8]:
# Random vector

np.random.rand(5)

array([0.86786117, 0.03459154, 0.97098043, 0.16004941, 0.89272883])

In [9]:
# Random matrix

np.random.randn(2,3)

array([[-0.4902893 ,  1.91423785, -1.62284528],
       [-0.47664448, -0.32941828,  1.19201096]])

In [70]:
# Random integers

np.random.randint(10,20,(4,4))

array([[17, 19, 17, 12],
       [17, 14, 14, 17],
       [16, 13, 10, 11],
       [10, 15, 17, 12]])

In [71]:
np.full([2,3],42)

array([[42, 42, 42],
       [42, 42, 42]])

In [11]:
# Range with (start, end, step)

np.arange(10,20,3)

array([10, 13, 16, 19])

In [17]:
# Equally spaced numbers in a range

np.linspace(3, 27, 9)

array([ 3.,  6.,  9., 12., 15., 18., 21., 24., 27.])

# There are examples in the end.

In [18]:
a = [1,2,3]
a

[1, 2, 3]

In [19]:
b = [4,5,6]
b

[4, 5, 6]

In [21]:
a = np.array(a)
b = np.array(b)
a*b

array([ 4, 10, 18])

In [25]:
arr = np.random.randint(1,20,(5,5))
arr

array([[ 7, 18,  6,  7,  1],
       [18, 19,  6, 18, 15],
       [ 6, 13,  3,  2,  9],
       [16,  7, 19, 12,  3],
       [ 7,  2,  9,  8, 19]])

In [26]:
arr = np.arange(1,10)
arr

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [28]:
arr = np.arange(1,10,2)
arr

array([1, 3, 5, 7, 9])

In [29]:
arr.dtype

dtype('int32')

In [31]:
arr

array([1, 3, 5, 7, 9])

In [34]:
arr[3]

7

In [35]:
arr1 = np.random.randint(1,10,(4,4))
arr2 = np.random.randint(1,12,(4,4))
arr1 * arr2

array([[ 9, 32, 24, 63],
       [30, 14, 24, 18],
       [63,  5, 14, 14],
       [10, 99, 54,  7]])

In [36]:
np.dot(arr1,arr2)

array([[152, 165, 136,  90],
       [ 72,  79,  76,  50],
       [ 86, 130, 116,  98],
       [194, 193, 171,  97]])

In [37]:
arr1

array([[3, 8, 4, 7],
       [3, 2, 4, 3],
       [9, 1, 2, 7],
       [2, 9, 9, 7]])

In [38]:
arr2

array([[ 3,  4,  6,  9],
       [10,  7,  6,  6],
       [ 7,  5,  7,  2],
       [ 5, 11,  6,  1]])

In [39]:
arr1 * arr2

array([[ 9, 32, 24, 63],
       [30, 14, 24, 18],
       [63,  5, 14, 14],
       [10, 99, 54,  7]])

In [41]:
a = np.array([1,2,3])
b = np.array([4,5,6])
a * b

array([ 4, 10, 18])

In [42]:
np.dot(a,b)

32

In [48]:
a = np.random.randint(1,10,(3,3))
b = np.random.randint(1,10,(3,3))
a * b

array([[54, 40, 12],
       [25, 42, 18],
       [25,  4,  6]])

In [49]:
np.dot(a,b)

array([[114,  94,  46],
       [110,  91,  42],
       [ 80,  55,  24]])

In [50]:
a + b

array([[15, 13,  7],
       [10, 13,  9],
       [10,  5,  7]])

In [51]:
np.arange(1,1000000)

array([     1,      2,      3, ..., 999997, 999998, 999999])

In [62]:
import time
t1 = time.time()
list_ = [i for i in range(1,30000000)]
t2 = time.time()
t2-t1

1.3186314105987549

In [63]:
t1 = time.time()
a = np.arange(1,30000000)
t2 = time.time()
t2-t1

0.046988725662231445

In [64]:
a = [[[1,2,3],
     [4,5,6]],
     [[7,8,9]]]

a
     

[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9]]]

In [68]:
a = np.array([[2],[34],[45]])
a = np.array(a)

In [69]:
a.dtype

dtype('int32')

In [None]:
# import urllib.request

# urllib.request.retrieve('link of the csv file','will be imported as.txt')

# climate_data = np.genfromtxt('climate.txt', delimiter=',', skip_header=1

In [88]:
arr1 = np.random.randint(1,10,(5,5))
arr1

array([[9, 2, 3, 8, 6],
       [1, 4, 4, 1, 7],
       [7, 7, 7, 5, 1],
       [1, 7, 5, 2, 2],
       [1, 7, 5, 3, 9]])

In [91]:
arr1=arr1.reshape(1,25)
arr1

array([[9, 2, 3, 8, 6, 1, 4, 4, 1, 7, 7, 7, 7, 5, 1, 1, 7, 5, 2, 2, 1, 7,
        5, 3, 9]])

In [92]:
np.arange(1,10,3)

array([1, 4, 7])

In [93]:
np.arange(1,20,5)

array([ 1,  6, 11, 16])