In [3]:
import numpy as np
import pandas as pd

# Introduction to numpy (Numerical Python)

### Why numpy
<ol>
<li>NumPy brings the <b>computational power</b> of languages like <b>C and Fortran to Python</b>, a language much easier to learn and use.
    <br> </br> </li>
    
<li> It is used in every field of science and engineering and is the <b>universal standard</b> for working with <b>numerical data</b> in Python. 
     <br> </br> </li>

<li> <b>Essential</b> tool for beginners as well as experienced researchers doing state-of-the-art scientific and industrial research and development.
     <br> </br> </li>
<li> NumPy API are use in <b>Pandas, SciPy, Matplotlib, Scikit-learn, Scikit-image, Pytorch, Tensorflow</b> and most other data science and scientific Python packages.
     <br> </br> </li>
</ol>

In [4]:
# Comparison of processing speed of numpy and python lists

#Subtract a constant number 32 from 1000 numbers using python list and using numpy

import timeit
import numpy as np
import random


python_list = random.sample(range(1000), 1000)

def python_code():
    return [val-32 for val in python_list]


np_array = np.random.random(1000)
def numpy_code():
    return np_array-32

n = 100000
t_python = timeit.timeit(python_code, number=n)
t_numpy = timeit.timeit(numpy_code, number=n)

print('Time python', t_python)
print('Time numpy', t_numpy)
print('Speed Comparision', t_python/t_numpy)

Time python 5.1915794740000365
Time numpy 0.1676079320000099
Speed Comparision 30.974545250040613


### What is numpy?
<ol>
<li> The NumPy library contains multidimensional array and matrix data structures. 
    <br> </br> </li>
<li> It provides <b>ndarray</b>, a homogeneous n-dimensional array object, with methods to efficiently operate on it. 
    <br> </br> </li>
<li> NumPy can be used to perform a wide variety of <b>mathematical operations on arrays</b>.
    <br> </br> </li>
<li> It adds powerful data structures to Python that guarantee <b>efficient calculations</b>.
    <br> </br> </li>
<li> It also supplies an <b>enormous library</b> of high-level mathematical functions.
    <br> </br> </li>
<li> It is an <b>open source</b> Python library for scientific computing.
    <br> </br> </li>


Some applications of numpy and linear algebra:

![Why_Numpy](Why_Numpy.png) 

**Recent case studies in science and technology**


![CaseStudies](Case_Studies.png)

# Define matrices and vectors

### Convert python datastructures to numpy array

In [6]:
# Define np array using python list

matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

print(matrix)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [8]:
# Define np array using pandas dataframe

df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [2, 6, 7, 8], "C": [9, 10, 11, 12]})

#Convert to numpy array using to_numpy function
matrix = df.to_numpy()

print(df,'\n')
print(matrix)

   A  B   C
0  1  2   9
1  2  6  10
2  3  7  11
3  4  8  12 

[[ 1  2  9]
 [ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]]


### Define vector, array and tensor

In [9]:
# Define a vector from linear algebra course

vector = np.array([1,2,3,4,5,6])

print(vector)

[1 2 3 4 5 6]


In [10]:
#Define an array from linear algebra course

matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

print(matrix)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [11]:
# Define a tensor from linear algebra course

tensor = np.array([matrix,matrix])
print(tensor.shape)
print(tensor)

(2, 3, 4)
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]]


### Define special arrays

We are going to explore the following functions:

<ol>
<li> Define an array of 1s: np.ones()
    <br> </br> </li>
<li> Define an array of 0s: np.zeros()
    <br> </br> </li>
<li> Define an array of constant: np.full()
    <br> </br> </li>
<li> Define interval and range arrays: np.interval() and np.range()
    <br> </br> </li>
  </ol>

The same concepts can be extended to define vectors and tensors

In [23]:
# Define an array with zero values
# All these functions can be extended to define any n-dimensional numpy array

zero_arr = np.zeros((3,4,5,1))

print(zero_arr)

[[[[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   [0.]
   [0.]]]]


In [25]:
# Define an array of 1s

one_arr = np.ones((3,2))

print(one_arr)

[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [28]:
# Define a array of constant values:

const_arr = np.full((5,2), 10.)

print(const_arr)

[[10. 10.]
 [10. 10.]
 [10. 10.]
 [10. 10.]
 [10. 10.]]


In [30]:
# Define interval and range arrays

range_arr = np.arange(2,9,2)               #start, end and interval

interval_arr = np.linspace(2,8, num=4)    # start, end and number of elements

print("Range array in numpy is:\n {}\n".format(range_arr))
print("Interval array in numpy is:\n {}\n".format(interval_arr))


Range array in numpy is:
 [2 4 6 8]

Interval array in numpy is:
 [2. 4. 6. 8.]



### Reading and writing arrays

In [31]:
# How to save NumPy objects

arr = np.arange(10,100,5)
base_path = '.'

#.npy is a standard format for save numpy data
file_name = 'sample_np.npy'

# Using python os module to join directory path and relative file path
import os
out = os.path.join(base_path,file_name)

print("saving array {} in file {}".format(arr, out))

'''
Standard way of reading and writing a file in python. With command opens file handler and closes automatically 
when the loop is finished.
wb is used to write (w) in binary (b) format. For normal string writing use 'w' only. Same while reading file objects 
'''
with open(out, 'wb') as f:
    np.save(f, arr)

# f = open(out, 'wb')
# f.close()

saving array [10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95] in file ./sample_np.npy


In [32]:
# How to read Numpy objects

with open(out,'rb') as f:
    arr1 = np.load(f)
    
print("loaded array {} from file {}".format(arr1, out))


loaded array [10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95] from file ./sample_np.npy


<b>Note</b>: To <b>save multiple arrays in a file</b>, please refer to <b>np.savez</b> and <b>savez_compressed</b> function https://numpy.org/doc/stable/reference/generated/numpy.savez.html#numpy.savez. Useful when dealing with multiple data sources which needs be saved.

    

In [None]:
#Importing and exporting csv

In [33]:
#Saving into csv

file_name = 'sample_np.csv'
out = os.path.join(base_path,file_name)
print("saving array {} in file {}".format(arr, out))

# fmt can be specified as integer, float, string, etc
np.savetxt(out,arr, delimiter=',', fmt='%d')


saving array [10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95] in file ./sample_np.csv


In [34]:


# fmt can be specified as integer, float, string, etc
arr1 = np.genfromtxt(out, delimiter=',')

print("loaded array {} from file {}".format(arr1, out))

loaded array [10. 15. 20. 25. 30. 35. 40. 45. 50. 55. 60. 65. 70. 75. 80. 85. 90. 95.] from file ./sample_np.csv


# Array Features: 
<ol>
<li> Datatypes
    <br> </br> </li>
<li> Indexing and Slicing
    <br> </br> </li>
<li> Shape and Size
    <br> </br> </li>
<li> Resizing and transpose
    <br> </br> </li>

### Datatypes

Let's explore the function **astype()** to change the datatype in an numpy array.

Following are the datatypes in the numpy arrays:
<ol>
<li> integer or int
    <br> </br> </li>
<li> float
    <br> </br> </li>
<li> String
    <br> </br> </li>
  </ol>


For both int and float, depending on the length, it could be 64 bit, 32 bit, 16 bit etc. More details on numpy datatypes is present at https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types

In [35]:
# changing element datatypes in numpy from float to string

interval_arr = np.linspace(2,8, num=4)
print(interval_arr)

print(type(interval_arr[0]))

interval_arr = interval_arr.astype('int64')

print(type(interval_arr[0]))

print(interval_arr)

#Get type using type


[2. 4. 6. 8.]
<class 'numpy.float64'>
<class 'numpy.int64'>
[2 4 6 8]


### Indexing and Slicing

<ol>
<li> An important concept to understand before going into array indexing is <b>axis of array</b>. Axis can be derived from shape of an array.
    <br> </br> </li>
<li> The $i^{th}$ element in shape() function returns the length of the corresponding axis and all the array operations are performed along that specified axis.
    <br> </br> </li>
  </ol>

Slicing: Similar to Python lists, numpy arrays can be sliced. Since arrays may be multidimensional, you must specify a **slice** or **integer** indexing for each dimension/axis of the array

In [37]:
#Already covered in list, FOCUS ON AXIS

arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

print("original array is \n{}\n".format(arr))

print("Subarray till 2nd row and 2nd to 3rd columns \n{}\n".format(arr[1:2, 1:3]))


original array is 
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Subarray till 2nd row and 2nd to 3rd columns 
[[6 7]]



In [39]:
#Mix integer indixing with slice indexing

row = arr[:, :-2]
print("Last row is \n{}\n".format(row))

Last row is 
[[ 1  2]
 [ 5  6]
 [ 9 10]]



### Shape and Size

<ol>
    <li><b>ndarray.shape</b>: Tuple of integers that indicate the number of elements stored along each dimension of the array. If, for example, you have a 2-D array with 2 rows and 3 columns, the shape of your array is (2, 3).
        <br> </br> </li>
    <li><b>ndarray.ndim</b>: Number of dimensions of the array.
        <br> </br> </li>
     <li><b>ndarray.size</b>: Total number of elements of the array. This is the product of the elements of the array’s shape.
        <br> </br> </li>
    </ol>


In [40]:
arr = np.array([[[0, 1, 2, 3],
                 [4, 5, 6, 7]],
                [[0, 1, 2, 3],
                 [4, 5, 6, 7]],
                [[0 ,1 ,2, 3],
                 [4, 5, 6, 7]]])

print("Number of dimension of array is \n{}\n".format(arr.ndim))
print("Total number of elements in the array are \n{}\n".format(arr.size))
print("Shape of the array is \n{}\n ".format(arr.shape))


Number of dimension of array is 
3

Total number of elements in the array are 
24

Shape of the array is 
(3, 2, 4)
 


### Array resizing

In [42]:
# Flatten an array using np.ravel()

print("original array is \n{}\n".format(arr))
print("Flattened array is \n{}\n".format(np.ravel(arr)))
print(arr.shape)

original array is 
[[[0 1 2 3]
  [4 5 6 7]]

 [[0 1 2 3]
  [4 5 6 7]]

 [[0 1 2 3]
  [4 5 6 7]]]

Flattened array is 
[0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7]

(3, 2, 4)


**Note:** You can think of reshaping as first raveling the array (using the given index order), then inserting the elements from the raveled array into the new array using the same kind of index ordering as was used for the raveling.

In [43]:
# np.resize internally calls np.reshape (in place operation) and returns reshaped copy of the array

print("Reshaped array with same number of elements as original \n{}\n".format(np.resize(arr,(4,6))))

print("Reshaped array with less number of elements as original \n{}\n".format(np.resize(arr,(2,3))))

print("Reshaped array with more number of elements as original \n{}\n".format(np.resize(arr,(10,6))))


Reshaped array with same number of elements as original 
[[0 1 2 3 4 5]
 [6 7 0 1 2 3]
 [4 5 6 7 0 1]
 [2 3 4 5 6 7]]

Reshaped array with less number of elements as original 
[[0 1 2]
 [3 4 5]]

Reshaped array with more number of elements as original 
[[0 1 2 3 4 5]
 [6 7 0 1 2 3]
 [4 5 6 7 0 1]
 [2 3 4 5 6 7]
 [0 1 2 3 4 5]
 [6 7 0 1 2 3]
 [4 5 6 7 0 1]
 [2 3 4 5 6 7]
 [0 1 2 3 4 5]
 [6 7 0 1 2 3]]



In [44]:
# Transpose is another resizing operation performed using np.Transpose or .T 

arr = np.arange(6).reshape((2,3))

print("array is \n{}\n".format(arr))

print("Transpose the array is \n{}\n".format(np.transpose(arr)))

print("Transpose the array is \n{}\n".format(arr.T))


array is 
[[0 1 2]
 [3 4 5]]

Transpose the array is 
[[0 3]
 [1 4]
 [2 5]]

Transpose the array is 
[[0 3]
 [1 4]
 [2 5]]



# Arthimetic Operation

<ol>
<li>Addition
    <br> </br> </li>
<li>Array concatination
    <br> </br> </li>
<li>Subtraction
    <br> </br> </li>
<li>Multiplication
    <br> </br> </li>
<li>Division
    <br> </br> </li>
    </ol>

In [45]:
# Shape of an array

x = np.array([[1, 2, 3], [4, 5, 6]])

print(x.shape)
print(x)

(2, 3)
[[1 2 3]
 [4 5 6]]


Axis 0 has length 2 and axis 1 has length 3 <to put in more intuitive details>
    
1. The default axis for array operations is 0 unless otherwise stated in the function defintion
2. **axis=-1** represents the last axis of an array.

In [46]:
# Arthimetic addition of arrays can be performed by simple addition if all the arrays are of same length.

x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6],[10,11]])

print("x is \n{}\n".format(x))
print("y is \n{}\n".format(y))
print("x+y is \n{}".format(x+y))



x is 
[[1 2]
 [3 4]]

y is 
[[ 5  6]
 [10 11]]

x+y is 
[[ 6  8]
 [13 15]]


In [47]:
#If arrays are of differnt shape the addition will fail

print(np.array([[1, 2], [3, 4]]) + np.array([1,3,4]))


ValueError: operands could not be broadcast together with shapes (2,2) (3,) 

In [48]:
# The same can be achieved by np.add function

print("x+y is \n{}".format(np.add(x,y)))


x+y is 
[[ 6  8]
 [13 15]]


### Array concatination

In [51]:
# Adding using concatenation along 0th axis


print("x is \n{}\n".format(x))
print("y is \n{}\n".format(y))
print("x+y along axis 0 is \n{}".format(np.concatenate((x, y), axis=1)))

# The same can be achieved without providing axis argument

x is 
[[1 2]
 [3 4]]

y is 
[[ 5  6]
 [10 11]]

x+y along axis 0 is 
[[ 1  2  5  6]
 [ 3  4 10 11]]


In [68]:
print("x+y along axis 1 is \n{}".format(np.concatenate((x, y), axis=-1)))

# The same can be achieved by providing axis=1

x+y along axis 1 is 
[[ 1  2  5  6]
 [ 3  4 10 11]]


### Subtraction

In [69]:
# Either subtract directly or use np.subtract

print("x-y is \n{}\n".format(x - y))

print("x-y is \n{}\n".format(np.subtract(x, y)))


x-y is 
[[-4 -4]
 [-7 -7]]

x-y is 
[[-4 -4]
 [-7 -7]]



### Multiplication

In [53]:
# Hardmond or elementwise product. Either multiply or use np.multiply

print(x)
print('\n')
print(y)
print("Elementwise product is \n{}\n".format(x * y))

print("Elementwise product is \n{}\n".format(np.multiply(x, y)))


[[1 2]
 [3 4]]


[[ 5  6]
 [10 11]]
Elementwise product is 
[[ 5 12]
 [30 44]]

Elementwise product is 
[[ 5 12]
 [30 44]]



In [66]:
# Dot Product can be done by np.dot 


print("Matrix multiplication \n{}\n".format(np.dot(x,y)))

Matrix multiplication 
[[25 28]
 [55 62]]



In [64]:
# same can be performed with a vector
vec = np.array([9,10]).reshape(2,1)

print(vec.shape)


print("x is \n{}\n".format(x))
print("vec is \n{}\n".format(vec))

print("Matrix and vector multiplication \n{}\n".format(np.dot(x, vec)))

#np.matmul is a similar function with few differences

(2, 1)
x is 
[[1 2]
 [3 4]]

vec is 
[[ 9]
 [10]]

Matrix and vector multiplication 
[[29]
 [67]]



### Division

In [54]:
# Either divide directly or use np.divide

print("Elementwise product is \n{}\n".format(y / x))
print("Elementwise product is \n{}\n".format(np.divide(y, x)))


Elementwise product is 
[[5.         3.        ]
 [3.33333333 2.75      ]]

Elementwise product is 
[[5.         3.        ]
 [3.33333333 2.75      ]]



In [16]:
# Alternatively you can round off the numbers in division using:

print("Elementwise product is \n{}\n".format(y // x))


Elementwise product is 
[[5 3]
 [3 2]]



# Broadcasting

Broadcasting is a powerful mechanism that allows numpy to work with arrays of different shapes when performing arithmetic operations. Frequently we have a smaller array and a larger array, and we want to use the smaller array multiple times to perform some operation on the larger array.

**Definition:** Subject to certain constraints, the smaller array is broadcast/repeated across the larger array so that they have compatible shapes. <b>Broadcasting provides a means of vectorizing array operations so that looping occurs in C instead of Python</b>. It does this without making needless copies of data and usually leads to efficient algorithm implementations


For example, suppose that we want to add a constant vector to each row of a matrix. We could do it like this:


In [62]:
'''
Adding the vector v to each row of the matrix x is equivalent to forming a matrix vv by stacking multiple copies of v vertically, 
then performing elementwise summation of x and vv. We could implement this approach like this:

'''
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = np.empty_like(x)   # Create an empty matrix with the same shape as x

vv = np.tile(v, (4, 1))   # Stack 4 copies of v on top of each other
print(vv, '\n', vv.shape)                 

y = x + vv  # Add x and vv elementwise
print(y, '\n', y.shape)  


[[1 0 1]
 [1 0 1]
 [1 0 1]
 [1 0 1]] 
 (4, 3)
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]] 
 (4, 3)


In [19]:
# Broadcasting allows us to perform this computation without actually creating multiple copies of v.
#Hence, using broadcasting:

x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = x + v  # Add v to each row of x using broadcasting
print(y)


[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


**Note**: When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing dimensions and works its way forward. Two dimensions are compatible when

1. they are equal, or

2. one of them is 1

If these conditions are not met, a ValueError: operands could not be broadcast together exception is thrown, indicating that the arrays have incompatible shapes. The size of the resulting array is the size that is not 1 along each axis of the inputs.


Functions that support broadcasting are called **universal functions**. A list of such functions are available at: https://numpy.org/doc/stable/reference/ufuncs.html#available-ufuncs

# Deletion, Sorting and Conditionals in arrays

### Deletion

In [63]:
#Using np.delete

arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

print("original array is \n {}\n".format(arr))

print("deleting 2nd item along column \n{}\n".format(np.delete(arr, 2, 1)))

print("deleting 2nd item along row \n{}\n".format(np.delete(arr, 2, 0)))


original array is 
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

deleting 2nd item along column 
[[ 1  2  4]
 [ 5  6  8]
 [ 9 10 12]]

deleting 2nd item along row 
[[1 2 3 4]
 [5 6 7 8]]



### Sorting

In [64]:
#Use np.sort 

arr = np.array([[4,1],[3,2],[2,0]])
print("orginal array is \n{}\n".format(arr))

print("Sorted along the last axis \n{}\n".format(np.sort(arr)))    # sort along the last axis

print("Sorted along the first axis \n{}\n".format(np.sort(arr, axis=0)))    #sort along the first axis


print("Sorted the flattened array \n{}\n".format(np.sort(arr, axis=None)))     #sort the flattened array



orginal array is 
[[4 1]
 [3 2]
 [2 0]]

Sorted along the last axis 
[[1 4]
 [2 3]
 [0 2]]

Sorted along the first axis 
[[2 0]
 [3 1]
 [4 2]]

Sorted the flattened array 
[0 1 2 2 3 4]



In [65]:
#Advanced sorting

dtype = [('name', 'S10'), ('height', float), ('age', int)]

values = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
          ('Galahad', 1.7, 38)]

a = np.array(values, dtype=dtype)       # create a structured array

np.sort(a, order=['age', 'height'])

array([(b'Galahad', 1.7, 38), (b'Lancelot', 1.9, 38),
       (b'Arthur', 1.8, 41)],
      dtype=[('name', 'S10'), ('height', '<f8'), ('age', '<i8')])

In [66]:
# Conditionals:

arr = np.array([[4,1],[3,-2],[-12,0]])
print("orginal array is \n{}\n".format(arr))

ind = np.where(arr>0)
print(ind)
arr1 = arr[ind]
print(arr1)

orginal array is 
[[  4   1]
 [  3  -2]
 [-12   0]]

(array([0, 0, 1]), array([0, 1, 0]))
[4 1 3]


# Reading Assignment

### Linear algebra

We encourage you to read the documentation of numpy.linalg at https://numpy.org/doc/stable/reference/routines.linalg.html, specially the functions listed below. This will help you in getting comfortable with reading python package documentations and solve some of the questions in assignment

<b>numpy.linalg APIs</b>:

<ol>
<li> linalg.inv
    <br> </br> </li>
<li> linalg.norm
    <br> </br> </li>
<li> linalg.eig or linalg.eigs
    <br> </br> </li>
<li> linalg.trace, np.eye 
    <br> </br> </li>
    </ol>

### More Advanced Topic

The following topics will be covered in Statistics course
<ol>
<li><b>Descriptive statistics</b>: Maximum, minimum, sum, mean, product, standard deviation, Argsort, Argmin
    <br> </br> </li>
<li>Unique, count, square, sqrt
    <br> </br> </li>
<li>Working with mathematical formulas
    <br> </br> </li>
<li>Plotting arrays with Matplotlib
    <br> </br> </li>
    </ol>
