### NumPy Basics




In [1]:
# Import the numpy library
# np is simply an alias, you may use any other alias, though np is quite standard


In [64]:
#!pip install numpy

In [3]:
import numpy

In [4]:
import numpy as np

In [5]:
# create numpy array
print(np.arange(100))

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


In [6]:
# Creating a 1-D array using a list
# np.array() takes in a list or a tuple as argument, and converts into an array
my_array=np.array([1,2,3])
print(my_array)

[1 2 3]


In [7]:
# Creating a 2-D array using two lists
array_2d=np.array([[1,2,3],[4,5,6]])
print(array_2d)

[[1 2 3]
 [4 5 6]]


In [65]:
array_2d.sum(axis=1)

array([42, 26, 10])

In NumPy, dimensions are called **axes**. In the 2-d array above, there are two axes, having two and three elements respectively. 

In NumPy terminology, for 2-D arrays:
* ```axis = 0``` refers to the rows
* ```axis = 1``` refers to the columns

<img src="numpy_axes.jpg" style="width: 600px; height: 400px">

In [9]:
list1=[3,4,5,7]
list2=[99,43,56,2]
multi_prod=list(map(lambda x,y:x*y,list1,list2))
print(multi_prod)

[297, 172, 280, 14]


In [10]:
# The numpy array way to do it: simply multiply the two arrays
list1=[3,4,5,7]
list2=[99,43,56,2]
array3=np.array(list1)*np.array(list2)
print(array3)

[297 172 280  14]


As you can see, the NumPy way of multiplication is clearly more concise.

Even simple mathematical operations on lists require for loops, unlike with arrays. For example, to calculate the square of every number in a list:

In [11]:
list1=[1,2,3,4]
list_sq=[i**2 for i in list1]
print(list_sq)

[1, 4, 9, 16]


In [12]:
sq_np=np.array(list1)**2
print(sq_np)

[ 1  4  9 16]


This was with 1-D arrays. You'll often work with 2-D arrays (matrices), where the difference would be even greater. With lists, you'll have to store matrices as lists of lists and loop through them. With NumPy, you simply multiply the matrices.

### Creating NumPy Arrays 

There are multiple ways to create numpy arrays, the most commmon ones being:
* Convert lists or tuples to arrays using ```np.array()```, as done above
* Initialise arrays of fixed size (when the size is known) 


In [13]:
list1=[1,2,3]
tuple1=(4,5,6)
array_from_list=np.array(list1)
array_from_tuple=np.array(tuple1)
print(array_from_list)
print(array_from_tuple)

[1 2 3]
[4 5 6]


The other common way is to initialise arrays. You do this when you know the size of the array beforehand.

The following ways are commonly used:
* ```np.ones()```: Create array of 1s
* ```np.zeros()```: Create array of 0s
* ```np.random.random()```: Create array of random numbers
* ```np.arange()```: Create array with increments of a fixed step size
* ```np.linspace()```: Create array of fixed length

In [14]:
# Tip: Use help to see the syntax when required
help(np.array)

Help on built-in function array in module numpy:

array(...)
    array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
          like=None)
    
    Create an array.
    
    Parameters
    ----------
    object : array_like
        An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    dtype : data-type, optional
        The desired data-type for the array.  If not given, then the type will
        be determined as the minimum type required to hold the objects in the
        sequence.
    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy will
        only be made if __array__ returns a copy, if obj is a nested sequence,
        or if a copy is needed to satisfy any of the other requirements
        (`dtype`, `order`, etc.).
    order : {'K', 'A', 'C', 'F'}, optional
        Specify the memory layout of the array. If object is not an array

In [15]:
# Creating a 5 x 3 array of ones
np.ones((5,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [16]:
# Notice that, by default, numpy creates data type = float64
# Can provide dtype explicitly using dtype
np.ones((5,3),dtype=np.int)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  np.ones((5,3),dtype=np.int)


array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [17]:
# Creating array of zeros
np.zeros([2,2],dtype=np.int)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  np.zeros([2,2],dtype=np.int)


array([[0, 0],
       [0, 0]])

In [18]:
# Array of random numbers
np.random.random((3,4))

array([[0.93093311, 0.08064533, 0.7760121 , 0.99590998],
       [0.88333879, 0.47557862, 0.76556825, 0.28081198],
       [0.78308341, 0.97096462, 0.85647676, 0.44344911]])

In [19]:
# np.arange()
# np.arange() is the numpy equivalent of range()
# Notice that 10 is included, 100 is not, as in standard python lists

# From 10 to 100 with a step of 5
np.arange(10,100,5)

array([10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90,
       95])

In [20]:
#even numbers 0,100
np.arange(0,100,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
       68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98])

In [21]:
# Create a 4 x 4 random array of integers ranging from 0 to 9
np.random.randint(0,10,(4,4))

array([[4, 9, 4, 4],
       [7, 6, 5, 5],
       [4, 2, 0, 0],
       [0, 3, 9, 5]])

In [22]:
# full()
# tile()
# eye()
# randint()
# sort()
# append()
# delete()
# linespace()

In [23]:
np.sort(np.linspace(15,18,25))

array([15.   , 15.125, 15.25 , 15.375, 15.5  , 15.625, 15.75 , 15.875,
       16.   , 16.125, 16.25 , 16.375, 16.5  , 16.625, 16.75 , 16.875,
       17.   , 17.125, 17.25 , 17.375, 17.5  , 17.625, 17.75 , 17.875,
       18.   ])

In [24]:
help(np.linspace)

Help on function linspace in module numpy:

linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
    Return evenly spaced numbers over a specified interval.
    
    Returns `num` evenly spaced samples, calculated over the
    interval [`start`, `stop`].
    
    The endpoint of the interval can optionally be excluded.
    
    .. versionchanged:: 1.16.0
        Non-scalar `start` and `stop` are now supported.
    
    .. versionchanged:: 1.20.0
        Values are rounded towards ``-inf`` instead of ``0`` when an
        integer ``dtype`` is specified. The old behavior can
        still be obtained with ``np.linspace(start, stop, num).astype(int)``
    
    Parameters
    ----------
    start : array_like
        The starting value of the sequence.
    stop : array_like
        The end value of the sequence, unless `endpoint` is set to False.
        In that case, the sequence consists of all but the last of ``num + 1``
        evenly spaced samples, so that 

In [25]:
# creates a constant array
np.full((4,4),2)

array([[2, 2, 2, 2],
       [2, 2, 2, 2],
       [2, 2, 2, 2],
       [2, 2, 2, 2]])

In [26]:
arr=[1,2,3,0]
np.tile(arr,40)

array([1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2,
       3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0,
       1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2,
       3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0,
       1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2,
       3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0,
       1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2,
       3, 0, 1, 2, 3, 0])

In [27]:
np.eye(3,dtype=int)

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

### Inspect the Structure and Content of Arrays

It is helpful to inspect the structure of numpy arrays, especially while working with large arrays. Some attributes of numpy arrays are:
* ```shape```: Shape of array (n x m)
* ```dtype```: data type (int, float etc.)
* ```ndim```: Number of dimensions (or axes)
* ```itemsize```: Memory used by each array elememnt in bytes


Let's say you are working with a moderately large array of size 1000 x 300. First, you would want to wrap your head around the basic shape and size of the array. 

In [28]:
rand_array=np.random.random((1000,300))
print(rand_array)

[[0.20468614 0.45811089 0.30923485 ... 0.29019203 0.64549636 0.35556457]
 [0.23317297 0.4477838  0.04336953 ... 0.90394998 0.62560381 0.31034824]
 [0.53713818 0.07589903 0.35865376 ... 0.07867772 0.15567807 0.43080682]
 ...
 [0.70239337 0.79337073 0.02093514 ... 0.93164835 0.31333083 0.42054133]
 [0.58157123 0.10986455 0.42542247 ... 0.49343643 0.90298385 0.49642714]
 [0.60267385 0.51176999 0.6713385  ... 0.23245741 0.11131479 0.61592064]]


In [29]:
print("Shape: {}".format(rand_array.shape))
print("dtype: {}".format(rand_array.dtype))
print("Dimension: {}".format(rand_array.ndim))
print("Item Size: {}".format(rand_array.itemsize))

Shape: (1000, 300)
dtype: float64
Dimension: 2
Item Size: 8


Reading 3-D arrays is not very obvious, because we can only print maximum two dimensions on paper, and thus they are printed according to a specific convention. Printing higher dimensional arrays follows the following conventions:
* The last axis is printed from left to right
* The second-to-last axis is printed from top to bottom
* The other axes are also printed top-to-bottom, with each slice separated by another using an empty line 

Let's see some examples.

In [30]:
array_3d=np.arange(24).reshape(2,3,4)
print(array_3d)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


Apart from the methods mentioned above, there are a few more NumPy functions that you can use to create special NumPy arrays:

np.full(): Create a constant array of any number ‘n’

np.tile(): Create a new array by repeating an existing array for a particular number of times

np.eye(): Create an identity matrix of any dimension

np.random.randint(): Create a random array of integers within a particular range 


In [31]:
array_val=np.array([1,2,3,4])

In [32]:
array_val

array([1, 2, 3, 4])

In [33]:
np.append(array_val,[77])

array([ 1,  2,  3,  4, 77])

In [34]:
array_val=np.array([[1,2,3,4],[8,6,5,4]])

In [35]:
np.append(array_val[0],[33])

array([ 1,  2,  3,  4, 33])

# Day 2

In [36]:
arr1=[32,44,56,7,78]
np.min(arr1)

7

In [37]:
np.max(arr1)

78

In [38]:
# Index
np.argmin(arr1)

3

In [39]:
np.argmax(arr1)

4

In [40]:
len(arr1)

5

In [41]:
arr1[0]

32

In [42]:
arr1[2]

56

In [43]:
# Last Element
arr1[-1]

78

In [44]:
# Iteration
for i in arr1:
    print(i)

32
44
56
7
78


In [45]:
list1=[1,2,3,'r']
for i in list1:
    print(i)

1
2
3
r


In [46]:
# Slicing in Numpy
array_2d=np.array([[9,10,11,12],[5,6,7,8],[1,2,3,4]])
print(array_2d)
print(array_2d[2,1])

[[ 9 10 11 12]
 [ 5  6  7  8]
 [ 1  2  3  4]]
2


In [47]:
# Second row, all column values
print(array_2d[1,:])

[5 6 7 8]


In [48]:
# Second Row, All column values except the last
print(array_2d[1,:-1])

[5 6 7]


In [49]:
print(array_2d[1,1:])

[6 7 8]


In [50]:
# Slicing all rows and the third column 
print(array_2d[:,2])

[11  7  3]


In [51]:
print(array_2d[1,0])

5


In [52]:
# all rows and first 3 columns 
print(array_2d[:,:3])

[[ 9 10 11]
 [ 5  6  7]
 [ 1  2  3]]


# Subsetting

In [53]:
array_subset=np.array([[0,1,2],[3,4,5],[6,7,8],[9,10,11]])

In [54]:
print(array_subset)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [55]:
# print [[4,5],[7,8]]

In [56]:
array_subset[1:3,1:]

array([[4, 5],
       [7, 8]])

In [57]:
array_subset[2,1:]

array([7, 8])

In [58]:
array_subset[1:3,1:3]

array([[4, 5],
       [7, 8]])

# Compare Computation Times in NumPy and Standard Python Lists

Let's compare the computation times of arrays and lists for a simple task of calculating the element-wise product of numbers.


In [59]:
list_1=[i for i in range(1000000)]
list_2=[j**2 for j in range(1000000) ]

# importing time package
import time

# list multiplication

# Initialise start time
t0=time.time()
list_multi=list(map(lambda x,y:x*y,list_1,list_2))
t1=time.time()
list_opr_time=t1-t0
print("list time",list_opr_time)


#numpy array
arr1=np.array(list_1)
arr2=np.array(list_2)
t0=time.time()
arr3=arr1*arr2
t1=time.time()
np_time=t1-t0
print("np Time", np_time)

list time 0.07571244239807129
np Time 0.002790689468383789


In [60]:
time.time()

1640193109.5489166