<a href="https://colab.research.google.com/github/huuphong1995/hello-world/blob/master/Learning_Numpy_Arrray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Numpy and Array Basics

The numpy library is one of the core packages in Python's data science software stack. Many other Python data analysis libraries require numpy as a prerequisite, because they use its array data structure as a building block. The Kaggle Python environment has numpy available by default; if you are running Python locally, the Anaconda Python distribution comes with numpy as well.

Numpy implements a data structure called the N-dimensional array, or ndarray. ndarrays are similar to lists in that they contain a collection of items that can be accessed via indexes. Unlike lists, however, ndarrays are homogeneous, meaning they can only contain objects of the same type, and they can be multi-dimensional, making it easy to store 2-dimensional tables or matrices.



In [1]:
# To work with ndarrays, we need to load the numpy library.

import numpy as np

In [None]:
# The "np" alias gives access to every function in the numpy library.

my_list = [1, 2, 3, 4]                      # A plain Python list

my_array = np.array(my_list)                # Build an ndarray from the list

# Show the object's type first, then its contents, each on its own line.
print(type(my_array), my_array, sep="\n")

In [None]:
# To create an array with more than one dimension, pass a nested list to np.array();
# each inner list becomes one row of the result.

second_list = [5,6,7,8]

two_d_array = np.array([my_list, second_list])   # my_list was defined in an earlier cell

print(two_d_array)

In [None]:
two_d_array.size    # Total number of elements in the array

In [12]:
two_d_array.dtype    # Data type shared by every element of the array

dtype('int64')

In [16]:
# np.identity() creates a square 2-D array with 1's on the main diagonal and 0's elsewhere

np.identity(n = 5)          # n is the number of rows (and columns) of the square array

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [None]:
# np.eye() creates a 2-D array with 1's along a specified diagonal and 0's elsewhere

np.eye(N = 3,  # Number of rows
       M = 5,  # Number of columns
       k = 1)  # Index of the diagonal (main diagonal (0) is default; positive values select a diagonal above it)

In [21]:
# np.ones() creates an array filled with ones:

np.ones(shape = [2,4])      # shape is [number of rows, number of columns]

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [23]:
# np.zeros() creates an array filled with zeros:

np.zeros(shape = [4,6])     # shape is [number of rows, number of columns]

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

Array Indexing and Slicing

In [None]:
# Numpy ndarrays offer numbered indexing and slicing syntax that mirrors the syntax for Python lists:

one_d_array = np.array([1,2,3,4,5,6])

one_d_array[3]        # Get the item at index 3 (indexes start at 0, so this is the value 4)

In [28]:
one_d_array[3:]       # Get a slice from index 3 (inclusive) to the end

array([4, 5, 6])

In [32]:
one_d_array[:3]       # Get a slice from the start up to, but not including, index 3

array([1, 2, 3])

In [33]:
one_d_array[::-1]      # Slice backwards (step of -1) to reverse the array

array([6, 5, 4, 3, 2, 1])

In [35]:
# If an ndarray has more than one dimension, separate the indexes for each dimension with a comma.
# First build a 3-row, 6-column array to index into; adding a number to an array adds it to every element:

two_d_array = np.array([one_d_array, one_d_array + 6, one_d_array +12])

print(two_d_array)

[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]
 [13 14 15 16 17 18]]


In [37]:
# Get the element at row index 1, column index 4 (both indexes start at 0)

two_d_array[1, 4]

11

In [39]:
# Reverse both dimensions: the first slice reverses the rows, the second reverses the columns (a 180 degree rotation)

two_d_array[::-1, ::-1]

array([[18, 17, 16, 15, 14, 13],
       [12, 11, 10,  9,  8,  7],
       [ 6,  5,  4,  3,  2,  1]])

Reshaping Arrays

In [None]:
# Numpy has a variety of built-in functions to help you manipulate arrays quickly without having to use complicated indexing operations

# Reshape an array into a new array with the same data but different structure with np.reshape():

# The target shape is passed positionally: the `newshape` keyword is deprecated
# in recent NumPy releases (in favor of `shape`), while the positional form
# works on every version.
np.reshape(two_d_array,                      # Array to reshape
           (6, 3))                           # Dimensions of the new array (rows, columns)

In [60]:
# Unravel a multi-dimensional array into 1 dimension with np.ravel()

np.ravel(a = two_d_array, order = 'C')     # Use C-style unraveling (by rows)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18])

In [43]:
# Unravel a multi-dimensional array into 1 dimension using Fortran ('F') order

np.ravel(a = two_d_array, order ='F' )      # Use Fortran-style unraveling (by columns)

array([ 1,  7, 13,  2,  8, 14,  3,  9, 15,  4, 10, 16,  5, 11, 17,  6, 12,
       18])

In [None]:
# Alternatively, use ndarray.flatten() to flatten a multi-dimensional array into 1 dimension and return a copy of the result
two_d_array.flatten()

In [None]:
# Get the transpose of an array with ndarray.T

two_d_array.T            # Rows become columns and columns become rows

In [49]:
# Flip an array vertically or horizontally with np.flipud() and np.fliplr() respectively

np.flipud(two_d_array)   # Flip up/down: reverses the order of the rows

array([[13, 14, 15, 16, 17, 18],
       [ 7,  8,  9, 10, 11, 12],
       [ 1,  2,  3,  4,  5,  6]])

In [51]:
np.fliplr(two_d_array)   # Flip left/right: reverses the order of the columns

array([[ 6,  5,  4,  3,  2,  1],
       [12, 11, 10,  9,  8,  7],
       [18, 17, 16, 15, 14, 13]])

In [None]:
# Rotate an array 90 degrees counter-clockwise with np.rot90()

np.rot90(two_d_array, k=1)   # k is the number of 90-degree rotations to apply

In [56]:
# Shift elements in an array along a given dimension with np.roll()
# axis = 1 shifts the elements within each row (across the columns); elements pushed past the end wrap around to the start
np.roll(a = two_d_array,
        shift = 2,
        axis =1)

array([[ 5,  6,  1,  2,  3,  4],
       [11, 12,  7,  8,  9, 10],
       [17, 18, 13, 14, 15, 16]])

In [58]:
# axis = 0 shifts whole rows downward; rows pushed past the bottom wrap around to the top
np.roll(a= two_d_array,
        shift =2,
        axis =0)

array([[ 7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18],
       [ 1,  2,  3,  4,  5,  6]])

In [59]:
# Leave the axis argument empty to shift on a flattened version of the array (shift across all dimensions);
# the result is returned in the original shape

np.roll(a = two_d_array,
        shift =2)

array([[17, 18,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 16]])

In [None]:
# Join arrays along an axis with np.concatenate()

array_to_join = np.array([[10,20,30], [40,50,60], [70,80,90]])

np.concatenate((two_d_array, array_to_join),
               axis=1)   # axis=1 joins the arrays side by side, so both must have the same number of rows

Array Math Operations

In [66]:
two_d_array + 100    # Add 100 to each element (the scalar is applied element-wise)

array([[101, 102, 103, 104, 105, 106],
       [107, 108, 109, 110, 111, 112],
       [113, 114, 115, 116, 117, 118]])

In [68]:
two_d_array -100     # Subtract 100 from each element

array([[-99, -98, -97, -96, -95, -94],
       [-93, -92, -91, -90, -89, -88],
       [-87, -86, -85, -84, -83, -82]])

In [69]:
two_d_array *2        # Multiply each element by 2 (element-wise)

array([[ 2,  4,  6,  8, 10, 12],
       [14, 16, 18, 20, 22, 24],
       [26, 28, 30, 32, 34, 36]])

In [70]:
two_d_array **2        # Square each element

array([[  1,   4,   9,  16,  25,  36],
       [ 49,  64,  81, 100, 121, 144],
       [169, 196, 225, 256, 289, 324]])

In [72]:
two_d_array % 2        # Take each element modulo 2 (1 for odd values, 0 for even values)

array([[1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0]])

In [73]:
small_array1 = np.array([[1,2], [3,4]])        # A small 2x2 array for array-to-array operations

small_array1 + small_array1                     # Add two arrays element-wise

array([[2, 4],
       [6, 8]])

In [75]:
small_array1 * small_array1                     # Multiply two arrays element-wise (not a matrix product)

array([[ 1,  4],
       [ 9, 16]])

In [76]:
small_array1 ** small_array1                    # Element-wise power: [1^1, 2^2], [3^3, 4^4]

array([[  1,   4],
       [ 27, 256]])

In [None]:
# Numpy also offers a variety of named math functions for ndarrays. There are too many to cover in detail here, so we'll just look at a selection of some of the most useful ones for data analysis

In [81]:
# Get the mean of the elements along an axis with np.mean()
# (only the last expression in a cell is displayed, so the first result is shown in its comment)

np.mean(two_d_array, axis =1)            # Mean of each row: array([ 3.5,  9.5, 15.5])

np.mean(two_d_array, axis =0)            # Mean of each column: array([ 7.,  8.,  9., 10., 11., 12.])

array([ 7.,  8.,  9., 10., 11., 12.])

In [82]:
# Get the standard deviation of all the elements in an array with np.std()

np.std(two_d_array)

5.188127472091127

In [87]:
# Provide an axis argument to get standard deviations across a dimension
# (only the last expression in a cell is displayed, so the first result is shown in its comment)

np.std(two_d_array, axis=1)                   # Per row: array([1.70782513, 1.70782513, 1.70782513])

np.std(two_d_array, axis=0)                   # Per column: array([4.89897949, 4.89897949, 4.89897949, 4.89897949, 4.89897949, 4.89897949])

array([4.89897949, 4.89897949, 4.89897949, 4.89897949, 4.89897949,
       4.89897949])

In [88]:
# Sum the elements of an array across an axis with np.sum()
# (only the last expression in a cell is displayed, so the first result is shown in its comment)

np.sum(two_d_array, axis=1)        # Get the row sums: array([21, 57, 93])

np.sum(two_d_array, axis= 0)        # Get the column sums

array([21, 24, 27, 30, 33, 36])

In [90]:
# Take the natural log of each element in an array with np.log()

np.log(two_d_array)

array([[0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791,
        1.79175947],
       [1.94591015, 2.07944154, 2.19722458, 2.30258509, 2.39789527,
        2.48490665],
       [2.56494936, 2.63905733, 2.7080502 , 2.77258872, 2.83321334,
        2.89037176]])

In [91]:
# Take the square root of each element in an array with np.sqrt()

np.sqrt(two_d_array)

array([[1.        , 1.41421356, 1.73205081, 2.        , 2.23606798,
        2.44948974],
       [2.64575131, 2.82842712, 3.        , 3.16227766, 3.31662479,
        3.46410162],
       [3.60555128, 3.74165739, 3.87298335, 4.        , 4.12310563,
        4.24264069]])

In [92]:
# Take the vector dot product of row 0 and row 1 (sum of the element-wise products)

np.dot(two_d_array[0,0:],  # Slice row 0 (all columns)
       two_d_array[1,0:])  # Slice row 1 (all columns)
       

217

In [98]:
# Do a matrix multiply

np.dot(small_array1, small_array1)   # With two 2-D arrays, np.dot() performs matrix multiplication

array([[ 7, 10],
       [15, 22]])

104

Wrap up

Numpy's ndarray data structure provides many desirable features for working with data, such as element-wise math operations and a variety of functions that work on 2D arrays. Since numpy was built with data analysis in mind, its math operations are optimized for that purpose and are generally faster than what could be achieved if you hand-coded functions to carry out similar operations on lists.