In [None]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# What is NumPy?

NumPy is an open-source, versatile general-purpose package used for array-processing. It is short of Numerical Python. It is known for its high-end performance with powerful N-dimensional array objects and the tools it is loaded with to work with arrays. The package is an extension of Python and is used to perform scientific computations and other broadcasting functions.

NumPy is easy to use, well-optimized and highly flexible. 

It is the backbone of Machine Learning in Python. It is one of the most important libraries in Python for numerical computations.

It adds support to core Python for multi-dimensional arrays and matrices and fast vectorized operations on these arrays.

# What are the uses of NumPy?

The open-source numerical library on Python supports multi-dimensional arrays and contains matrix data structures. Different types of mathematical operations can be performed on arrays using NumPy. This includes trigonometric operations as well as statistical and algebraic computations. Numeric and Numarray are extensions of NumPy. 

# Why is NumPy preferred to other programming tools such as Idl, Matlab, Octave, Or Yorick?

NumPy is a high-performance library in the Python programming language that allows scientific calculations. It is preferred to Idl, Matlab, Octave, Or Yorick because it is open-source and free. Also, since it uses Python which is a general-purpose programming language, it scores over a generic programming language when it comes to connecting Python’s interpreter to C/C++ and Fortran code. 

NumPy supports multi-dimensional arrays and matrices and helps to perform complex mathematical operations on them. 

# What are the various features of NumPy?

As a powerful open-source package used for array-processing, NumPy has various useful features. They are:

>Contains a N-dimensional array object

>It is interolerable; compatible with many hardware and computing platforms

>Works extremely well with array libraries; sparse, distributed or GPU

>Ability to perform complicated (broadcasting) functions

>Tools that enable integration with C or C++ and Fortran code 

>Ability to perform high-level mathematical functions like statistics, Fourier transform, sorting, searching, linear algebra, etc 

>It can also behave as a multi-dimensional container for generic data

>Supports scientific and financial calculations

In [None]:
# Create a numpy array

np_arr = np.array([[11,23,32],[12,34,56],[77,77,54]])
print(np_arr)
print()
print(np_arr.shape)

[[11 23 32]
 [12 34 56]
 [77 77 54]]

(3, 3)


In [None]:
np_arr = np.array([[11,23,32],[12,34,56],[77,77]])
print(np_arr)
print()
print(np_arr.shape)

NameError: ignored

In [None]:
np_arr = np.array([[11,23,32],[12,34,56],[77,77,None]])
print(np_arr)
print()
print(np_arr.shape)

[[11 23 32]
 [12 34 56]
 [77 77 None]]

(3, 3)


In addition to these, we can create arrays using a bunch of special functions provided by numpy.

In [None]:
np_zeros = np.zeros([2,4])
np_zeros

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
np_zeros1 = np.zeros((2,4))
np_zeros1

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
arr = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
np.zeros_like(np_zeros)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
np_ones = np.ones([2,4])
np_ones

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [None]:
np_ones1 = np.ones((2,4))
np_ones1

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [None]:
np.ones_like(np_ones)

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

### Full Array

In [None]:
np.full((3,2),4)

array([[4, 4],
       [4, 4],
       [4, 4]])

In [None]:
np.full((5,4,1),4)

array([[[4],
        [4],
        [4],
        [4]],

       [[4],
        [4],
        [4],
        [4]],

       [[4],
        [4],
        [4],
        [4]],

       [[4],
        [4],
        [4],
        [4]],

       [[4],
        [4],
        [4],
        [4]]])

In [None]:
np.full((4,3,2,3),1)

array([[[[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]]],


       [[[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]]],


       [[[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]]],


       [[[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]],

        [[1, 1, 1],
         [1, 1, 1]]]])

In [None]:
np.full((4,3,2,3),1).shape

(4, 3, 2, 3)

# Basic Slicing and Indexing

In [None]:
arr = np.array([[10,20,30],[40,50,60],[18,18,18]])
print(arr)
print(arr.shape)
print(arr.dtype)
print(arr[0])
print(arr[0][0])
print(arr[0,0])
print(arr[-1])

[[10 20 30]
 [40 50 60]
 [18 18 18]]
(3, 3)
int64
[10 20 30]
10
10
[18 18 18]


In [None]:
np.arange(12)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [None]:
np.arange(12).reshape(3,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
np.arange(12).reshape(4,3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [None]:
np.arange(12).reshape(2,6)

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [None]:
np.arange(12).reshape(12,1)

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11]])

In [None]:
np.arange(12).reshape(2,2,3) 
# 12 = 2*2*3

# This will generate 2 arrays, each of 2 rows and 3 cols

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

In [None]:
np.arange(12).reshape(2,3,2)

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]]])

In [None]:
np.arange(12).reshape(1,6,2)

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9],
        [10, 11]]])

In [None]:
np.arange(12).reshape(2,6,1)

array([[[ 0],
        [ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5]],

       [[ 6],
        [ 7],
        [ 8],
        [ 9],
        [10],
        [11]]])

In [None]:
from numpy import array
# define array
data = array([[100, 200, 300],[400, 500, 600],[700, 800, 900]]) # separate data
X, y = data[:, :-1], data[:, -1]
print("X=",X)
print("---------------")
print("y=",y)

X= [[100 200]
 [400 500]
 [700 800]]
---------------
y= [300 600 900]


# Advanced Indexing

This advanced indexing occurs when the reference object is also an array.
The simplest type of indexing is when we provide an array that’s equal in dimensions to
the array being accessed.

For example:

In [None]:
arr

array([[10, 20, 30],
       [40, 50, 60],
       [18, 18, 18]])

In [None]:
print("\n",arr)
print("\n",arr[[0,1,2],[1,0,0]])


 [[10 20 30]
 [40 50 60]
 [18 18 18]]

 [20 40 18]


# Boolean Indexing

This advanced indexing occurs when the reference object is an array of Boolean values.

This is used when we want to access data based on some conditions, in that case, Boolean
indexing can be used.

We will illustrate it with an example. Suppose in one array, we have the names of some
cities and in another array, we have some data related to those cities.

In [None]:
cities = np.array(["Delhi","Mumbai","Banglaore","Chennai","Bhopal"]) 
city_data = np.random.randn(5,3)
print("\ncity_data:\n",city_data)


city_data:
 [[ 0.13081785 -0.08874087 -1.10618288]
 [-1.27106392 -0.93733788 -0.19847776]
 [ 2.59383146 -0.70228583  0.31675341]
 [ 1.20837392 -0.88679141 -1.0752098 ]
 [ 0.18771092  0.30253931  0.54542954]]


In [None]:
city_data[cities =="Mumbai"]
# depending on which index is True, it prints that row
# In this case, mumbai is at index 2, therefore row no. 2 will be printed

array([[-1.27106392, -0.93733788, -0.19847776]])

In [None]:
print(cities =="Mumbai")

[False  True False False False]


In [None]:
# if the match does not happen , then all values are false.
city_data[cities =="Manhattan"]
# the o/p in such a case is indicating shape as(0 rows, 3 features)

array([], shape=(0, 3), dtype=float64)

In [None]:
city_data[city_data >0]

array([0.13081785, 2.59383146, 0.31675341, 1.20837392, 0.18771092,
       0.30253931, 0.54542954])

In [None]:
city_data[city_data >0] = 0 # all positive values would be made 0 city_data

# Operations on Arrays

Most of the operations on the numpy arrays is achieved by using Universal functions
(Ufuncs).

Numpy provides a rich set of functions that we can leverage for various
operations on arrays. We only cover some of those functions here.

Universal functions are functions that operate on arrays in an element by element
fashion. The implementation of Ufunc is vectorized, which means that the execution of
Ufuncs on arrays is quite fast. The Ufuncs implemented in the numpy package are
implemented in compiled C code for speed and efficiency.

In [None]:
arr = np.arange(15).reshape(3,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [None]:
# we are adding a constant to all elements of the array
arr + 5 # most Ufunc return a array
# the above concept is called BROADCAST 
# which converts single element '5' into an array. 
# Broadcasting happens on its own

array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [None]:
arr * 2

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [None]:
arr1 = np.arange(15).reshape(5,3) 
arr2 = np.arange(5).reshape(5,1) 
print("arr1\n",arr1) 
print("arr2\n",arr2)
print("arr2 + arr1 \n",arr2 + arr1 ) # arr2[i] + arr1's entire row

arr1
 [[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]
arr2
 [[0]
 [1]
 [2]
 [3]
 [4]]
arr2 + arr1 
 [[ 0  1  2]
 [ 4  5  6]
 [ 8  9 10]
 [12 13 14]
 [16 17 18]]


# Linear Algebra Using numpy

Linear algebra is an integral part of the domain of Machine Learning.
Most of the algorithms we will deal with can be concisely expressed using the operations
of linear algebra.

Numpy was initially built to provide the functions similar to MATLAB and hence linear
algebra functions on arrays were always an important part of it.
Here, we learn a bit about performing linear algebra on ndarrays using the
functions implemented in the numpy package.

One of the most widely used operations in linear algebra is the dot product. This can be
performed on two compatible ndarrays by using the dot function.

In [None]:
# #### Linear algebra using numpy
A = np.array([[11,22,33],[44,55,66],[77,88,99]]) 
B = np.array([[9,8,7],[6,5,4],[1,2,3]]) 
print("A=",A)
print("B=",B)
A.dot(B) # its simple array multiplication

A= [[11 22 33]
 [44 55 66]
 [77 88 99]]
B= [[9 8 7]
 [6 5 4]
 [1 2 3]]


array([[ 264,  264,  264],
       [ 792,  759,  726],
       [1320, 1254, 1188]])

In [None]:
# taking Transpose
A = np.arange(20).reshape(5,4) # 5 rows and 4 columns each 
print(A)
print("\n")
print(A.T) # after transpose , we get 4 rows and 4 columns each

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]]


[[ 0  4  8 12 16]
 [ 1  5  9 13 17]
 [ 2  6 10 14 18]
 [ 3  7 11 15 19]]


In [None]:
np.linalg.svd(A) # linalg is the Linear Algebra package

(array([[-0.06412452, -0.77193785,  0.62621458,  0.08036255, -0.03737868],
        [-0.22469718, -0.49951094, -0.64142758,  0.24351066,  0.47882483],
        [-0.38526983, -0.22708403, -0.34889885, -0.14689469, -0.81036507],
        [-0.54584249,  0.04534288,  0.11722212, -0.7581928 ,  0.33377035],
        [-0.70641515,  0.31776979,  0.24688973,  0.58121428,  0.03514856]]),
 array([4.96337102e+01, 2.54849186e+00, 1.61005564e-15, 1.30753441e-16]),
 array([[-0.43989658, -0.47870789, -0.5175192 , -0.5563305 ],
        [ 0.71168181,  0.26615551, -0.17937079, -0.62489709],
        [ 0.54758936, -0.7211163 , -0.20053549,  0.37406242],
        [ 0.01207859, -0.4242538 ,  0.81227183, -0.40009662]]))

# Why SVD is used?

Matrix decomposition, also known as matrix factorization, involves describing a given matrix using its constituent elements.

Perhaps the most known and widely used matrix decomposition method is the Singular-Value Decomposition, or SVD. All matrices have an SVD, which makes it more stable than other methods, such as the eigendecomposition. 

As such, it is often used in a wide array of applications including compressing, denoising, and data reduction.

https://machinelearningmastery.com/singular-value-decomposition-for-machine-learning/ 

(https://machinelearningmastery.com/singular-value-decomposition-for-machine-learning/)

In [None]:
a = np.array([[17,5,-23], [33,-53,2],[5,3,-7]])
b = np.array([16,-8,10])
x = np.linalg.solve(a, b)
x

array([6.21413721, 4.2016632 , 4.81081081])

In [None]:
# Finding the Inverse of a Matrix
# The NumPy library contains the ìnv function in the linalg module.
# let's find the inverse of a 2x2 matrix.
Y = np.array(([1,2], [3,4])) 
Z = np.linalg.inv(Y) 
print(Y)
print("\n")
print(Z)
# How to find the inverse or determinant of a matrix ?
# https://www.mathsisfun.com/algebra/matrix-inverse.html

[[1 2]
 [3 4]]


[[-2.   1. ]
 [ 1.5 -0.5]]


In [None]:
# Finding the Determinant of a Matrix
# The determinant of a matrix can be calculated using the det method
X = np.array(([1,2,3], [4,5,6], [7,8,9])) 
Z = np.linalg.det(X)
print(X)
print("\n")
print(Z)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


0.0


In [None]:
# Finding the Trace of a Matrix
# The trace of a matrix is the sum of all the elements in the diagonal
# of a matrix. The NumPy library contains trace function that can be
# used to find the trace of a matrix.
X = np.array(([1,2,3], [4,5,6], [7,8,9])) 
Z = np.trace(X)
print(X)
print("\n")
print(Z)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


15


# Working of NumPy’s broadcasting functionality

In [None]:
import numpy as np
x = np.array([10, 11, 12])
y = np.array([6,6,6]) 
x+y

array([16, 17, 18])

In [None]:
x + 5

array([15, 16, 17])

In [None]:
M = np.ones((3,3)) 
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [None]:
M+x

array([[11., 12., 13.],
       [11., 12., 13.],
       [11., 12., 13.]])

In [None]:
x = np.arange(3)
y = np.arange(3)[:, np.newaxis]
print(x)
print("\n")
print(y)

[0 1 2]


[[0]
 [1]
 [2]]


In [None]:
x + y

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

# Structured Data: NumPy’s Structured Arrays

In [None]:
name = ['Alice', 'Bob', 'Cathy', 'Douglas'] 
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]
# But this is a bit clumsy.
# There’s nothing here that tells us that the three arrays are related;
# it would be more natural if we could use a single structure to
# store all of this data.
# NumPy can handle this through structured arrays,
# which are arrays with compound data types.

In [None]:
# Use a compound data type for structured arrays
data = np.zeros(4, dtype={'names':('name', 'age', 'weight'), 'formats':('U10', 'i4', 'f8')})
print(data.dtype)
# Please Note :  np.zeros(4 -> indicates the no. of rows , we want.

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


In [None]:
# Now that we’ve created an empty container array,
# we can fill the array with our lists of values:
data['name'] = name 
data['age'] = age 
data['weight'] = weight 
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Douglas', 19, 61.5)]


In [None]:
# The handy thing with structured arrays is that you can now refer
# to values either by index or by name:
# Get all names
data['name']

array(['Alice', 'Bob', 'Cathy', 'Douglas'], dtype='<U10')

In [None]:
# Get first row of data
data[0]

('Alice', 25, 55.)

In [None]:
# Get the name from the last row
data[-1]['name']

'Douglas'

In [None]:
# Using Boolean masking, this even allows you to do some more
# sophisticated operations, such as filtering on age:
# Get names where age is under 30
data[data['age'] < 30]['name']

array(['Alice', 'Douglas'], dtype='<U10')

# Size of objects in Memory

## Int, floats

In [None]:
import sys

In [None]:
# An integer in Python is > 24bytes
sys.getsizeof(1)

28

In [None]:
# An integer in Python is > 24bytes
sys.getsizeof(10)

28

In [None]:
# Longs are even larger
sys.getsizeof(10**100)

72

In [None]:
# Numpy size is much smaller
np.dtype(int).itemsize

8

In [None]:
# Numpy size is much smaller
np.dtype(np.int8).itemsize

1

In [None]:
np.dtype(float).itemsize

8

### Lists are even larger

In [None]:
# A one-element list
sys.getsizeof([1])

64

In [None]:
# An array of one element in numpy
np.array([1]).nbytes

8

### And performance is also important


In [None]:
l = list(range(100000))
l

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [None]:
import numpy as np
a = np.arange(100000)
a

array([    0,     1,     2, ..., 99997, 99998, 99999])

In [None]:
%time np.sum(a ** 2) # Numpy is much faster

CPU times: user 647 µs, sys: 438 µs, total: 1.08 ms
Wall time: 433 µs


333328333350000

In [None]:
%time sum([x ** 2 for x in l])

CPU times: user 43.5 ms, sys: 7.21 ms, total: 50.7 ms
Wall time: 26.5 ms


333328333350000

# Useful Numpy functions

### `random` 

In [None]:
np.random.random(size=2)
# Return random floats in the half-open interval [0.0, 1.0).
# Results are from the “continuous uniform” distribution over the stated interval.

array([0.66018062, 0.61165426])

In [None]:
np.random.normal(size=2)
# creates an array of specified shape and fills it with random values which 
# is actually a part of Normal(Gaussian)Distribution. 
# This is Distribution is also known as Bell Curve because of its characteristics shape.

# https://www.geeksforgeeks.org/rand-vs-normal-numpy-random-python/

array([-0.50271929, -0.89775109])

In [None]:
np.random.rand(2, 4)

array([[0.03095338, 0.94749641, 0.37644835, 0.53019146],
       [0.62435959, 0.41807392, 0.60394879, 0.62182976]])

### `linspace`

the `linspace()` function generates an array with evenly spaced values between specified start, end values, using a specified number of elements

In [None]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [None]:
np.linspace(0, 1, 20)
# 20 points equally spaced between 0 and 1

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [None]:
np.linspace(0, 1, 20, False)
# 20 points Unequally spaced between 0 and 1

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

---
### `identity` and `eye`

In [None]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [None]:
np.eye(3, 3)

# The main difference is that with eye the diagonal can may be offset, 
# whereas identity only fills the main diagonal

# https://stackoverflow.com/questions/28363447/what-are-the-advantages-of-using-numpy-identity-over-numpy-eye

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [None]:
np.eye(8, 4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
np.eye(8, 4, k=1)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [None]:
np.eye(8, 4, k=-3)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [None]:
np.arange(3,19)

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18])

In [None]:
arr = np.arange(3,19).reshape(4,4)
print(arr)

[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]
 [15 16 17 18]]


In [None]:
# to flatten the numpy array
arr.flatten()

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18])

There are two ways to flatten a matrix depending on the data type. For Numpy arrays, we use np.array.flatten() command; for non-array matrices, we use matrix.ravel(). Please try it out.

The numpy module of Python provides a function called numpy.ravel, which is used to change a 2-dimensional array or a multi-dimensional array into a contiguous flattened array. The returned array has the same data type as the source array or input array. If the input array is a masked array, the returned array will also be a masked array.

In [None]:
arr

array([[ 3,  4,  5,  6],
       [ 7,  8,  9, 10],
       [11, 12, 13, 14],
       [15, 16, 17, 18]])

In [None]:
arr_ravel = np.ravel(arr)  
print(arr_ravel)

[ 3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]


In [None]:
 np.ravel(arr, order='C')

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18])

In [None]:
 np.ravel(arr, order='F')

array([ 3,  7, 11, 15,  4,  8, 12, 16,  5,  9, 13, 17,  6, 10, 14, 18])

##  Joining and Stacking

- Vertical stacking (row wise) using vstack()
- Horizontal stacking (column wise) using hstack()
- Depth wise stacking (along third axis) using dstack()
- concatenate() function creates a new array by appending arrays after each other, along a given axis
- append() function appends an element to an array and creates a new copy of the array

In [None]:
import numpy as np


array_1 = np.arange(10).reshape(2,5)
print(array_1)

print("-"*30)

array_2 = np.arange(13,23,1).reshape(2,5)
print(array_2)

[[0 1 2 3 4]
 [5 6 7 8 9]]
------------------------------
[[13 14 15 16 17]
 [18 19 20 21 22]]


In [None]:
array_vstack = np.vstack([array_1,array_2])
print(array_vstack)
print()
print(array_vstack.shape)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [13 14 15 16 17]
 [18 19 20 21 22]]

(4, 5)


In [None]:
array_hstack = np.hstack([array_1,array_2])
print(array_hstack)
print()
print(array_hstack.shape)

[[ 0  1  2  3  4 13 14 15 16 17]
 [ 5  6  7  8  9 18 19 20 21 22]]

(2, 10)


In [None]:
array_dstack = np.dstack([array_1,array_2])
print(array_dstack)
print(array_dstack.shape) # two sets of 5x2 matrices

[[[ 0 13]
  [ 1 14]
  [ 2 15]
  [ 3 16]
  [ 4 17]]

 [[ 5 18]
  [ 6 19]
  [ 7 20]
  [ 8 21]
  [ 9 22]]]
(2, 5, 2)


In [None]:
array_concatenate = np.concatenate([array_1,array_2])
print(array_concatenate)
print(array_concatenate.shape) # same as vstack

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [13 14 15 16 17]
 [18 19 20 21 22]]
(4, 5)


In [None]:
array_append = np.append(array_1,array_2)
print(array_append)
print(array_append.shape)

[ 0  1  2  3  4  5  6  7  8  9 13 14 15 16 17 18 19 20 21 22]
(20,)


In [None]:
array_append = np.append(array_1,array_2,axis =1)
print(array_append)
print(array_append.shape)

[[ 0  1  2  3  4 13 14 15 16 17]
 [ 5  6  7  8  9 18 19 20 21 22]]
(2, 10)


In [None]:
array_append = np.append(array_1,array_2,axis = 0)
print(array_append)
print(array_append.shape)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [13 14 15 16 17]
 [18 19 20 21 22]]
(4, 5)


## Updating using where(  )

- `where()` function is used to choose values from arrays depending on the value of a specific condition

In [None]:
print(np.where(array_1 > 15, array_1+10, array_1 -10))

[[-10  -9  -8  -7  -6]
 [ -5  -4  -3  -2  -1]]


In [None]:
new_array = np.where(array_1 > 15, array_1+10, array_1 -10)
print(new_array)

[[-10  -9  -8  -7  -6]
 [ -5  -4  -3  -2  -1]]


In [None]:
new_array = np.where(array_1 < 15, array_1+10, array_1 -10)
print(new_array)

[[10 11 12 13 14]
 [15 16 17 18 19]]


In [None]:
new_array = np.where(array_1 > 15, array_1/10, array_1 -10)
print(new_array)

[[-10.  -9.  -8.  -7.  -6.]
 [ -5.  -4.  -3.  -2.  -1.]]


In [None]:
new_array = np.where(array_1 < 15, array_1/10, array_1 *10)
print(new_array)

[[0.  0.1 0.2 0.3 0.4]
 [0.5 0.6 0.7 0.8 0.9]]


# Random module inside numpy

In [None]:
arr= np.random.rand(25)
print(arr)
print("\n",arr.shape)

[0.02648591 0.07949309 0.69346026 0.65426288 0.42580973 0.61428745
 0.51666877 0.39092652 0.25598775 0.58122075 0.40308928 0.15653504
 0.81536677 0.35595763 0.97069417 0.95319197 0.96378117 0.00484503
 0.92311215 0.97681063 0.11998293 0.75567107 0.84043794 0.03125893
 0.0121875 ]

 (25,)


In [None]:
np.random.rand(10)  #= > 0,1

array([0.77759462, 0.73145213, 0.35470258, 0.06077032, 0.29483799,
       0.65242996, 0.24804329, 0.56935328, 0.76467448, 0.94444376])

In [None]:
np.random.randn(10)  # => -3,3

array([ 0.13039   , -2.0450914 ,  0.32724074, -0.58530526,  1.22687616,
       -1.64717997, -0.69353576, -0.79043953,  0.68133706,  1.4045158 ])

In [None]:
np.random.randint(2,10, size =(5,5) ) # integers

array([[5, 9, 5, 5, 4],
       [9, 3, 3, 7, 7],
       [2, 3, 6, 9, 8],
       [8, 5, 9, 6, 7],
       [6, 8, 2, 9, 2]])

In [None]:
arr= np.random.rand(25).reshape(5,5)
arr

array([[0.17285423, 0.026028  , 0.33236767, 0.52846187, 0.76318166],
       [0.29671888, 0.39500774, 0.70588338, 0.67403993, 0.71168427],
       [0.55076588, 0.99500902, 0.93852246, 0.65078213, 0.41884038],
       [0.14255673, 0.70362318, 0.7397618 , 0.86336232, 0.99846089],
       [0.14001909, 0.68492999, 0.69747663, 0.1835889 , 0.59250386]])

In [None]:
arr= np.random.randn(25).reshape(5,5)
arr

array([[-0.29353554, -0.33185123,  1.15939286, -0.72286295,  0.02606406],
       [-0.418068  , -0.46822014,  0.63458738,  1.11988673,  0.39378286],
       [ 0.26221541,  0.40060773, -0.04273963,  0.21908899, -1.01542572],
       [ 1.163501  ,  2.22313029, -0.73393126, -0.81111318,  0.01522113],
       [-1.55681782,  0.7633855 ,  1.96945761,  0.36910355, -0.20109765]])

In [None]:
arr= np.random.randint(2,10, size =(5,5) )  # 25 random numbers between 2 and 10 in a 5x5 matrix
arr

array([[3, 5, 6, 7, 2],
       [5, 8, 9, 3, 5],
       [2, 4, 2, 5, 5],
       [2, 6, 5, 6, 9],
       [5, 4, 5, 5, 8]])

In [None]:
arr = np.random.randn(3,4)
arr

array([[-0.24501226,  1.47396547, -0.97447565, -0.14615755],
       [-0.02027488, -1.52124528,  1.80012132,  1.05572517],
       [-0.97916022,  0.06006309,  1.07949748,  0.51318245]])

# How to save numpy data from memory to flat file?

Numpy data can be stored into memory in two files format – .npy file or .text file. For storing data as .npy file, just use np.save(‘output_file_name’, numpy_object) and for storing data as .txt file numpy function np.savetxt(‘output_file_name’, numpy_object).

np.save(‘output_file_name’, numpy_object) saves numpy_object data as .npy file

np.savetxt(‘output_file_name’, numpy_object) saves numpy_object data as .txt file

In [None]:
a = np.array([1, 2, 3, 4, 5])

np.save('output_file_name', a)
# Saving a array data as .npy file


b = np.savetxt('output_file_name', a)
# Saving a array data as .txt file

# What is use of ndenumerate?

ndenumerate return the co-ordinates and corresponding values in the co-ordinates

In [None]:
A = np.array([[11, 22, 23], [33, 43, 55]])
for index, x in np.ndenumerate(A):
    print(index, x)

(0, 0) 11
(0, 1) 22
(0, 2) 23
(1, 0) 33
(1, 1) 43
(1, 2) 55


# How to sort a numpy array based on one or more columns?

In [None]:
arr = np.random.randint(1,7, size=[8, 4])
arr

array([[2, 2, 5, 6],
       [4, 2, 4, 2],
       [2, 4, 1, 2],
       [4, 6, 5, 3],
       [2, 1, 3, 1],
       [5, 5, 4, 4],
       [2, 6, 6, 5],
       [6, 5, 3, 1]])

In [None]:
# Sort each columns of arr
np.sort(arr, axis=0)

array([[2, 1, 1, 1],
       [2, 2, 3, 1],
       [2, 2, 3, 2],
       [2, 4, 4, 2],
       [4, 5, 4, 3],
       [4, 5, 5, 4],
       [5, 6, 5, 5],
       [6, 6, 6, 6]])

# How to sort a numpy array based on 1 column using argsort?

In [None]:
# Get the index positions that would sort the array
x = np.array([1, 10, 5, 2, 8, 9])
sort_index = np.argsort(x)
print(sort_index)

[0 3 2 4 5 1]


In [None]:
x[sort_index]

array([ 1,  2,  5,  8,  9, 10])

In [None]:
# Argsort the first column
sorted_index_1stcol = arr[:, 0].argsort()

# Sort 'arr' by first column without disturbing the integrity of rows
arr[sorted_index_1stcol]


array([[2, 2, 5, 6],
       [2, 4, 1, 2],
       [2, 1, 3, 1],
       [2, 6, 6, 5],
       [4, 2, 4, 2],
       [4, 6, 5, 3],
       [5, 5, 4, 4],
       [6, 5, 3, 1]])

In [None]:
# Descending sort
arr[sorted_index_1stcol[::-1]]

array([[6, 5, 3, 1],
       [5, 5, 4, 4],
       [4, 6, 5, 3],
       [4, 2, 4, 2],
       [2, 6, 6, 5],
       [2, 1, 3, 1],
       [2, 4, 1, 2],
       [2, 2, 5, 6]])

# How to sort a numpy array based on 2 or more columns?

1.   List item
2.   List item



You can do this using np.lexsort by passing a tuple of columns based on which the array should be sorted.

Just remember to place the column to be sorted first at the rightmost side inside the tuple.

In [None]:
# Sort by column 0, then by column 1
lexsorted_index = np.lexsort((arr[:, 1], arr[:, 0])) 
arr[lexsorted_index]

array([[2, 1, 3, 1],
       [2, 2, 5, 6],
       [2, 4, 1, 2],
       [2, 6, 6, 5],
       [4, 2, 4, 2],
       [4, 6, 5, 3],
       [5, 5, 4, 4],
       [6, 5, 3, 1]])

# Working with dates

Numpy implements dates through the np.datetime64 object which supports a precision till nanoseconds. You can create one using a standard YYYY-MM-DD formatted date strings.

In [1]:
# Create a datetime64 object
%%time
date64 = np.datetime64('2021-10-19 19:41:10')
date64

NameError: ignored

In [None]:
# Drop the time part from the datetime64 object
dt64 = np.datetime64(date64, 'D')
dt64

numpy.datetime64('2021-10-19')

By default, if you add a number increases the number of days. But if you need to increase any other time unit like months, hours, seconds etc, then the timedelta object is much convenient.

In [None]:
# Create the timedeltas (individual units of time)
tenminutes = np.timedelta64(10, 'm')  # 10 minutes
tenseconds = np.timedelta64(10, 's')  # 10 seconds
tennanoseconds = np.timedelta64(10, 'ns')  # 10 nanoseconds

print('Add 10 days: ', dt64 + 10)
print('Add 10 minutes: ', dt64 + tenminutes)
print('Add 10 seconds: ', dt64 + tenseconds)
print('Add 10 nanoseconds: ', dt64 + tennanoseconds)

Add 10 days:  2021-10-29
Add 10 minutes:  2021-10-19T00:10
Add 10 seconds:  2021-10-19T00:00:10
Add 10 nanoseconds:  2021-10-19T00:00:00.000000010


Let me convert the dt64 back to a string.



In [None]:
# Convert np.datetime64 back to a string
np.datetime_as_string(dt64)


'2021-10-19'

When working with dates, you would often need to filter out the business days from the data. You can know if a given date is a business day or not using the 

np.is_busday().

In [None]:
print('Date: ', dt64)
print("Is it a business day?: ", np.is_busday(dt64))  
print("Add 2 business days, rolling forward to nearest biz day: ", np.busday_offset(dt64, 2, roll='forward'))  
print("Add 2 business days, rolling backward to nearest biz day: ", np.busday_offset(dt64, 2, roll='backward'))

Date:  2021-10-19
Is it a business day?:  True
Add 2 business days, rolling forward to nearest biz day:  2021-10-21
Add 2 business days, rolling backward to nearest biz day:  2021-10-21


# How to create a sequence of dates?

It can simply be done using the np.arange itself.



In [None]:
# Create date sequence
dates = np.arange(np.datetime64('2018-02-01'), np.datetime64('2018-02-10'))
print(dates)

# Check if its a business day
np.is_busday(dates)


['2018-02-01' '2018-02-02' '2018-02-03' '2018-02-04' '2018-02-05'
 '2018-02-06' '2018-02-07' '2018-02-08' '2018-02-09']


array([ True,  True, False, False,  True,  True,  True,  True,  True])

# How to convert numpy.datetime64 to datetime.datetime object?

In [None]:
# Convert np.datetime64 to datetime.datetime
import datetime
dt = dt64.tolist()
dt

datetime.date(2021, 10, 19)

Once you convert it to a datetime.date object, you have a lot more facilities to extract the day of month, month of year etc.

In [None]:
print('Year: ', dt.year)  
print('Day of month: ', dt.day)
print('Month of year: ', dt.month)  
print('Day of Week: ', dt.weekday())  # Sunday

Year:  2021
Day of month:  19
Month of year:  10
Day of Week:  1


# End of the Notebook