## Numpy

In [1]:
'''
What is numpy?

NumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object, various derived objects (such as masked
arrays and matrices), and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier
transforms, basic linear algebra, basic statistical operations, random simulation and much more.

At the core of the NumPy package, is the ndarray object. This encapsulates n-dimensional arrays of homogeneous data types
'''

'\nWhat is numpy?\n\nNumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object, various derived objects (such as masked\narrays and matrices), and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier\ntransforms, basic linear algebra, basic statistical operations, random simulation and much more.\n\nAt the core of the NumPy package, is the ndarray object. This encapsulates n-dimensional arrays of homogeneous data types\n'

In [2]:
'''
Numpy Arrays Vs Python Sequences

1. NumPy arrays have a fixed size at creation, unlike Python lists (which can grow dynamically). Changing the size of an ndarray will create a new array and delete the original.

2. The elements in a NumPy array are all required to be of the same data type, and thus will be the same size in memory.

3. NumPy arrays facilitate advanced mathematical and other types of operations on large numbers of data. Typically, such operations are executed more efficiently and with less
code than is possible using Python's built-in sequences.

4. A growing plethora of scientific and mathematical Python-based packages are using NumPy arrays; though these typically support Python-sequence input, they convert such input
to NumPy arrays prior to processing, and they often output NumPy arrays.
'''

"\nNumpy Arrays Vs Python Sequences\n\n1. NumPy arrays have a fixed size at creation, unlike Python lists (which can grow dynamically). Changing the size of an ndarray will create a new array and delete the original.\n\n2. The elements in a NumPy array are all required to be of the same data type, and thus will be the same size in memory.\n\n3. NumPy arrays facilitate advanced mathematical and other types of operations on large numbers of data. Typically, such operations are executed more efficiently and with less\ncode than is possible using Python's built-in sequences.\n\n4. A growing plethora of scientific and mathematical Python-based packages are using NumPy arrays; though these typically support Python-sequence input, they convert such input\nto NumPy arrays prior to processing, and they often output NumPy arrays.\n"

### Creating a NumPy array

In [3]:
import numpy as np

a = np.array([1,2,3]) # This is vector
print(a)
print(type(a))

[1 2 3]
<class 'numpy.ndarray'>


In [4]:
b = np.array([[1,2,3],[4,5,6]]) # This is matrix
print(b)

[[1 2 3]
 [4 5 6]]


In [5]:
c = np.array([[[1,2],[3,4]],[[5,6],[7,8]]]) # This is tensor
print(c)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [6]:
# dtype - float
print(np.array([1,2,3],dtype=float))
print(np.array([1,2,3],dtype=bool))
print(np.array([1,2,3],dtype=complex))

[1. 2. 3.]
[ True  True  True]
[1.+0.j 2.+0.j 3.+0.j]


In [7]:
# np.arange - range of numbers - same as for i in range
print(np.arange(1,11))
print(np.arange(1,11,2))

[ 1  2  3  4  5  6  7  8  9 10]
[1 3 5 7 9]


In [8]:
# Reshape - to change the shape
print(np.arange(1,11).reshape(5,2))
print('----------------------------------')
print(np.arange(1,11).reshape(2,5))
print('----------------------------------')
print(np.arange(8).reshape(2,2,2))

[[ 1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 9 10]]
----------------------------------
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
----------------------------------
[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [9]:
# print(np.arange(1,11).reshape(3,3)) -> cannot reshape (product of arguments should be equal to size of array)
print(np.arange(1,13).reshape(3,4))
print('----------------------------------')
print(np.arange(1,13).reshape(4,3))
print('----------------------------------')
print(np.arange(1,13).reshape(2,6))
print('----------------------------------')
print(np.arange(1,13).reshape(6,2))

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
----------------------------------
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
----------------------------------
[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]
----------------------------------
[[ 1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 9 10]
 [11 12]]


In [10]:
# np.ones  - all items are 1 
print(np.ones((3,4)))
print()

# np.zeros - all items are 0
print(np.zeros((3,4)))
print()

# np.random - random numbers
np.random.random((3,4)) # Two randoms - class name is random, method name is also random

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]



array([[0.23614842, 0.16723403, 0.85076739, 0.02664156],
       [0.26736531, 0.1164628 , 0.83238263, 0.03701973],
       [0.5183592 , 0.47629547, 0.78790621, 0.50954684]])

In [11]:
# np.linspace - linear Space - difference is equal
print(np.linspace(-10, 10, 10)) # default is float
print(np.linspace(-10, 10, 10, dtype=int))     # Lower range, upper range, no. of items, data type

[-10.          -7.77777778  -5.55555556  -3.33333333  -1.11111111
   1.11111111   3.33333333   5.55555556   7.77777778  10.        ]
[-10  -8  -6  -4  -2   1   3   5   7  10]


In [12]:
# np.identity - Identity matrix - diagonal is 1, rest are 0
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Array Attributes

In [13]:
a1 = np.arange(10,dtype=np.int32)               # Vector
a2 = np.arange(12,dtype=float).reshape(3,4)     # Matrix
a3 = np.arange(8).reshape(2,2,2)                # Tensor

a3

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [14]:
# ndim - to identify dimensions
print(a1.ndim, end=' ')
print(a2.ndim, end=' ')
print(a3.ndim)

# shape -Gives no. of items in each shape (this tell no. of rows and columns)
print(a1.shape, end=' ')    # (10,)
print(a2.shape, end=' ')    # (3, 4)
print(a3.shape)             # (2, 2, 2) -> two arrays of shape (2, 2)

# size - total number of items
print(a1.size, end=' ')
print(a2.size, end=' ')
print(a3.size)

# itemsize - gives size occupied by array in memory in bytes
print(a1.itemsize, end=' ')
print(a2.itemsize, end=' ')
print(a3.itemsize)

# dtype - gives data type
print(a1.dtype, end=' ')
print(a2.dtype, end=' ')
print(a3.dtype)

1 2 3
(10,) (3, 4) (2, 2, 2)
10 12 8
4 8 4
int32 float64 int32


### Changing Data Type

In [15]:
# astype - used to change data type (usually to reduce memory)
print(a3.dtype)
print(a3.astype(np.float64))

int32
[[[0. 1.]
  [2. 3.]]

 [[4. 5.]
  [6. 7.]]]


### Array Operations - 2 types (Scalar and Vector)

In [48]:
a1 = np.arange(12).reshape(3,4)
a2 = np.arange(12,24).reshape(3,4)

print(a1)
print(a2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [17]:
# scalar operations - only on single array

# arithmetic 
print(a1 ** 2)

# relational
print(a2 >15)

[[  0   1   4   9]
 [ 16  25  36  49]
 [ 64  81 100 121]]
[[False False False False]
 [ True  True  True  True]
 [ True  True  True  True]]


In [50]:
# vector operations
# arithmetic - used with both arrays
print(a1 + a2)  # can add because shape is same
print(a1 - a2)

[[12 14 16 18]
 [20 22 24 26]
 [28 30 32 34]]
[[-12 -12 -12 -12]
 [-12 -12 -12 -12]
 [-12 -12 -12 -12]]


### Array Functions

In [53]:
# a1 = np.random.random((3,3))
# a1 = np.round(a1*100)
a1 = np.arange(1, 101, 9).reshape(3, 4)
a1

array([[  1,  10,  19,  28],
       [ 37,  46,  55,  64],
       [ 73,  82,  91, 100]])

In [55]:
# max/min/sum/prod
# 0 -> col and 1 -> row

print(np.max(a1))
print(np.max(a1, axis=0)) # array [max of columns]
print(np.max(a1, axis=1)) # array [max of rows]

print(np.min(a1))
print(np.min(a1))

print(np.sum(a1))
print(np.prod(a1))
print(np.prod(a1, axis=0)) # product of column elements
print(np.prod(a1, axis=1)) # product of row elements

100
[ 73  82  91 100]
[ 28  64 100]
1
1
606
-2132615168
[  2701  37720  95095 179200]
[    5320  5991040 54472600]


array([ 28,  64, 100])

In [21]:
# mean/median/std/var
np.mean(a1)
np.median(a1)
np.std(a1)   # standard deviation
np.var(a1,axis=1) # variance

array([1013.55555556,  664.22222222,  122.        ])

In [22]:
# dot product - to use Dot Product - matrix should be (i, j) = (j, i) format
# eg. 3,4 = 4,3  (column of 1st matrix should match row of 2nd matrix)
a2 = np.arange(12).reshape(3,4)
a3 = np.arange(12,24).reshape(4,3)

np.dot(a2,a3)  # 3x3 matrix

array([[114, 120, 126],
       [378, 400, 422],
       [642, 680, 718]])

In [23]:
# log and exponents
print(np.log(a1))
print(np.exp(a1))

[[0.         4.33073334 2.99573227]
 [2.07944154 4.26267988 3.76120012]
 [3.4339872  1.38629436 2.94443898]]
[[2.71828183e+00 1.01480039e+33 4.85165195e+08]
 [2.98095799e+03 6.83767123e+30 4.72783947e+18]
 [2.90488497e+13 5.45981500e+01 1.78482301e+08]]


In [24]:
# round/floor/ceil

np.round(np.random.random((2,3))*100)
np.floor(np.random.random((2,3))*100)
np.ceil(np.random.random((2,3))*100)

array([[98., 94., 21.],
       [58., 67.,  7.]])

### Indexing and Slicing

In [25]:
a1 = np.arange(10)
a2 = np.arange(12).reshape(3,4)
a3 = np.arange(8).reshape(2,2,2)

a3

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [26]:
# Indexing - 1D
a1
print(a1[0])
print(a1[-1])
print(a1[-5])

0
9
5


In [27]:
# Indexing - 2D
a2
print(a2[0, 2])
print(a2[2, 3])
print(a2[1, 2])

2
11
6


In [28]:
# Indexing - 3D
print(a3)
print(a3[0, 0, 0])
print(a3[0, 0, 1])
print(a3[0, 1, 0])
print(a3[0, 1, 1])
print(a3[1, 0, 0])
print(a3[1, 0, 1])
print(a3[1, 1, 0])
print(a3[1, 1, 1])

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]
0
1
2
3
4
5
6
7


In [29]:
# Slicing - 1D
print(a1[2:9])
print(a1[2:9:2])

[2 3 4 5 6 7 8]
[2 4 6 8]


In [30]:
# Slicing - 2D
print(a2)
print()
print(a2[0,:]) # [0,:] => 0th row all elements
print(a2[1,:]) # [1,:] => 1st row all elements
print(a2[:,0]) # [:,0] => 0th column all elements
print(a2[:,1]) # [:,1] => 1st column all elements

print()
print(a2[1,::3])

print()
print(a2[0:2, 1:])

print()
print(a2[1:,1:3])
# print(a2[0:1])
# print(a2[0:2,1::2])
print()
print(a2[::2,::3])
print()
print(a2[::2,1::2])

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[0 1 2 3]
[4 5 6 7]
[0 4 8]
[1 5 9]

[4 7]

[[1 2 3]
 [5 6 7]]

[[ 5  6]
 [ 9 10]]

[[ 0  3]
 [ 8 11]]

[[ 1  3]
 [ 9 11]]


In [31]:
a3 = np.arange(27).reshape(3,3,3) # This numpy array consists of three 2D arrays
a3

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [32]:
print(a3[1])

print('------------------------')
print(a3[::2])

print('------------------------')
print(a3[0, 1])
print(a3[1, :, 1])

print('------------------------')
print(a3[2, 1:, 1:])

print('------------------------')
print(a3[::2, 0, ::2])

[[ 9 10 11]
 [12 13 14]
 [15 16 17]]
------------------------
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]
------------------------
[3 4 5]
[10 13 16]
------------------------
[[22 23]
 [25 26]]
------------------------
[[ 0  2]
 [18 20]]


### Iterating

In [33]:
# a1
for i in a1:
    print(i)

0
1
2
3
4
5
6
7
8
9


In [34]:
for i in a2:
    print(i)

print()
for i in np.nditer(a2): # iterate through any dimensions arrays by making it 1D
    print(i)

[0 1 2 3]
[4 5 6 7]
[ 8  9 10 11]

0
1
2
3
4
5
6
7
8
9
10
11


In [35]:
for i in a3:
    print(i)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 9 10 11]
 [12 13 14]
 [15 16 17]]
[[18 19 20]
 [21 22 23]
 [24 25 26]]


### Reshaping

In [36]:
# reshape

In [37]:
# Transpose - interchange rows and columns
print(np.transpose(a2))
# or
print(a2.T)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [38]:
# ravel - change any dimension array to 1D array
a3.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26])

### Stacking - Joining arrays (horizontal or vertical)

In [57]:
a4 = np.arange(4).reshape(2,2)
a5 = np.arange(4,8).reshape(2,2)
print(a4)
print(a5)

[[0 1]
 [2 3]]
[[4 5]
 [6 7]]


In [58]:
# horizontal stacking               [2 x 2] [2 x 2] = [2 x 4]
np.hstack((a4,a5))

array([[0, 1, 4, 5],
       [2, 3, 6, 7]])

In [59]:
# Vertical stacking               [2 x 2] [2 x 2] = [4 x 2]
np.vstack((a4,a5))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

### Splitting - split joined arrays (horizontal or vertical)

In [42]:
# horizontal splitting
print(a4) # splitting on a4 array
np.hsplit(a4, 4)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


[array([[0],
        [4],
        [8]]),
 array([[1],
        [5],
        [9]]),
 array([[ 2],
        [ 6],
        [10]]),
 array([[ 3],
        [ 7],
        [11]])]

In [43]:
# vertical splitting
np.vsplit(a5,3)

[array([[12, 13, 14, 15]]),
 array([[16, 17, 18, 19]]),
 array([[20, 21, 22, 23]])]

In [44]:
print('d')

d
