<a href="https://colab.research.google.com/github/ckraju/python-data-analytics-2e/blob/master/Chapter_3_The_Numpy_Library.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CHAPTER 3 - THE NUMPY LIBRARY

## The NumPy Installation

In [133]:
import numpy as np

## Ndarray: The Heart of the Library

In [134]:
a = np.array([1, 2, 3])
a

array([1, 2, 3])

In [135]:
type(a)

numpy.ndarray

In [136]:
a.dtype

dtype('int64')

In [137]:
a.ndim

1

In [138]:
a.size

3

In [139]:
a.shape

(3,)

In [140]:
b = np.array([[1.3, 2.4], [0.3, 4.1]])
b.dtype

dtype('float64')

In [141]:
b.ndim

2

In [142]:
b.size

4

In [143]:
b.shape

(2, 2)

In [144]:
b.itemsize

8

In [145]:
b.data

<memory at 0x7f5339dcf130>

### Create an Array

In [146]:
c = np.array([[1, 2, 3], [4, 5, 6]])
c

array([[1, 2, 3],
       [4, 5, 6]])

In [147]:
d = np.array(((1, 2, 3), (4, 5, 6)))
d

array([[1, 2, 3],
       [4, 5, 6]])

In [148]:
e = np.array([(1, 2, 3), [4, 5, 6], (7, 8, 9)])
e

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Types of Data

In [149]:
g = np.array([['a', 'b'], ['c', 'd']])
g

array([['a', 'b'],
       ['c', 'd']], dtype='<U1')

In [150]:
g.dtype

dtype('<U1')

In [151]:
g.dtype.name

'str32'

### The dtype Option

In [152]:
f = np.array([[1, 2, 3], [4, 5, 6]], dtype=complex)
f

array([[1.+0.j, 2.+0.j, 3.+0.j],
       [4.+0.j, 5.+0.j, 6.+0.j]])

### Intrinsic Creation of an Array 

In [153]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [154]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [155]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [156]:
np.arange(4, 10)

array([4, 5, 6, 7, 8, 9])

In [157]:
np.arange(0, 12, 3)

array([0, 3, 6, 9])

In [158]:
# NOTE(review): with a non-integer step the number of elements depends on
# floating-point rounding; np.linspace (used a few cells below) is the usual
# choice when the point count must be exact.
np.arange(0, 6, 0.6)

array([0. , 0.6, 1.2, 1.8, 2.4, 3. , 3.6, 4.2, 4.8, 5.4])

In [159]:
np.arange(0, 12).reshape(3, 4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [160]:
np.linspace(0, 10, 5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [161]:
# No random seed is set in the cells above, so the values displayed for this
# and the later np.random cells change on every run; call
# np.random.seed(<int>) beforehand if repeatable output is needed.
np.random.random(3)

array([0.46692898, 0.17836095, 0.16816008])

In [162]:
np.random.random((3, 3))

array([[0.02886323, 0.99971132, 0.33562449],
       [0.19887985, 0.21057916, 0.62227201],
       [0.65786498, 0.98772454, 0.82384602]])

## Basic Operations

### Arithmetic Operators

In [163]:
a = np.arange(4)
a

array([0, 1, 2, 3])

In [164]:
a + 4

array([4, 5, 6, 7])

In [165]:
a * 2

array([0, 2, 4, 6])

In [166]:
b = np.arange(4, 8)
b

array([4, 5, 6, 7])

In [167]:
a + b

array([ 4,  6,  8, 10])

In [168]:
a * b

array([ 0,  5, 12, 21])

In [169]:
a * np.sin(b)

array([-0.        , -0.95892427, -0.558831  ,  1.9709598 ])

In [170]:
a * np.sqrt(b)

array([0.        , 2.23606798, 4.89897949, 7.93725393])

In [171]:
A = np.arange(0, 9).reshape(3, 3)
A

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [172]:
B = np.ones((3, 3))
B

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [173]:
A * B

array([[0., 1., 2.],
       [3., 4., 5.],
       [6., 7., 8.]])

### The Matrix Product

In [174]:
np.dot(A, B)

array([[ 3.,  3.,  3.],
       [12., 12., 12.],
       [21., 21., 21.]])

In [175]:
A.dot(B)

array([[ 3.,  3.,  3.],
       [12., 12., 12.],
       [21., 21., 21.]])

In [176]:
np.dot(B, A)

array([[ 9., 12., 15.],
       [ 9., 12., 15.],
       [ 9., 12., 15.]])

### Increment and Decrement Operators

In [177]:
a = np.arange(4)
a

array([0, 1, 2, 3])

In [178]:
a += 1
a

array([1, 2, 3, 4])

In [179]:
a -= 1
a

array([0, 1, 2, 3])

In [180]:
a = np.arange(4)
a += 4
a

array([4, 5, 6, 7])

In [181]:
a *= 2
a

array([ 8, 10, 12, 14])

### Universal Functions (ufunc)

In [182]:
a = np.arange(1, 5)
a

array([1, 2, 3, 4])

In [183]:
np.sqrt(a)

array([1.        , 1.41421356, 1.73205081, 2.        ])

In [184]:
np.log(a)

array([0.        , 0.69314718, 1.09861229, 1.38629436])

In [185]:
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

### Aggregate Functions

In [186]:
a = np.array([3.3, 4.5, 1.2, 5.7, 0.3])
a

array([3.3, 4.5, 1.2, 5.7, 0.3])

In [187]:
a.sum()

15.0

In [188]:
a.min()

0.3

In [189]:
a.max()

5.7

In [190]:
a.mean()

3.0

In [191]:
a.std()

2.0079840636817816

## Indexing, Slicing and Iterating

### Indexing

In [192]:
a = np.arange(10, 16)
a

array([10, 11, 12, 13, 14, 15])

In [193]:
a[4]

14

In [194]:
a[-1]

15

In [195]:
a[-6]

10

In [196]:
a[[1, 3, 4]]

array([11, 13, 14])

In [197]:
A = np.arange(10, 19).reshape((3, 3))
A

array([[10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [198]:
A[1, 2]

15

### Slicing

In [199]:
a = np.arange(10, 16)
a

array([10, 11, 12, 13, 14, 15])

In [200]:
a[1:5]

array([11, 12, 13, 14])

In [201]:
a[1:5:2]

array([11, 13])

In [202]:
a[::2]

array([10, 12, 14])

In [203]:
a[:5:2]

array([10, 12, 14])

In [204]:
a[:5:]

array([10, 11, 12, 13, 14])

In [205]:
A = np.arange(10, 19).reshape((3, 3))
A

array([[10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [206]:
A[0, :]

array([10, 11, 12])

In [207]:
A[:, 0]

array([10, 13, 16])

In [208]:
A[0:2, 0:2]

array([[10, 11],
       [13, 14]])

In [209]:
A[[0, 2], 0:2]

array([[10, 11],
       [16, 17]])

### Iterating an Array 

In [210]:
for i in a:
    print(i)

10
11
12
13
14
15


In [211]:
for row in A:
    print(row)

[10 11 12]
[13 14 15]
[16 17 18]


In [212]:
for item in A.flat:
    print(item)

10
11
12
13
14
15
16
17
18


In [213]:
np.apply_along_axis(np.mean, axis=0, arr=A)

array([13., 14., 15.])

In [214]:
np.apply_along_axis(np.mean, axis=1, arr=A)

array([11., 14., 17.])

In [215]:
def foo(x):
    """Return half of ``x``.

    True division is used, so integer input (a scalar or a NumPy array)
    yields floating-point results. On arrays the division is element-wise.
    """
    halved = x / 2
    return halved

In [216]:
np.apply_along_axis(foo, axis=1, arr=A)

array([[5. , 5.5, 6. ],
       [6.5, 7. , 7.5],
       [8. , 8.5, 9. ]])

In [217]:
np.apply_along_axis(foo, axis=0, arr=A)

array([[5. , 5.5, 6. ],
       [6.5, 7. , 7.5],
       [8. , 8.5, 9. ]])

## Conditions and Boolean Arrays

In [218]:
A = np.random.random((4, 4))
A

array([[0.71990933, 0.01905934, 0.84186208, 0.4610536 ],
       [0.39688443, 0.99287346, 0.5874095 , 0.95151921],
       [0.98859114, 0.91274823, 0.3271869 , 0.49681533],
       [0.28628499, 0.90595515, 0.06631556, 0.12725549]])

In [219]:
A < 0.5

array([[False,  True, False,  True],
       [ True, False, False, False],
       [False, False,  True,  True],
       [ True, False,  True,  True]])

In [220]:
A[A < 0.5]

array([0.01905934, 0.4610536 , 0.39688443, 0.3271869 , 0.49681533,
       0.28628499, 0.06631556, 0.12725549])

## Shape Manipulation

In [221]:
a = np.random.random(12)
a

array([0.37331801, 0.89043226, 0.18142307, 0.02044956, 0.67661989,
       0.13458466, 0.67146327, 0.48495939, 0.50764277, 0.205061  ,
       0.71451715, 0.41160058])

In [222]:
A = a.reshape(3,4)
A

array([[0.37331801, 0.89043226, 0.18142307, 0.02044956],
       [0.67661989, 0.13458466, 0.67146327, 0.48495939],
       [0.50764277, 0.205061  , 0.71451715, 0.41160058]])

In [223]:
# Assigning to the shape attribute reshapes `a` itself, in place, whereas
# reshape() in the previous cell left `a` one-dimensional and produced A.
a.shape = (3, 4)
a

array([[0.37331801, 0.89043226, 0.18142307, 0.02044956],
       [0.67661989, 0.13458466, 0.67146327, 0.48495939],
       [0.50764277, 0.205061  , 0.71451715, 0.41160058]])

In [224]:
a = a.ravel()
a

array([0.37331801, 0.89043226, 0.18142307, 0.02044956, 0.67661989,
       0.13458466, 0.67146327, 0.48495939, 0.50764277, 0.205061  ,
       0.71451715, 0.41160058])

In [225]:
# A one-dimensional shape is the 1-tuple (12,). Note the trailing comma:
# `(12)` is just the integer 12 in parentheses, which NumPy happens to accept
# but which hides the tuple form used for the 2-D case (3, 4).
a.shape = (12,)
a

array([0.37331801, 0.89043226, 0.18142307, 0.02044956, 0.67661989,
       0.13458466, 0.67146327, 0.48495939, 0.50764277, 0.205061  ,
       0.71451715, 0.41160058])

In [226]:
A.transpose()

array([[0.37331801, 0.67661989, 0.50764277],
       [0.89043226, 0.13458466, 0.205061  ],
       [0.18142307, 0.67146327, 0.71451715],
       [0.02044956, 0.48495939, 0.41160058]])

## Array Manipulation

### Joining Arrays

In [227]:
A = np.ones((3, 3))
B = np.zeros((3, 3))
np.vstack((A, B))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [228]:
np.hstack((A, B))

array([[1., 1., 1., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0.]])

In [229]:
a = np.array([0, 1, 2])
b = np.array([3, 4, 5])
c = np.array([6, 7, 8])
np.column_stack((a, b, c))

array([[0, 3, 6],
       [1, 4, 7],
       [2, 5, 8]])

In [230]:
# np.row_stack is only an alias of np.vstack and is deprecated as of
# NumPy 2.0, so call vstack directly: each 1-D array becomes one row.
np.vstack((a, b, c))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

### Splitting Arrays

In [231]:
A = np.arange(16).reshape((4, 4))
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [232]:
[B, C] = np.hsplit(A, 2)
B

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])

In [233]:
C

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

In [234]:
[B, C] = np.vsplit(A, 2)
B

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [235]:
C

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [236]:
# Split along the columns (axis=1) at indices 1 and 3, giving the column
# groups [0:1], [1:3] and [3:].
[A1, A2, A3] = np.split(A, [1,3], axis=1)
A1

array([[ 0],
       [ 4],
       [ 8],
       [12]])

In [237]:
A2

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10],
       [13, 14]])

In [238]:
A3

array([[ 3],
       [ 7],
       [11],
       [15]])

In [239]:
# Same split points applied along the rows (axis=0): row groups [0:1], [1:3]
# and [3:].
[A1, A2, A3] = np.split(A, [1, 3], axis=0)
A1

array([[0, 1, 2, 3]])

In [240]:
A2

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [241]:
A3

array([[12, 13, 14, 15]])

## General Concepts

### Copies or Views of Objects

In [242]:
a = np.array([1, 2, 3, 4])
# Plain assignment does not copy: b is a second name for the same array,
# which the next cell demonstrates by writing to a and reading b.
b = a
b

array([1, 2, 3, 4])

In [243]:
a[2] = 0
b

array([1, 2, 0, 4])

In [244]:
# A slice stays tied to the original data: the following cell changes a[0]
# and the change is visible through c.
c = a[0:2]
c

array([1, 2])

In [245]:
a[0] = 0
c

array([0, 2])

In [246]:
a = np.array([1, 2, 3, 4])
# copy() produces an independent array, so the write to a[0] in the next
# cell leaves c untouched.
c = a.copy()
c

array([1, 2, 3, 4])

In [247]:
a[0] = 0
c

array([1, 2, 3, 4])

### Broadcasting

In [248]:
A = np.arange(16).reshape(4, 4)
b = np.arange(4)
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [249]:
b

array([0, 1, 2, 3])

In [250]:
A + b

array([[ 0,  2,  4,  6],
       [ 4,  6,  8, 10],
       [ 8, 10, 12, 14],
       [12, 14, 16, 18]])

In [251]:
# Shapes (3, 1, 2) and (3, 2, 1): when the two arrays are added below, the
# length-1 axes are stretched to match, producing a (3, 2, 2) result.
m = np.arange(6).reshape(3, 1, 2)
n = np.arange(6).reshape(3, 2, 1)
m

array([[[0, 1]],

       [[2, 3]],

       [[4, 5]]])

In [252]:
n

array([[[0],
        [1]],

       [[2],
        [3]],

       [[4],
        [5]]])

In [253]:
m + n

array([[[ 0,  1],
        [ 1,  2]],

       [[ 4,  5],
        [ 5,  6]],

       [[ 8,  9],
        [ 9, 10]]])

In [254]:
# Structured array: every element is a record with four typed fields.
# The comma-separated dtype string gives one type code per field
# (i2 = 16-bit int, S6 = 6-byte string, f4 = 32-bit float, c8 = 64-bit
# complex); the fields get the default names f0..f3.
# 'S6' is used instead of 'a6' because the 'a' alias is deprecated in
# NumPy 2.0; both spellings describe the same bytes field.
structured = np.array(
    [(1, 'First', 0.5, 1+2j), (2, 'Second', 1.3, 2-2j), (3, 'Third', 0.8, 1+3j)],
    dtype='i2, S6, f4, c8',
)
structured

array([(1, b'First', 0.5, 1.+2.j), (2, b'Second', 1.3, 2.-2.j),
       (3, b'Third', 0.8, 1.+3.j)],
      dtype=[('f0', '<i2'), ('f1', 'S6'), ('f2', '<f4'), ('f3', '<c8')])

In [255]:
# Same structured array, with the field types spelled as full type names
# rather than short codes. The string field is written 'S6' because the 'a'
# alias ('a6') is deprecated in NumPy 2.0; the resulting dtype is unchanged.
structured = np.array(
    [(1, 'First', 0.5, 1+2j), (2, 'Second', 1.3, 2-2j), (3, 'Third', 0.8, 1+3j)],
    dtype='int16, S6, float32, complex64',
)
structured

array([(1, b'First', 0.5, 1.+2.j), (2, b'Second', 1.3, 2.-2.j),
       (3, b'Third', 0.8, 1.+3.j)],
      dtype=[('f0', '<i2'), ('f1', 'S6'), ('f2', '<f4'), ('f3', '<c8')])

In [256]:
structured['f1']

array([b'First', b'Second', b'Third'], dtype='|S6')

In [257]:
# Structured array with explicit field names: the dtype is a list of
# (name, type code) pairs. 'S6' replaces 'a6' because the 'a' alias is
# deprecated in NumPy 2.0; the field is still a 6-byte string.
structured = np.array(
    [(1, 'First', 0.5, 1+2j), (2, 'Second', 1.3, 2-2j), (3, 'Third', 0.8, 1+3j)],
    dtype=[('id', 'i2'), ('position', 'S6'), ('value', 'f4'), ('complex', 'c8')],
)
structured

array([(1, b'First', 0.5, 1.+2.j), (2, b'Second', 1.3, 2.-2.j),
       (3, b'Third', 0.8, 1.+3.j)],
      dtype=[('id', '<i2'), ('position', 'S6'), ('value', '<f4'), ('complex', '<c8')])

In [258]:
# Rename the fields in place by assigning a new tuple to dtype.names; only
# the second field changes here, from 'position' to 'order'.
structured.dtype.names = ('id','order','value','complex')

In [259]:
structured['order']

array([b'First', b'Second', b'Third'], dtype='|S6')

## Reading and Writing Array Data on Files

### Loading and Saving Data in Binary Files

In [260]:
# Draw 12 random floats and arrange them as a 4x3 array in a single
# expression, so `data` is bound only once.
data = np.random.random(12).reshape(4, 3)
data

array([[0.47793837, 0.38993589, 0.17821386],
       [0.08300138, 0.72422032, 0.82414741],
       [0.60601897, 0.92534428, 0.83047359],
       [0.52904808, 0.22046379, 0.16170264]])

In [261]:
# The name is given without an extension; np.save appends '.npy', which is
# why the next cell loads 'saved_data.npy'.
np.save('saved_data', data)

In [262]:
loaded_data = np.load('saved_data.npy')
loaded_data

array([[0.47793837, 0.38993589, 0.17821386],
       [0.08300138, 0.72422032, 0.82414741],
       [0.60601897, 0.92534428, 0.83047359],
       [0.52904808, 0.22046379, 0.16170264]])

### Reading Files with Tabular Data

In [263]:
# Requires ch3_data.csv in the current working directory; no cell in this
# notebook creates it. names=True makes genfromtxt take the field names from
# the file's header row, yielding a structured array.
data = np.genfromtxt('ch3_data.csv', delimiter=',', names=True)
data

array([(1., 123., 1.4, 23.), (2., 110., 0.5, 18.), (3., 164., 2.1, 19.)],
      dtype=[('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value2_1', '<f8')])

In [264]:
data2 = np.genfromtxt('ch3_data2.csv', delimiter=',', names=True)
data2

array([(1., 123., 1.4, 23.), (2., 110., nan, 18.), (3.,  nan, 2.1, 19.)],
      dtype=[('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value2_1', '<f8')])

In [265]:
data2['id']

array([1., 2., 3.])

In [266]:
data2[0]

(1., 123., 1.4, 23.)