## Why is NumPy Faster? - Fixed Type
- Faster to read fewer bytes of memory
- No type checking when iterating through objects

## Why is NumPy Faster? - Contiguous Memory
Benefits:
- SIMD Vector Processing
- Effective Cache Utilization

## How are Lists different from NumPy?
### Lists
    a = [1,3,5]
    b = [1,2,3]
    
    a*b = __ERROR__
    
### NumPy
    a = np.array([1,3,5])
    b = np.array([1,2,3])
    
    a*b = np.array([1,6,15])

## Applications of NumPy?
- Mathematics (MATLAB Replacement)
- Plotting (Matplotlib)
- Backend (Pandas, Connect 4, Digital Photography)
- Machine Learning

In [1]:
#https://share.cocalc.com/share/47dd0c101469bae8a3c5cc029f056d7ae4394fa5/support/2016-02-12-LIGO/GW150914_tutorial.ipynb?viewer=share
#https://www.ibm.com/support/knowledgecenter/SSHGWL_1.2.3/analyze-data/markd-jupyter.html

# Load NumPy

In [1]:
import numpy as np

# The Basics

In [3]:
a = np.array([1,2,3])
print(a)

[1 2 3]


In [4]:
b = np.array([[9.0,8.0,7.0],[6.0,5.0,4.0]])
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


In [5]:
# Get Dimension
a.ndim

1

In [6]:
b.ndim

2

In [7]:
# Get Shape
a.shape

(3,)

In [8]:
b.shape # (row, column)

(2, 3)

In [9]:
# Get Type
a.dtype

dtype('int32')

In [10]:
a = np.array([1,2,3], dtype = 'int16')

In [11]:
a.dtype

dtype('int16')

In [12]:
# Get Size
a.itemsize # in bytes

2

In [13]:
a = np.array([1,2,3], dtype = 'int32')
a.itemsize

4

In [14]:
# Get Total Number of Elements
a.size

3

In [15]:
# Get Total Size
a.size * a.itemsize

12

In [16]:
# Get Total Size
a.nbytes

12

In [17]:
b.itemsize # b is float

8

# Accessing/Changing Specific Elements, Rows, Columns, etc

In [18]:
a = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [19]:
a.shape

(2, 7)

In [20]:
# Get a Specific Element [r, c]
a[1, 5]

13

In [21]:
# Get a Specific Row
a[0, :]

array([1, 2, 3, 4, 5, 6, 7])

In [22]:
# Get a Specific Column
a[:, 2]

array([ 3, 10])

In [23]:
# [start_index:end_index:step_size]
a[0, 1:6:2] # end_index 6 is exclusive, so this takes indices 1, 3, 5

array([2, 4, 6])

In [24]:
# Alternative
a[0, 1:-1:2] # end_index -1 is the last index (6), exclusive, so same as a[0, 1:6:2]

array([2, 4, 6])

In [25]:
a[1,5] = 20
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 20 14]]


In [26]:
a[:,2] = 5
print(a)

[[ 1  2  5  4  5  6  7]
 [ 8  9  5 11 12 20 14]]


In [27]:
a[:,2] = [1,2]
print(a)

[[ 1  2  1  4  5  6  7]
 [ 8  9  2 11 12 20 14]]


* 3-d example

In [28]:
b = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [29]:
# Get Specific Element (work outside in)
b[0,1,1] # 1st block (index 0 on axis 0), 2nd row, 2nd column

4

In [30]:
b[:,1,:] # All blocks (axis 0), 2nd row, All columns

array([[3, 4],
       [7, 8]])

In [31]:
# Replace
b[:,1,:] = [[9,9],[8,8]]
print(b)

[[[1 2]
  [9 9]]

 [[5 6]
  [8 8]]]


# Initializing Different Types of Arrays

In [32]:
# All 0s Matrix
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [33]:
np.zeros([2,3])

array([[0., 0., 0.],
       [0., 0., 0.]])

In [34]:
np.zeros([2,3,3])

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [35]:
np.zeros([2,3,3,2])

array([[[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]]])

In [36]:
# All 1s Matrix
np.ones([4,2,2])

array([[[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]]])

In [37]:
np.ones([4,2,2], dtype = 'int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]])

In [38]:
# Any Other Number
np.full((2,2), 99)

array([[99, 99],
       [99, 99]])

In [39]:
np.full((2,2), 99, dtype = 'float32')

array([[99., 99.],
       [99., 99.]], dtype=float32)

In [40]:
# Any Other Number (Full-like)
np.full_like(a, 4)

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [41]:
np.full(a.shape, 4)

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [42]:
# Random Decimal Numbers
np.random.rand(4,2)

array([[0.13142974, 0.48440342],
       [0.41486693, 0.66285077],
       [0.83154695, 0.89222687],
       [0.16957989, 0.56354472]])

In [43]:
np.random.rand(4,3,2)

array([[[0.51140539, 0.549828  ],
        [0.19131146, 0.91233236],
        [0.23122965, 0.91541438]],

       [[0.24331002, 0.97806695],
        [0.97115648, 0.05840141],
        [0.89801224, 0.12293802]],

       [[0.01760966, 0.27840877],
        [0.44278151, 0.6017623 ],
        [0.50200322, 0.25182552]],

       [[0.39747222, 0.54310258],
        [0.16325436, 0.44638255],
        [0.46057184, 0.06798731]]])

In [44]:
np.random.random_sample(a.shape)

array([[0.29236796, 0.75498232, 0.37139982, 0.1727467 , 0.43934129,
        0.06571414, 0.21421101],
       [0.0505228 , 0.8397471 , 0.63289391, 0.26992882, 0.47253166,
        0.05764579, 0.7275429 ]])

In [45]:
# Random Integer Values
np.random.randint(7)

4

In [46]:
np.random.randint(7, size = (3,3))

array([[5, 6, 5],
       [2, 6, 0],
       [1, 5, 2]])

In [47]:
np.random.randint(-4, 7, size = (3,3)) # from -4 to 6 (7 exclusive)

array([[ 0,  2,  1],
       [ 1,  6, -3],
       [ 3,  4, -4]])

In [48]:
# The Identity Matrix
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [49]:
# Repeat an Array
arr = np.array([1,2,3])
r1 = np.repeat(arr, 3)
print(r1)

[1 1 1 2 2 2 3 3 3]


In [50]:
arr = np.array([[1,2,3]])
r1 = np.repeat(arr, 3, axis = 0)
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


In [51]:
arr = np.array([[1,2,3]])
r1 = np.repeat(arr, 3, axis = 1)
print(r1)

[[1 1 1 2 2 2 3 3 3]]


In [52]:
output = np.ones([5,5])
print(output)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


In [53]:
z = np.zeros([3,3])
print(z)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [55]:
z[1,1] = 9
print(z)

[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]


In [56]:
output[1:4,1:4] = z
print(output)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


In [57]:
output[1:-1,1:-1] = z
print(output)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


##### Be careful when copying arrays!!!

In [58]:
a = np.array([1,2,3,])
a

array([1, 2, 3])

In [60]:
b = a # variable 'b' points to the same thing 'a' does
b

array([1, 2, 3])

In [61]:
b[0] = 100
print(b)
print(a)

[100   2   3]
[100   2   3]


In [63]:
a = np.array([1,2,3])
print(a)
b = a.copy()
print(b)

[1 2 3]
[1 2 3]


In [64]:
b[0] = 100
print(b)
print(a)

[100   2   3]
[1 2 3]


# Mathematics

In [75]:
a = np.array([1,2,3,4])
print(a)

[1 2 3 4]


In [72]:
a + 2

array([3, 4, 5, 6])

In [68]:
a - 2

array([-1,  0,  1,  2])

In [69]:
a * 2

array([2, 4, 6, 8])

In [70]:
a / 2

array([0.5, 1. , 1.5, 2. ])

In [71]:
a += 2

array([3, 4, 5, 6])

In [73]:
b = np.array([1,0,1,0])

In [76]:
a + b

array([2, 2, 4, 4])

In [77]:
a ** 2

array([ 1,  4,  9, 16], dtype=int32)

In [78]:
# Take the sine
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [79]:
# Take the cosine
np.cos(a)

array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

##### Linear Algebra

In [4]:
a = np.ones([2,3])
print(a)

b = 2*np.ones([3,2])
# b = np.full((3,2), 2)
print(b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2. 2.]
 [2. 2.]
 [2. 2.]]


In [5]:
np.matmul(a,b)

array([[6., 6.],
       [6., 6.]])

In [6]:
# Find the Determinant
c = np.identity(3)
np.linalg.det(c)

1.0

##### Statistics

In [9]:
stats = np.array([[1,2,3],[4,5,6]])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [10]:
np.min(stats)

1

In [11]:
np.max(stats)

6

In [12]:
np.min(stats, axis = 1) # each row

array([1, 4])

In [13]:
np.min(stats, axis = 0) # each column

array([1, 2, 3])

In [14]:
np.max(stats, axis = 1) # each row

array([3, 6])

In [16]:
np.max(stats, axis = 0) # each column

array([4, 5, 6])

In [17]:
np.sum(stats)

21

In [18]:
np.sum(stats, axis = 0) # each column

array([5, 7, 9])

# Reorganizing Arrays

In [20]:
before = np.array([[1,2,3,4],[5,6,7,8]])
print(before)
print(before.shape)

[[1 2 3 4]
 [5 6 7 8]]
(2, 4)


In [24]:
after = before.reshape(8,1)
print(after)

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]


In [23]:
after = before.reshape(4,2)
print(after)

[[1 2]
 [3 4]
 [5 6]
 [7 8]]


In [25]:
after = before.reshape(2,2,2)
print(after)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [28]:
# Vertically Stack
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

np.vstack([v1,v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [29]:
np.vstack([v1,v2,v2,v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8]])

In [30]:
np.vstack([v1,v2,v1,v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [34]:
# Horizontal Stack
h1 = np.ones([2,4])
h2 = np.zeros([2,2])

np.hstack([h1,h2])

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

# Miscellaneous

##### Load Data from File

In [3]:
filedata = np.genfromtxt('data.txt', delimiter = ',')
filedata

array([[  1.,  13.,  21.,  11., 196.,  75.,   4.,   3.,  34.,   6.,   7.,
          8.,   0.,   1.,   2.,   3.,   4.,   5.],
       [  3.,  42.,  12.,  33., 766.,  75.,   4.,  55.,   6.,   4.,   3.,
          4.,   5.,   6.,   7.,   0.,  11.,  12.],
       [  1.,  22.,  33.,  11., 999.,  11.,   2.,   1.,  78.,   0.,   1.,
          2.,   9.,   8.,   7.,   1.,  76.,  88.]])

In [4]:
filedata.astype('int32')

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]])

In [5]:
filedata

array([[  1.,  13.,  21.,  11., 196.,  75.,   4.,   3.,  34.,   6.,   7.,
          8.,   0.,   1.,   2.,   3.,   4.,   5.],
       [  3.,  42.,  12.,  33., 766.,  75.,   4.,  55.,   6.,   4.,   3.,
          4.,   5.,   6.,   7.,   0.,  11.,  12.],
       [  1.,  22.,  33.,  11., 999.,  11.,   2.,   1.,  78.,   0.,   1.,
          2.,   9.,   8.,   7.,   1.,  76.,  88.]])

In [7]:
filedata = filedata.astype('int32')
filedata

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]])

##### Boolean Masking and Advanced Indexing

In [9]:
filedata > 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [10]:
filedata >= 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [11]:
filedata[filedata > 50]

array([196,  75, 766,  75,  55, 999,  78,  76,  88])

In [15]:
# You can index with a list in NumPy
a = np.array([1,2,3,4,5,6,7,8,9])
a[[1,2,8]]

array([2, 3, 9])

In [16]:
np.any(filedata > 50, axis = 0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [17]:
np.all(filedata > 50, axis = 0)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [18]:
np.all(filedata > 50, axis = 1)

array([False, False, False])

In [19]:
((filedata > 50) & (filedata < 100))

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [22]:
(~((filedata > 50) & (filedata < 100)))

array([[ True,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

In [25]:
A = np.array([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20],[21,22,23,24,25]])
print(A)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]]


In [27]:
A[2:4,0:2]

array([[11, 12],
       [16, 17]])

In [29]:
A[[0,1,2,3],[1,2,3,4]]

array([ 2,  8, 14, 20])

In [36]:
A[[0,3,4],3:5] # A[[0,3,4],3:]

array([[ 4,  5],
       [19, 20],
       [24, 25]])