## Numpy Array vs Ordinary List

In [1]:
import numpy as np

### Square of Array Elements

In [2]:
ordinary_list = range(5000)
%timeit [i**2 for i in ordinary_list]

710 µs ± 5.66 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [3]:
numpy_array = np.arange(5000)
%timeit np.square(numpy_array)

1.48 µs ± 10.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


### Matrix Transpose

In [24]:
import random

def transpose(matrix):
    ncols = len(matrix[0])
    return [[row[i] for row in matrix] for i in range(ncols)]

a = [[1,2],
     [3,4]]
print(transpose(a))

gen_rnd_matrix = lambda n: [[random.randrange(0,100) for _ in range(n)] for _ in range(n)]

ordinary_matrix = gen_rnd_matrix(500)
%timeit transpose(ordinary_matrix)

[[1, 3], [2, 4]]
6.01 ms ± 50.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [25]:
numpy_matrix = np.array(ordinary_matrix)
%timeit numpy_matrix.T

96.6 ns ± 0.64 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


### Dot Product

In [26]:
dot = lambda u, v: sum(x*y for x,y in zip(u, v))
gen_rnd_array = lambda n: [random.randrange(0, 100) for _ in range(n)]

a, b = [1, 2], [3, 4]
print(dot(a, b))

u, v = gen_rnd_array(5000), gen_rnd_array(5000)

%timeit dot(u, v)

11
213 µs ± 1.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [14]:
a, b = np.array(u), np.array(v)
%timeit np.dot(a, b)

3.02 µs ± 27.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


### Matrix Multiplication

In [28]:
def matmul(a, b):
    bt = transpose(b)
    rv = []
    for i, row in enumerate(a):
        rv.append([])
        for col in bt:
            rv[-1].append(dot(row, col))
    return rv

a = [[1,2],
     [3,4]]
b = [[1,3],
     [2,4]]
matmul(a,b)

a, b = gen_rnd_matrix(500), gen_rnd_matrix(500)
%timeit matmul(a, b)

5.12 s ± 61.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
%timeit np.array(a) @ np.array(b)

97.6 ms ± 640 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Array Creation

In [53]:
def print_array_info(arr):
    print(arr)
    print(f'Data type: {arr.dtype}')
    print(f'Number of dimensions = {arr.ndim}')
    print(f'Shape of the array = {arr.shape}')
    print(f'Number of elements = {arr.size}')
    print(f'Length = {len(arr)}')

In [54]:
# 1D array
a = np.array([1, 2, 3, 4])
print_array_info(a)

[1 2 3 4]
Data type: int64
Number of dimensions = 1
Shape of the array = (4,)
Number of elements = 4
Length = 4


In [55]:
# 2D array
b = np.array([[0, 1, 2], [3, 4, 5]])
print_array_info(b)

[[0 1 2]
 [3 4 5]]
Data type: int64
Number of dimensions = 2
Shape of the array = (2, 3)
Number of elements = 6
Length = 2


In [56]:
# 3D array
c = np.array([[[1], [2]], [[3], [4]]])
print_array_info(c)

[[[1]
  [2]]

 [[3]
  [4]]]
Data type: int64
Number of dimensions = 3
Shape of the array = (2, 2, 1)
Number of elements = 4
Length = 2


In [57]:
a = np.arange(1, 9, 2) # start, end, step
a

array([1, 3, 5, 7])

In [58]:
b = np.linspace(0, 1, 6)  # start, end, number of points
b

array([0. , 0.2, 0.4, 0.6, 0.8, 1. ])

In [59]:
a = np.ones((3, 3))
print(a)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [60]:
b = np.zeros(5)
print(b)

[0. 0. 0. 0. 0.]


In [61]:
c = np.eye(3)
print(c)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [62]:
e = np.empty((2, 3))
print(e)

[[0.  0.2 0.4]
 [0.6 0.8 1. ]]


In [63]:
f = np.empty((3,4))
f.fill(3)
print(f)

[[3. 3. 3. 3.]
 [3. 3. 3. 3.]
 [3. 3. 3. 3.]]


In [64]:
a = np.array([1, 2, 3])
print(a)
print(a.dtype)
a[0] = 9.3
print(a)
print(a.dtype)

[1 2 3]
int64
[9 2 3]
int64


In [65]:
b = np.array([1., 2, 3])
b.dtype

dtype('float64')

In [66]:
# explicit writing dtype
c = np.array([1, 2, 3], dtype=np.float64)
print(c)
print(c.dtype)

[1. 2. 3.]
float64


In [67]:
d = np.array([True, False, False, True])
d.dtype

dtype('bool')

In [82]:
a = np.random.rand(3,2) * 3.4
b = np.random.rand(3,2) * 3.4

b

array([[2.71845658, 1.19699218],
       [3.0620345 , 2.35135654],
       [1.62058408, 2.97047883]])

## Indexing and Slicing

1) From the array created below extract:
   - the first row
   - the 2nd column
   - the first, the middle and the last column
   - a submatrix starting from the 3rd row and every even column
   - all even numbers
   - the 1st , the 2nd and the last element on the 3rd column

2) find the indices of all numbers devisible by 3

In [68]:
a = np.arange(25).reshape(5, 5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [69]:
a[0, :]

array([0, 1, 2, 3, 4])

In [70]:
a[:, 1]

array([ 1,  6, 11, 16, 21])

In [71]:
a[:, [0, int(a.shape[1]/2), -1]]

array([[ 0,  2,  4],
       [ 5,  7,  9],
       [10, 12, 14],
       [15, 17, 19],
       [20, 22, 24]])

In [72]:
a[2:, [0, 2, 4]]

array([[10, 12, 14],
       [15, 17, 19],
       [20, 22, 24]])

In [73]:
a[a % 2 == 0]  # with mask array

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24])

In [74]:
mask = np.array([1,1,0,0,1], dtype=bool)
# mask = np.array([True, True, False, False, True])
a[mask, 2]

array([ 2,  7, 22])

In [75]:
inds = np.where(a % 3 == 0)
print(inds[0])
print(inds[1])

[0 0 1 1 2 3 3 4 4]
[0 3 1 4 2 0 3 1 4]


## Broadcasting

How numpy treats arrays with different shapes during arithemtic operations?

Broadcasting rules:

In [76]:
a = np.random.randint(20, size=(10))
print(a)
b = np.random.randint(20, size=(10))
print(b)
a + b

[ 1  5 11 10  8  0 19 16  4 14]
[14  7 19 18  1 19  5  5 14 19]


array([15, 12, 30, 28,  9, 19, 24, 21, 18, 33])

In [83]:
a = np.random.randint(20, size=(10))
print(a)
b = np.random.randint(20, size=(7))
print(b)
a + b

[10 18 19  2  4 19 15  1  6 14]
[11  4  4  9  4 15 13]


ValueError: operands could not be broadcast together with shapes (10,) (7,) 

In [86]:
a = np.random.randint(20, size=(5,4))
a

array([[11,  2, 14, 19],
       [ 7, 17,  0,  7],
       [12,  4,  6,  5],
       [14, 15, 10, 18],
       [19,  0, 14, 15]])

In [87]:
b = np.random.randint(20, size=(5,1))
b

array([[ 5],
       [ 7],
       [ 1],
       [18],
       [ 5]])

In [88]:
a * b

array([[ 55,  10,  70,  95],
       [ 49, 119,   0,  49],
       [ 12,   4,   6,   5],
       [252, 270, 180, 324],
       [ 95,   0,  70,  75]])

## Exercise

The file 'lines.dat' contains a set of assgined lines with their qunatum numbers, frequencies and uncertanties.
The first 4 columns are the upper state quantum numbers, the next four columns are the lower state quantum numbers and the last 2 columns are the line frequency and uncertainty respectivly.

- from this file extract all frequencies and their uncertanties 
- find the weights of each line
- find all unique terms from the upper and lower term values
- find the maximum and minimum frequency value
- find...

In [91]:
lines = np.loadtxt('input.dat')
nq = 4
freqs = lines[:, 2*nq]
uncs = lines[:, 2*nq+1]
weights = 1./np.square(uncs)
terms = np.vstack((lines[:, :nq], lines[:, nq:2*nq]))
unique_terms = np.unique(terms, axis=0)

OSError: input.dat not found.