## NumPy

In [106]:
import numpy as np
np.__version__

'1.23.5'

In [107]:
# Build a 1-D vector and a 2-D matrix (two identical rows) to compare their metadata.
values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
example = np.array(values)
example_2 = np.array([values, values])

# (label, value) pairs printed in order. The labels keep their trailing space;
# print() then adds its own separator between the label and the value.
report = [
    ("shape: ", example.shape),
    ("data type: ", example.dtype),
    ("number of dimensions: ", example.ndim),
    ("size: ", example.size),
    ("example 2: size: ", example_2.size),
    ("item size: ", example.itemsize),
    ("example 2: item size: ", example_2.itemsize),
]
for label, value in report:
    print(label, value)

shape:  (10,)
data type:  int64
number of dimensions:  1
size:  10
example 2: size:  20
item size:  8
example 2: item size:  8


In [108]:
# Arrays are mutable: assigning to an index overwrites that element in place.
print("original example: ", example)
example[2] = 5  # replaces the value at index 2 (previously 3) with 5
print("modified example: ", example)

original example:  [ 1  2  3  4  5  6  7  8  9 10]
modified example:  [ 1  2  5  4  5  6  7  8  9 10]


In [109]:
# sum an np array
# .sum() with no axis argument adds up every element, whatever the number of dimensions.
# `example` had index 2 overwritten with 5 in an earlier cell, hence 57 rather than 55.
print(example.sum())
print(example_2.sum())

57
110


In [110]:
# scalar (element-wise) multiplication: every element is multiplied by 2.
# This is not matrix multiplication; that is done with np.dot or the @ operator.
example_result = example * 2
print(example_result)

[ 2  4 10  8 10 12 14 16 18 20]


In [111]:
# The * operator means different things for Python lists and NumPy arrays.
example_list = [1, 2, 3, 4, 5]
print(2 * example_list) # repeats the list: the result is the list followed by itself
example_array = np.array([1, 2, 3, 4, 5])
print(2 * example_array) # multiplies each element by 2

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
[ 2  4  6  8 10]


In [112]:
# dot product, computed by hand: multiply matching elements, then add up the products
a = np.array([1,2])
b = np.array([3,4])
dot = sum(x * y for x, y in zip(a, b))

print(dot)

11


In [162]:
# testing numpy speed
from timeit import default_timer as timer

# Two random vectors of 100,000 samples each.
a = np.random.randn(100000)
b = np.random.randn(100000)
# tolist() converts every element to a built-in Python float. list(a) would
# instead fill the list with NumPy scalar objects, whose arithmetic is slower
# than plain floats, so the "lists" timing would not measure ordinary Python lists.
A = a.tolist()
B = b.tolist()
T = 1000  # number of times each implementation is called

def dot1():
    """Dot product of the lists A and B, accumulated one index at a time in pure Python."""
    total = 0
    length = len(A)
    for k in range(length):
        total += A[k] * B[k]
    return total

def dot2():
    """Dot product of the arrays a and b, computed by np.dot."""
    return np.dot(a,b)

def _seconds_for(func, repeats):
    """Return the wall-clock seconds spent calling ``func`` ``repeats`` times."""
    began = timer()
    for _ in range(repeats):
        func()
    return timer() - began

# Time both implementations over the same number of calls.
t1 = _seconds_for(dot1, T)
t2 = _seconds_for(dot2, T)

print(f'time with lists: {t1} s')
print(f'time with numpy: {t2} s')
print(f'numpy is {t1/t2} times faster')

time with lists: 8.636911667000277 s
time with numpy: 0.04848462500012829 s
numpy is 178.13712423221637 times faster


In [163]:
# multidimensional arrays
a = np.array([[1,3], [4,6]])
print(a)
print(a.shape) # (number of rows, number of columns)

# accessing elements
print(a[0]) #row
print(a[0][0]) #element: index the row first, then index into that row
print(a[0,0]) #element: same value, using a single (row, column) index

# slicing
print(a[:,0]) #all rows in col 0
print(a[0,:]) # all cols in row 0

[[1 3]
 [4 6]]
(2, 2)
[1 3]
1
1
[1 4]
[1 3]


In [164]:
# Transpose
# .T swaps the rows and columns of `a` (the 2x2 array defined in the previous cell).
print(a.T)

[[1 4]
 [3 6]]


In [165]:
print(a)
b = np.array([[1,2], [4,5]])
c = a.dot(b) # matrix product (rows of a combined with columns of b)
# NOTE(review): c is computed but never printed, so this cell only shows the element-wise result.
d = a * b # element-wise product: each entry of a times the matching entry of b
print(d)

[[1 3]
 [4 6]]
[[ 1  6]
 [16 30]]


In [166]:
# determinant
# `a` is still the 2x2 array [[1, 3], [4, 6]] from the earlier cell: 1*6 - 3*4 = -6
c = np.linalg.det(a)
print(c)

# inverse
d = np.linalg.inv(a)
print(d)

# diagonal
# given a 2-D array, np.diag extracts its main diagonal as a 1-D array
c = np.diag(a)
print(c)

# diag on a vector
# given a 1-D sequence, np.diag builds a square matrix with those values on the diagonal
c = np.diag([1,3])
print(c)


-6.0
[[-1.          0.5       ]
 [ 0.66666667 -0.16666667]]
[1 6]
[[1 0]
 [0 3]]


In [167]:
# indexing, slicing and boolean indexing
a = np.array([[1,2,3,4], [5,6,7,8], [9, 10, 11, 12]])

# indexing
print(a[0,1]) # row 0, column 1

# slicing
print(a[0,:]) # whole first row
print(a[:,0]) # whole first column
print(a[0:2, 0:2]) # top-left 2x2 block (rows 0-1, columns 0-1)

# indexing starting at the end
print(a[-1, -1]) # last row, last column

# boolean indexing
print(a[a > 5]) # 1-D array of the elements greater than 5

bool_idx = a > 2 # boolean mask with the same shape as a
print(bool_idx)
print(a[bool_idx])
print(np.where(a >2, 1, -1)) # 1 where the condition holds, -1 elsewhere

# fancy indexing
print(a[[0, 1, 2], [0, 1, 0]]) # picks the elements at (0,0), (1,1) and (2,0)

a = np.array([1, 2, 3, 4, 5, 6, 7, 8])
even = np.argwhere(a % 2 == 0).flatten() # positions (not values) of the even entries, as a 1-D array
print(even)
a_even = a[even] # index with those positions to get the even values themselves
print(a_even)

2
[1 2 3 4]
[1 5 9]
[[1 2]
 [5 6]]
12
[ 6  7  8  9 10 11 12]
[[False False  True  True]
 [ True  True  True  True]
 [ True  True  True  True]]
[ 3  4  5  6  7  8  9 10 11 12]
[[-1 -1  1  1]
 [ 1  1  1  1]
 [ 1  1  1  1]]
[1 6 9]
[1 3 5 7]
[2 4 6 8]


In [168]:
# reshaping
a = np.arange(1,7) # the integers 1 through 6
print(a)

b = a.reshape((2,3)) # 2 rows, 3 columns
print(b)

c = a.reshape((3,2)) # 3 rows, 2 columns
print(c)

# reshaping using newaxis
print(a.shape)
d = a[np.newaxis, :] # adds a leading axis: shape (6,) becomes (1, 6), a single row
print(d)
print(d.shape)

e = a[:, np.newaxis] # adds a trailing axis: shape (6,) becomes (6, 1), a single column
print(e)
print(e.shape)

[1 2 3 4 5 6]
[[1 2 3]
 [4 5 6]]
[[1 2]
 [3 4]
 [5 6]]
(6,)
[[1 2 3 4 5 6]]
(1, 6)
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
(6, 1)


In [169]:
# concatenation
a = np.array([[1,2],[5,6]])
b = np.array([[7,8]])
print(a)
print(b)

c = np.concatenate((a,b), axis=0) # axis=0 places b underneath a
print(c)

# add a new row
# NOTE(review): this is the same call as the one above, so d equals c.
d = np.concatenate((a,b), axis=0)
print(d)

# add a new column: b.T turns the 1x2 row into a 2x1 column so it can be joined along axis=1
e = np.concatenate((a,b.T), axis=1)
print(e)

[[1 2]
 [5 6]]
[[7 8]]
[[1 2]
 [5 6]
 [7 8]]
[[1 2]
 [5 6]
 [7 8]]
[[1 2 7]
 [5 6 8]]


In [170]:
# hstack and vstack
a = np.array([1,2,3])
b = np.array([4,5,6])

c = np.hstack((a,b)) # the two 1-D arrays are joined end to end
print(c)

a = np.array([[1],[2],[3]])
b = np.array([[4],[5],[6]])

c = np.vstack((a,b)) # the two 3x1 columns are stacked into one 6x1 column
print(c)

[1 2 3 4 5 6]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]


In [171]:
# broadcasting
a = np.array([[1,2,3,4], [5,6,7,8]])
b = np.array([1,2,3,4])

# a has shape (2, 4) and b has shape (4,), so b is added to each row of a
c = a + b
print(c)

[[ 2  4  6  8]
 [ 6  8 10 12]]


In [172]:
# functions and axis
a  = np.array([[1,2,3],[4,5,6]])
print(a.sum()) # default = None => Overall sum
print(a.sum(axis=None)) # overall sum
print(a.sum(axis=0)) # sums down the rows: one total per column
print(a.sum(axis=1)) # sums across the columns: one total per row

print(a.mean(axis=0)) # mean of each column
print(a.mean(axis=1)) # mean of each row

# some other functions: std, var, min, max, ...

21
21
[5 7 9]
[ 6 15]
[2.5 3.5 4.5]
[2. 5.]


In [173]:
# datatypes
# Each entry is (values, requested dtype); None lets NumPy infer the dtype from the values.
cases = [
    ([1,2], None),       # integers: dtype is inferred
    ([1.0,2.0], None),   # floats: dtype is inferred
    ([1,2], np.int64),   # forced datatype, 8 bytes per element
    ([1,2], np.int32),   # forced datatype, 4 bytes per element
]
for values, forced in cases:
    x = np.array(values, dtype=forced)
    print(x.dtype)

int64
float64
int64
int32


In [174]:
# copying
a = np.array([1,2,3,4,5])
b = a # plain assignment: b is a second name for the same array, not a copy
# this will modify a as well
b[0] = 100

print(a)
print(b)

b = a.copy() # independent copy with its own data
b[0] = 1000 # only b changes; a keeps 100 at index 0

print(a)
print(b)

[100   2   3   4   5]
[100   2   3   4   5]
[100   2   3   4   5]
[1000    2    3    4    5]


In [175]:
# generating arrays
# NOTE(review): no random seed is set in this cell, so the random outputs differ on every run.

a = np.zeros((2,2)) # Create an array of all zeros
print(a) # prints a 2x2 array of 0.0

a = np.ones((1,2)) # Create an array of all ones
print(a)

a = np.full((2,2), 7) # Create a constant array
print(a) # prints a 2x2 array filled with the integer 7

a = np.eye(2) # Create a 2x2 identity matrix
print(a) # prints the 2x2 identity matrix (float values)

# integers 0 through 9 (the stop value 10 is excluded)
a = np.arange(10)
print(a)

# linspace
a = np.linspace(0, 10, 5) # Create an array of 5 values evenly spaced from 0 to 10, both endpoints included
print(a)

# random numbers
a = np.random.random((2,2)) # Create a 2x2 array of uniform random values in [0, 1)
print(a)

# "normal" distribution
a = np.random.randn(2,2) # Create a 2x2 array of samples from the standard normal distribution
print(a)

a = np.random.randn(1000)
# the mean should be close to 0; the variance and standard deviation should be close to 1
print(a.mean(), a.var(), a.std())

# random ints
a = np.random.randint(0, 10, (2,2)) # Create a 2x2 array of random integers from 0 (inclusive) to 10 (exclusive)
print(a)

# 10 random draws from the integers 0 through 6
a = np.random.choice(7, size=10)
print(a)

# 8 random draws (with repetition) from the given list
a = np.random.choice([10, 22, 39, 46], size=8)
print(a)

[[0. 0.]
 [0. 0.]]
[[1. 1.]]
[[7 7]
 [7 7]]
[[1. 0.]
 [0. 1.]]
[0 1 2 3 4 5 6 7 8 9]
[ 0.   2.5  5.   7.5 10. ]
[[0.82663887 0.07219693]
 [0.01670803 0.74178286]]
[[-0.91793254  0.82063731]
 [ 0.14900966 -1.911465  ]]
-0.0035918310597463448 0.9693439477147077 0.9845526637588807
[[0 7]
 [6 3]]
[1 6 2 3 1 4 1 5 0 1]
[22 39 10 10 22 39 46 10]


In [176]:
# linear algebra
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
# np.linalg.eig returns the eigenvalues and a matrix whose COLUMNS are the
# matching eigenvectors: column i belongs to eigenvalue i.
eigenvalues, eigenvectors = np.linalg.eig(a)
print(eigenvalues)
print(eigenvectors)

# the eigenvector for the first eigenvalue is the first column, not the first row
print(eigenvectors[:,0])

[ 1.61168440e+01 -1.11684397e+00 -8.58274334e-16]
[[-0.23197069 -0.78583024  0.40824829]
 [-0.52532209 -0.08675134 -0.81649658]
 [-0.8186735   0.61232756  0.40824829]]
[-0.23197069 -0.52532209 -0.8186735 ]


In [177]:
# solving linear systems
# NOTE(review): this A is singular -- its first and third rows add up to twice the
# second row -- so it has no true inverse and A x = b has no unique solution. That is
# why the two printed results disagree. Use a non-singular A to compare the two methods.
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
b = np.array([1,2,3])

x = np.linalg.inv(A).dot(b) # explicit inverse, then multiply by b
print(x)

# instead use:
# np.linalg.solve solves A x = b directly, without forming the inverse
x = np.linalg.solve(A, b)
print(x)

[ 4. -4.  0.]
[-0.23333333  0.46666667  0.1       ]


In [178]:
# loading data from csv files
# Both calls read the same comma-separated file into a float32 array.
# 'data.csv' is a relative path, so it is looked up in the current working directory.
data = np.loadtxt('data.csv', delimiter=',', dtype=np.float32)
print(data)
# genfromtxt is the more tolerant reader: unlike loadtxt it can handle missing values
data = np.genfromtxt('data.csv', delimiter=',', dtype=np.float32)
print(data)

[[1. 2. 3. 4.]
 [5. 6. 7. 8.]]
[[1. 2. 3. 4.]
 [5. 6. 7. 8.]]
