<a href="https://colab.research.google.com/github/asantucci/Python-Workshop/blob/main/Numerical_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Numpy & Matplotlib

What data structures have we seen so far?
Lists, tuples, dictionaries, ...
But no real array!

Numpy fixes that. Arrays are data structures where all elements have the same type, and where elements can be randomly accessed and modified very efficiently.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# A stock simulation

In [None]:
# Stock simulation: a random walk that starts at 0 and whose daily
# increments are independent draws from a normal distribution.
T = 100    # number of simulated days
mu = 1.2   # mean of each daily increment
std = 5.0  # standard deviation of each daily increment

# stock[0] stays at 0; every later day is the running sum of the increments,
# so one vectorized cumulative sum replaces the day-by-day Python loop.
stock = np.zeros(T)
stock[1:] = np.cumsum(np.random.normal(mu, std, T - 1))

plt.plot(stock)
plt.xlabel('Day')
plt.ylabel('AAPL ($)')  # AAPL is Apple's ticker symbol
plt.show()

In [None]:
# Stock simulation: n independent random walks, one per row of `stock`,
# all starting at 0.

T = 100  # number of simulated days
n = 10   # number of simulated paths
stock = np.zeros((n,T))

for t in range(1,T):
    # The mean increment decays exponentially with time, while the standard
    # deviation oscillates between 0 and 0.5.
    mu  = 1.2 * np.exp(-t/10)
    std = 0.25 * (1 + np.sin(t))
    # Advance all n paths by one day with a single draw of n increments.
    stock[:,t] = stock[:,t-1] + np.random.normal(mu,std,n)

# plt.plot draws one line per column, so transpose to get one line per path.
plt.plot(stock.transpose())
plt.xlabel('Day')
plt.ylabel('AAPL ($)')  # AAPL is Apple's ticker symbol
plt.show()

# Numpy

1-d, 2-d and n-d arrays


In [None]:
# A 1-d array of four integers.
a = np.array([1, 2, 3, 4])

# A 2-d array of floats: 2 rows, 3 columns.
b = np.array([[1., 2., 3.],
              [4., 5., 6.]])

# A 2-d "column" array of integers: 3 rows, 1 column.
c = np.array([[1], [2], [3]])

# Show each array, followed by its shape.
print("a\n", a)
print("a.shape ", a.shape)

print("b\n", b)
print("b.shape ", b.shape)

print("c\n", c)
print("c.shape ", c.shape)

Indexing and slicing

In [None]:
# A 4x3 array holding the integers 1..12, filled row by row:
#   [[ 1,  2,  3],
#    [ 4,  5,  6],
#    [ 7,  8,  9],
#    [10, 11, 12]]
A = np.arange(1, 13).reshape(4, 3)
print("A\n", A)
print("A[1,2]\n", A[1, 2])        # single element: row 1, column 2
print("A[1:,:2]\n", A[1:, :2])    # rows 1 onwards, first two columns
print("A[:,1]\n", A[:, 1])        # every row of column 1
print("A[0,:]\n", A[0, :])        # every column of row 0
print("A[::2,:]\n", A[::2, :])    # every second row

In [None]:
# Quiz: what does the slice below print?
# A is the 3x3 array
#   [[1, 2, 3],
#    [4, 5, 6],
#    [7, 8, 9]]
A = np.arange(1, 10).reshape(3, 3)
print(A[0:2, 1:3])

Reshaping

In [None]:
# Arrays are row major, and that is preserved: reshape reads the elements
# row by row and writes them back row by row.
A = np.array([[1, 2, 3], [4, 5, 6]])
print("A\n", A)

# Same six elements in the same order, now laid out as 3 rows of 2.
B = A.reshape((3, 2))
print("B\n", B)

# Using '-1' means "whatever you need to make it work": numpy infers that
# dimension from the total number of elements (here 6 / 6 = 1).
C = A.reshape((6, -1))
print("C\n", C)

More initialization

In [None]:
# A short tour of array constructors.
print(np.arange(10))                                  # the integers 0..9
print(np.arange(2, 13, 2))                            # from 2, stopping before 13, in steps of 2
print(np.zeros(shape=(2, 3)))                         # 2x3 array filled with zeros
print(np.ones(shape=(3, 2)))                          # 3x2 array filled with ones
print(np.random.uniform(low=0, high=1, size=(3, 2)))  # 3x2 uniform samples in [0, 1)
print(np.linspace(-1, 2, num=5))                      # 5 evenly spaced points from -1 to 2 inclusive

## Operations on arrays

Usual math operations

everything (`+, -, *, /, **, %`) is element-wise by default

In [None]:
# Two arrays of shape (3, 2): A holds the floats 0..5, B the integers 5..10.
A = np.arange(6, dtype=float).reshape(3, 2)
B = np.arange(5, 11, dtype=int).reshape(3, 2)

# Every operator below works element by element.
print(A - B)   # difference
print(A * B)   # product (not a matrix product)
print(A // B)  # floor division
print(A / B)   # true division

In [None]:
# Sizes have to match: element-wise operations need compatible shapes.
A = np.arange(6).reshape((3, 2))
C = np.arange(4).reshape((2, 2))
print(A)
print(C)

# A is (3, 2) and C is (2, 2): the leading dimensions (3 vs 2) differ and
# neither is 1, so numpy refuses to combine the two arrays. The error is
# caught so that the notebook still runs from top to bottom.
try:
    print(A + C)
except ValueError as err:
    print("A + C failed:", err)

In [None]:
# More advanced stuff: three different products of two vectors (1d arrays).
a = np.arange(5)      # [0, 1, 2, 3, 4]
b = np.arange(5, 10)  # [5, 6, 7, 8, 9]

print(a * b)           # element-wise: one product per position
print(np.dot(a, b))    # inner product: the sum of those products, a single number
print(np.outer(a, b))  # outer product: 5x5 table whose (i, j) entry is a[i] * b[j]

In [None]:
# We can also create a random array, then sort it...
# randint draws integers from [1, 10), i.e. the upper bound is excluded.
# Passing `size` fills the whole array in one call instead of looping in Python.
a = np.random.randint(1, 10, size=10)
print(f"Before sorting:\n{a}")
# np.sort returns a sorted copy; `a` itself is left untouched.
print(f"After sorting in ascending order:\n{np.sort(a)}")

In [None]:
# Draw a 3x3 matrix of random integers in [1, 10) with a single call;
# `size` gives the shape directly, so no Python loop or reshape is needed.
matrix = np.random.randint(1, 10, size=(3, 3))
print(f"Original matrix of random integers:\n{matrix}\n")
# np.sort returns a sorted copy; each column is sorted independently.
sorted_mat = np.sort(matrix, axis = 0)  # Note that axis 0 --> column-wise sort.
print(f"Matrix sorted along first axis:\n{sorted_mat}\n")

## Broadcasting

This is the way numpy operates on arrays of different shapes.

In [None]:
# A 2x5 array to experiment with. Nothing is added to it here: the operands
# it gets combined with are introduced one at a time in the cells that follow,
# so this cell does not depend on a `b` left over from an earlier cell.
A = np.arange(10).reshape(2, 5)
print(A.shape)
print(A)

In [None]:
# With a scalar the result is obvious: the scalar is combined with every element.
b = 2.0
print(b + A)

In [None]:
# With other arrays: walk the shapes backward from the last dimension and
# match them up, stretching a dimension where needed.
b = np.arange(5)
print(np.add(A, b))

In [None]:
# Sizes have to match, starting from the last dimension: A is (2, 3) and
# b is (3,), so the trailing 3s agree and b is applied to each row of A.
A = np.arange(6).reshape((2, 3))
b = np.arange(3)
print(A.shape)
print(b.shape)

print(A + b)

## Axis keyword

In [None]:
# The integers 0 through 9.
a = np.arange(10)

# With no `axis` argument, each reduction collapses the whole array
# into a single number.
print(np.sum(a))     # sum of all elements
print(np.mean(a))    # average
print(np.prod(a))    # product
print(np.std(a))     # standard deviation
print(np.var(a))     # variance
print(np.min(a))     # minimum
print(np.max(a))     # maximum
print(np.argmin(a))  # location of minimum
print(np.argmax(a))  # location of maximum

In [None]:
# A 2x3 array: rows [0, 1, 2] and [3, 4, 5].
a = np.arange(6).reshape((2, 3))
print(a)

# axis=0 collapses the rows, leaving one result per column;
# axis=1 collapses the columns, leaving one result per row.
print(np.sum(a, axis=0))
print(np.mean(a, axis=0))
print(np.std(a, axis=1))

In [None]:
# Quiz: what does the maximum along axis 0 look like?
a = np.array([[1, 2], [3, 4]])
print(np.max(a, axis=0))

### Mini-Exercise: normalize data

In [None]:
# 1000 observations of 10 features, drawn from a normal distribution
# with mean 2 and standard deviation 3.
X = np.random.normal(loc=2, scale=3, size=(1000, 10))
print(X.shape)

# Each row is an observation; each column is a feature.
# Normalize the data by subtracting from each column its mean
# and by dividing each column by its standard deviation.

# axis=0 reduces over the rows, giving one statistic per column.
mean, std = X.mean(axis=0), X.std(axis=0)

# Broadcasting applies the 10 per-column statistics to every one of the 1000 rows.
Xnormalized = (X - mean) / std
print(Xnormalized.mean(axis=0))

In [None]:
#@title Solution
# One-liner: subtract the per-column mean, then divide by the per-column
# standard deviation (axis=0 reduces over the rows).
Y = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

## Combining data via *stack and concatenate

In [None]:
# Stack the two 1-D sequences on top of each other, as the two rows of one 2-D array.
np.vstack([range(10), range(10, 20)])

In [None]:
# Join the two 1-D sequences end to end into one longer 1-D array.
stacked = np.hstack((range(10), range(10, 20)))

# Equivalently, since we're dealing with a 1-D array.
concatenated = np.concatenate((range(10), range(10, 20)))

# Only the last expression of a cell is displayed automatically, so print
# both results explicitly to make the comparison visible.
print(stacked)
print(concatenated)

## Axes manipulation.

### Squeeze out superfluous axes

In [None]:
# An axis of extent one is superfluous: it holds a single entry and so
# carries no information. Here is an array with such an axis.
a = np.array([[1, 2, 3, 4, 5]])
print(a.shape)  # <-- (1, 5): the first dimension doesn't really help us...
# Every element access has to step through that extra axis first, i.e. it is
# always of the form a[0][i] for some i.
a[0][1]

In [None]:
# Instead, simply "squeeze" the array: every axis of extent one is dropped
# while the elements themselves stay the same.
simplified = np.squeeze(a)
simplified

In [None]:
# Now a single index operator `[` is enough to retrieve an element,
# instead of the two that were needed before squeezing.
simplified[1]

## Helpers: `newaxis`, `swapaxes`, and `transpose`
There are a few other helper functions worth calling out.

In [None]:
simple_vec = np.arange(5)
# Sort of like the opposite of squeeze, here we *add* a new dimension into our vector...
# thereby making it have two dimensions (one which is clearly superfluous).
# Indexing with np.newaxis inserts an axis of extent one at that position,
# while `:` keeps every element of the original vector.
matrix = simple_vec[np.newaxis, :]
matrix

In [None]:
# Compare the shapes before and after adding the new axis.
print(f"The simple vector has shape {simple_vec.shape} whereas the matrix is of shape {matrix.shape}")

In [None]:
# Axes can also be exchanged: swapping the first two (zero-indexed) axes
# turns the rows of a 2-d array into columns and vice versa.
swapped = np.swapaxes(matrix, 0, 1)
print(swapped)
print(swapped.shape)

In [None]:
# Transposing reverses the order of the axes...helpful in mathematical contexts especially.
print(swapped.T.shape)
# `*` is element-wise, so multiplying an array by its transpose relies on
# broadcasting to line the two shapes up; it is not a matrix product.
print(swapped * swapped.T)

# Matplotlib

In [None]:
import matplotlib.pyplot as plt

Very Matlab-like plotting

In [None]:
# 50 evenly spaced sample points from -1 to 10, and the sine at each of them.
x = np.linspace(-1, 10, 50)
y = np.sin(x)

plt.figure()
# Format string '*-b': star markers, solid line, blue.
plt.plot(x, y, '*-b')
plt.scatter(x, y**2, c='red')  # Notice that here, keyword `c` used to denote a colour argument.
plt.xlabel("x")
plt.ylabel("y")
# The scatter shows y**2, i.e. the square of the sine: sin(x)**2, not sin(x**2).
plt.title("sin(x) and sin(x)**2")
plt.show()

In [None]:
# 10 sample points spaced evenly on a log scale, from 10^0 to 10^3.
x = np.logspace(0., 3., num=10)
y = np.exp(x**0.2+10.0*np.tanh(0.5*np.log(x)))

plt.figure()
# Both axes are logarithmic here; semilogx and semilogy also exist for a
# single logarithmic axis. The styling uses a "diamond" marker, a dash-dot
# line type, and a slightly thicker line width.
plt.loglog(x, y, linewidth=2, linestyle='-.', marker="d")
plt.title("A cool title")
plt.xlabel("Frequency")
plt.ylabel("Gain")
plt.show()

In [None]:
# Two samples of 1000 normal draws each, centred at -1 and +1.
rv1 = np.random.normal(loc=-1, size=1000)
rv2 = np.random.normal(loc=+1, size=1000)

# Keyword `alpha` controls opacity (or transparency): at 0.5 the two
# overlapping histograms both stay visible.
for _sample, _colour in ((rv1, "red"), (rv2, "blue")):
    plt.hist(_sample, bins=30, color=_colour, alpha=0.5)
plt.show()

In [None]:
# It's sometimes nice to use sub-plots to separate things into multiple facets.
# plt.subplots returns the figure and its axes. The figure `f` is not used
# any further here, it's beyond the scope of this class. (Useful for further formatting...)
# sharex=True makes the two stacked histograms use the same x-axis.
f, (ax1, ax2) = plt.subplots(nrows = 2, ncols = 1, sharex = True)
ax1.hist(rv1, bins = 30)
ax2.hist(rv2, bins = 30)
ax1.set_title("Sharing X-axis")
# Show the figure explicitly, like the other plotting cells; this also keeps
# the return value of set_title from being echoed as the cell output.
plt.show()

In [None]:
# Number of points per colour; it never changes, so set it once before the loop.
n = 60

plt.figure()
for color in ['blue', 'orange', 'green']:
    # Random positions in the unit square: row 0 holds the x's, row 1 the y's.
    x, y = np.random.rand(2, n)
    # Random marker sizes between 0 and 200.
    scale = 200.0 * np.random.rand(n)
    plt.scatter(x, y, c=color, s=scale, label=color,
                alpha=0.3, edgecolors='none')
plt.xlabel("x")
plt.ylabel("y")
# The `label` given to each scatter only shows up once a legend is drawn.
plt.legend()
plt.show()

Many other plotting libraries exist! Some are more advanced and interactive:
- Plotly
- Bokeh
- Seaborn