# Numerical Python

To install NumPy, use the following commands to first create a virtual environment:

> mkdir numpy-tutorial
> cd numpy-tutorial
> python3 -m venv .numpy-tutorial-venv
> source .numpy-tutorial-venv/bin/activate
> pip install numpy matplotlib
>
Start JupyterLab with the `jupyter lab` command (if it is not yet installed in the environment, run `pip install jupyterlab` first).


## Curving Test Grades

The scenario is this: You’re a teacher who has just graded your students on a recent test. Unfortunately, you may have made the test too challenging, and most of the students did worse than expected. To help everybody out, you’re going to curve everyone’s grades.
<hr>
It’ll be a relatively rudimentary curve, though. You’ll take whatever the average score is and declare that a C. Additionally, you’ll make sure that the curve doesn’t accidentally hurt your students’ grades or help so much that the student does better than 100%.

In [29]:
import numpy as np
CURVE_CENTER = 80
grades = np.array([72, 35, 64, 88, 51, 90, 74, 12])


def curve(grades, center=None):
    """Shift grades so that their average lands on the curve center.

    Parameters
    ----------
    grades : array_like
        Raw scores. Lists and tuples are converted to an array.
    center : float, optional
        Score the average should be moved to. When omitted, the
        module-level ``CURVE_CENTER`` is looked up at call time.

    Returns
    -------
    numpy.ndarray
        The shifted scores, clipped between each original score and 100.
    """
    grades = np.asarray(grades)
    if grades.size == 0:
        # mean() of an empty array is nan plus a RuntimeWarning; there is
        # nothing to curve, so hand back an empty float array directly.
        return grades.astype(float)
    if center is None:
        center = CURVE_CENTER
    change = center - grades.mean()
    # The lower bound is each student's own score so the curve never lowers
    # a grade; the upper bound keeps the result from exceeding 100.
    return np.clip(grades + change, grades, 100)

curve(grades)

array([ 91.25,  54.25,  83.25, 100.  ,  70.25, 100.  ,  93.25,  31.25])

array([ 91.25,  54.25,  83.25, 100.  ,  70.25, 100.  ,  93.25,  31.25])

*Vectorization* is the process of performing the same operation in the same way for each element in an array. This removes for loops from your code but achieves the same result.
*Broadcasting* is the process of extending two arrays of different shapes and figuring out how to perform a vectorized calculation between them. Remember, `grades` is an array of numbers of shape (8,) and `change` is a scalar, or single number (shape `()`), which NumPy treats as if it had shape (1,). In this case, NumPy adds the scalar to each item in the array and returns a new array with the results.

In [30]:
# Twelve readings arranged as 2 blocks, each with 2 rows of 3 values.
readings = [
    29.3, 42.1, 18.8, 16.1, 38.0, 12.5,
    12.6, 49.9, 38.6, 31.3, 9.2, 22.2,
]
temperatures = np.reshape(np.array(readings), (2, 2, 3))

temperatures.shape


(2, 2, 3)

(2, 2, 3)

In [31]:
temperatures

array([[[29.3, 42.1, 18.8],
        [16.1, 38. , 12.5]],

       [[12.6, 49.9, 38.6],
        [31.3,  9.2, 22.2]]])

array([[[29.3, 42.1, 18.8],
        [16.1, 38. , 12.5]],

       [[12.6, 49.9, 38.6],
        [31.3,  9.2, 22.2]]])

In [32]:
np.swapaxes(temperatures, 1, 2)

array([[[29.3, 16.1],
        [42.1, 38. ],
        [18.8, 12.5]],

       [[12.6, 31.3],
        [49.9,  9.2],
        [38.6, 22.2]]])

array([[[29.3, 16.1],
        [42.1, 38. ],
        [18.8, 12.5]],

       [[12.6, 31.3],
        [49.9,  9.2],
        [38.6, 22.2]]])

In [33]:
# Small integer grid used to demonstrate reductions along each axis.
table = np.array([[5, 3, 7, 1], [2, 6, 7, 9], [1, 1, 1, 1], [4, 3, 2, 0]])

In [34]:
table.max()

9

9

In [35]:
table.max(axis=0)

array([5, 6, 7, 9])

array([5, 6, 7, 9])

In [36]:
table.max(axis=1)

array([7, 9, 1, 4])

array([7, 9, 1, 4])

In [37]:
A = np.arange(32).reshape(4, 1, 8)

In [38]:
A

array([[[ 0,  1,  2,  3,  4,  5,  6,  7]],

       [[ 8,  9, 10, 11, 12, 13, 14, 15]],

       [[16, 17, 18, 19, 20, 21, 22, 23]],

       [[24, 25, 26, 27, 28, 29, 30, 31]]])

array([[[ 0,  1,  2,  3,  4,  5,  6,  7]],

       [[ 8,  9, 10, 11, 12, 13, 14, 15]],

       [[16, 17, 18, 19, 20, 21, 22, 23]],

       [[24, 25, 26, 27, 28, 29, 30, 31]]])

In [39]:
B = np.arange(48).reshape(1, 6, 8)

In [40]:
B

array([[[ 0,  1,  2,  3,  4,  5,  6,  7],
        [ 8,  9, 10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 29, 30, 31],
        [32, 33, 34, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 45, 46, 47]]])

array([[[ 0,  1,  2,  3,  4,  5,  6,  7],
        [ 8,  9, 10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 29, 30, 31],
        [32, 33, 34, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 45, 46, 47]]])

In [41]:
A + B

array([[[ 0,  2,  4,  6,  8, 10, 12, 14],
        [ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54]],

       [[ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62]],

       [[16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70]],

       [[24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70],
        [64, 66, 68, 70, 72, 74, 76, 78]]])

array([[[ 0,  2,  4,  6,  8, 10, 12, 14],
        [ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54]],

       [[ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62]],

       [[16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70]],

       [[24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70],
        [64, 66, 68, 70, 72, 74, 76, 78]]])

## Data Science Operations: Filter, Order, Aggregate

In [42]:
# 4x4 magic square: the cells below check that its rows, columns and
# 2x2 corner quadrants each total 34.
magic_rows = (
    (16, 3, 2, 13),
    (5, 10, 11, 8),
    (9, 6, 7, 12),
    (4, 15, 14, 1),
)
square = np.array(magic_rows)

In [43]:
# Every column and every row of the square must add up to 34.
for i in range(4):
    column_total = square[:, i].sum()
    row_total = square[i, :].sum()
    assert column_total == 34
    assert row_total == 34

In [44]:
assert square[:2, :2].sum() == 34

In [45]:
assert square[2:, :2].sum() == 34

In [46]:
assert square[:2, 2:].sum() == 34

In [47]:
assert square[2:, 2:].sum() == 34

In [48]:
square

array([[16,  3,  2, 13],
       [ 5, 10, 11,  8],
       [ 9,  6,  7, 12],
       [ 4, 15, 14,  1]])

array([[16,  3,  2, 13],
       [ 5, 10, 11,  8],
       [ 9,  6,  7, 12],
       [ 4, 15, 14,  1]])

In [56]:
numbers = np.linspace(5, 50, 28, dtype=int).reshape(4, -1)

numbers

array([[ 5,  6,  8, 10, 11, 13, 15],
       [16, 18, 20, 21, 23, 25, 26],
       [28, 30, 31, 33, 35, 36, 38],
       [40, 41, 43, 45, 46, 48, 50]])

In [57]:
mask = numbers % 4 == 0

In [58]:
numbers


array([[ 5,  6,  8, 10, 11, 13, 15],
       [16, 18, 20, 21, 23, 25, 26],
       [28, 30, 31, 33, 35, 36, 38],
       [40, 41, 43, 45, 46, 48, 50]])

In [59]:
mask

array([[False, False,  True, False, False, False, False],
       [ True, False,  True, False, False, False, False],
       [ True, False, False, False, False,  True, False],
       [ True, False, False, False, False,  True, False]])

In [60]:
numbers[mask]

array([ 8, 16, 20, 28, 36, 40, 48])

In [61]:
by_four = numbers[numbers % 4 == 0]

In [62]:
by_four

array([ 8, 16, 20, 28, 36, 40, 48])

In [67]:
from numpy.random import default_rng

In [68]:
rng = default_rng()

In [73]:
values = rng.standard_normal(10000)

In [74]:
values[:5]

array([-1.68864861,  1.28433066, -0.93155593,  0.27654817,  0.67974464])

In [75]:
std = values.std()

In [76]:
std

0.9981499370102743

In [77]:
filtered = values[(values > -2 * std) & (values < 2 * std)]

In [78]:
filtered.size

9560

In [79]:
values.size

10000

In [80]:
filtered.size / values.size

0.956

In [81]:
a = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])

In [84]:
b = a.T

In [83]:
a.transpose()

array([[1, 3, 5],
       [2, 4, 6]])

In [85]:
b.transpose()

array([[1, 2],
       [3, 4],
       [5, 6]])

In [86]:
a

array([[1, 2],
       [3, 4],
       [5, 6]])

In [87]:
# 3x3 grid of unsorted integers used as input for the sorting examples.
data = np.array([[7, 1, 4], [8, 6, 5], [1, 2, 3]])

np.sort(data)

array([[1, 4, 7],
       [5, 6, 8],
       [1, 2, 3]])

In [88]:
np.sort(data, axis=None)

array([1, 1, 2, 3, 4, 5, 6, 7, 8])

In [89]:
np.sort(data, axis=0)

array([[1, 1, 3],
       [7, 2, 4],
       [8, 6, 5]])

In [90]:
# Two 2x2 integer arrays used as operands for the stacking examples.
a = np.array([[4, 8], [6, 1]])
b = np.array([[3, 5], [7, 2]])


In [91]:
np.hstack((a, b))

array([[4, 8, 3, 5],
       [6, 1, 7, 2]])

In [92]:
np.vstack((a, b))

array([[4, 8],
       [6, 1],
       [3, 5],
       [7, 2]])

In [97]:
np.concatenate((a, b))

array([[4, 8],
       [6, 1],
       [3, 5],
       [7, 2]])

In [98]:
np.concatenate((a, b), axis=None)

array([4, 8, 6, 1, 3, 5, 7, 2])

# Implementing a Maclaurin Series

The equation for the Maclaurin series for $e^x$ is:
$$e^x = \sum_{n=0}^{\infty} \frac{x^n}{n!} = 1 + x + \frac{x^2}{2} + \frac{x^3}{6} + \ldots$$
You add up terms starting at zero and going theoretically to infinity. Each nth term will be x raised to n and divided by n!, which is the notation for the factorial operation.

In [103]:
from math import e, factorial

import numpy as np

fac = np.vectorize(factorial)

def e_x(x, terms=10):
    """Approximate e^x with a partial sum of its Maclaurin series.

    Parameters
    ----------
    x : float
        Scalar exponent.
    terms : int, optional
        Number of leading series terms to add up (default 10).

    Returns
    -------
    numpy.float64
        Sum of x**n / n! for n = 0 .. terms - 1; 0.0 when ``terms`` is
        zero or negative.
    """
    if terms <= 0:
        # An empty partial sum is zero.
        return np.float64(0.0)
    # ratios[k] = x / k, so the running product up to index k is x**k / k!.
    # Building each term from the previous one stays in floating point and
    # avoids the int64 overflow of computing x**n and n! separately.
    ratios = np.ones(terms, dtype=float)
    ratios[1:] = x / np.arange(1, terms)
    return np.sum(np.cumprod(ratios))

if __name__ == "__main__":
    # Reference value computed from the standard library's constant e.
    actual = e ** 3
    print("Actual:", actual)

    print("N (terms)\tMaclaurin\tError")

    # Show how the approximation and its error change as terms are added.
    for n in range(1, 14):
        maclaurin = e_x(3, terms=n)
        error = actual - maclaurin
        print(f"{n}\t\t{maclaurin:.03f}\t\t{error:.03f}")

Actual: 20.085536923187664
N (terms)	Maclaurin	Error
1		1.000		19.086
2		4.000		16.086
3		8.500		11.586
4		13.000		7.086
5		16.375		3.711
6		18.400		1.686
7		19.412		0.673
8		19.846		0.239
9		20.009		0.076
10		20.063		0.022
11		20.080		0.006
12		20.084		0.001
13		20.085		0.000


In [104]:
a = np.array([1, 3, 5.5, 7.7, 9.2], dtype=np.single)

In [105]:
b = np.array([1, 3, 5.5, 7.7, 9.2], dtype=np.uint8)

In [106]:
a

array([1. , 3. , 5.5, 7.7, 9.2], dtype=float32)

In [107]:
b

array([1, 3, 5, 7, 9], dtype=uint8)

In [111]:
names = np.array(["bob", "amy", "han"], dtype=str)

In [112]:
names

array(['bob', 'amy', 'han'], dtype='<U3')

In [113]:
names.itemsize

12

In [114]:
names = np.array(["bob", "amy", "han"])

In [115]:
names

array(['bob', 'amy', 'han'], dtype='<U3')

In [116]:
more_names = np.array(["bobo", "jehosephat"])

In [117]:
np.concatenate((names, more_names))

array(['bob', 'amy', 'han', 'bobo', 'jehosephat'], dtype='<U10')

In [119]:
names[2] = "jasmina"

In [120]:
names

array(['bob', 'amy', 'jas'], dtype='<U3')

# Structured Arrays

In [123]:
# Each record holds a name (up to 10 characters), an age and a power value.
record_type = np.dtype([("name", str, 10), ("age", int), ("power", int)])
data = np.array(
    [
        ("joe", 32, 6),
        ("mary", 15, 20),
        ("felipe", 80, 100),
        ("beyonce", 38, 9001),
    ],
    dtype=record_type,
)

In [124]:
data[0]

('joe', 32, 6)

In [125]:
data["name"]

array(['joe', 'mary', 'felipe', 'beyonce'], dtype='<U10')

In [128]:
data[data["power"] > 70]["name"]

array(['felipe', 'beyonce'], dtype='<U10')

In [129]:
np.sort(data[data["age"] > 20], order="power")["name"]

array(['joe', 'felipe', 'beyonce'], dtype='<U10')

# Manipulating Images With Matplotlib

In [130]:
import matplotlib.image as mpimg

# Read the image file "kitty.jpg" (path relative to the working directory).
img = mpimg.imread("kitty.jpg")
# Show the type of the returned object and its dimensions.
# NOTE(review): the three axes are presumably (rows, columns, color
# channels) -- confirm against the matplotlib imread documentation.
print(type(img))
print(img.shape)

<class 'numpy.ndarray'>
(1299, 1920, 3)
