# Data Science From Scratch  by Joel Grus

In [69]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from typing import List

### 4. Linear Algebra

#### Vectors

In [46]:
# A vector is modelled as a plain list of floats.
Vector = List[float]

# One subject described by three measurements.
# NOTE: despite the variable name, the component order is
# height, weight, age.
height_age_weight = [
    70,   # height in inches
    170,  # weight in pounds
    40,   # age in years
]

# Four grades stored as a 4-dimensional vector.
grades = [95, 80, 75, 62]

In [47]:
# Sample 3-dimensional vectors shared by the checks and the plot below.
zero = [0] * 3   # the origin
v = [1, 2, 3]
w = [4, 5, 6]
u = [5, 7, 9]    # component-wise sum of v and w

In [48]:
def add(v: Vector, w: Vector) -> Vector:
    '''
    Return the component-wise sum of v and w.

    Both vectors must have the same length; otherwise the assertion fails.
    '''
    assert len(v) == len(w), "vectors must be the same length"

    summed = []
    for left, right in zip(v, w):
        summed.append(left + right)
    return summed

In [49]:
# sanity check: v + w equals u component-wise ([1+4, 2+5, 3+6] == [5, 7, 9])
assert add(v, w) == u

In [50]:
def subtract(v: Vector, w: Vector) -> Vector:
    '''
    Return the component-wise difference v - w.

    Both vectors must have the same length; otherwise the assertion fails.
    '''
    assert len(v) == len(w), "vectors must be the same length"

    pairs = zip(v, w)
    return [minuend - subtrahend for minuend, subtrahend in pairs]

In [51]:
# sanity check: u - w gives back v ([5-4, 7-5, 9-6] == [1, 2, 3])
assert subtract(u, w) == v

In [43]:
# Draw three two-point 3-D segments: v (blue) from the origin, w (green)
# starting at the tip of v, and u (red) from the origin.
fig = go.Figure()

# blue: origin -> v
fig.add_trace(
    go.Scatter3d(
        x=[zero[0], v[0]],
        y=[zero[1], v[1]],
        z=[zero[2], v[2]],
        mode='lines+markers',
        line={'color': 'blue'},
    )
)

# green: tip of v -> v + w
fig.add_trace(
    go.Scatter3d(
        x=[v[0], v[0]+w[0]],
        y=[v[1], v[1]+w[1]],
        z=[v[2], v[2]+w[2]],
        mode='lines+markers',
        line={'color': 'green'},
    )
)

# red: origin -> u
fig.add_trace(
    go.Scatter3d(
        x=[zero[0], u[0]],
        y=[zero[1], u[1]],
        z=[zero[2], u[2]],
        mode='lines+markers',
        line={'color': 'red'},
    )
)

fig.update_layout(width=400, height=400)

In [55]:
def vector_sum(vectors: List[Vector]) -> Vector:
    '''
    Add a list of vectors component by component.

    The assertions fail when the list is empty or when the vectors do not
    all have the same length.
    '''
    # an empty list has no well-defined sum
    assert vectors, 'no vectors provided!'

    # every vector must match the length of the first one
    expected_length = len(vectors[0])
    assert all(len(vec) == expected_length for vec in vectors), 'different sizes!'

    # zip(*vectors) groups the i-th components of all vectors together
    return [sum(components) for components in zip(*vectors)]

assert vector_sum([[1, 2], [3, 4], [5, 6], [7, 8]]) == [16, 20]

In [56]:
def scalar_multiply(c: float, v: Vector) -> Vector:
    '''
    Return a new vector with every component of v scaled by c.
    '''
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled

In [60]:
def vector_mean(vectors: List[Vector]) -> Vector:
    '''
    Return the component-wise average of the given vectors.

    The component-wise sum is scaled by 1/n, where n is the number of
    vectors; an empty list therefore raises ZeroDivisionError.
    '''
    count = len(vectors)
    # computed before the sum, exactly as 1/n was in the original expression
    weight = 1 / count

    return scalar_multiply(weight, vector_sum(vectors))

In [62]:
# sanity check: component-wise mean, (1+3+5)/3 == 3 and (2+4+6)/3 == 4
assert vector_mean([[1, 2],[3, 4], [5, 6]]) == [3, 4]

In [66]:
def dot(v: Vector, w: Vector) -> float:
    '''
    Return the dot product v_1 * w_1 + ... + v_n * w_n.

    Both vectors must have the same length; otherwise the assertion fails.
    '''
    assert len(v) == len(w), 'vectors must be same length'

    total = 0
    for left, right in zip(v, w):
        total = total + left * right
    return total

In [67]:
# sanity check: 1*4 + 2*5 + 3*6 == 32
assert dot(v, w) == 32

In [68]:
def sum_of_squares(v: Vector) -> float:
    '''
    Return v_1 * v_1 + ... + v_n * v_n, i.e. the dot product of v with itself.
    '''
    squares_total = dot(v, v)
    return squares_total

In [70]:
def magnitude(v: Vector) -> float:
    '''
    Return the magnitude (length, norm) of v: the square root of the
    sum of its squared components.
    '''
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)

We now have all the tools needed to compute the distance between two vectors $v$ and $w$, which is given by the formula:

$
\displaystyle \sqrt {(v_1 - w_1)^2 + \ldots + (v_n - w_n)^2}
$

### Matrices