In [1]:
# Vectors are points in some finite-dimensional space. 
# Although you might not think of your data as vectors, they are a good way to represent numeric data.

# The simplest from-scratch approach is to represent vectors as lists of numbers.
# A list of three numbers corresponds to a vector in three-dimensional space, and vice versa

height_weight_age = [70, 170, 40]  # inches, pounds, and years

grades = [95, 80, 75, 62]  # one entry per exam

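# Vectors add componentwise. This means that if two vectors v and w are the same length,
# their sum is the vector whose first element is v[0] + w[0], whose second element is v[1] + w[1], and so on.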

def vector_add(v, w):
    """Return a new list holding the pairwise sums of v and w.

    Elements are paired with zip, so the result is as long as the
    shorter of the two inputs.
    """
    totals = []
    for left, right in zip(v, w):
        totals.append(left + right)
    return totals

In [2]:
def vector_subtract(v, w):
    """Return a new list holding the pairwise differences v[i] - w[i].

    Elements are paired with zip, so the result is as long as the
    shorter of the two inputs.
    """
    paired = zip(v, w)
    return list(left - right for left, right in paired)

In [3]:
# We’ll also sometimes want to componentwise sum a list of vectors. 
# That is, create a new vector whose first element is the sum of all the first elements, 
# whose second element is the sum of all the second elements, and so on. 
# The easiest way to do this is by adding one vector at a time:

def vector_sum(vectors):
    """Componentwise sum of a sequence of vectors.

    Starts from the first vector and folds each remaining one in with
    vector_add. Indexing vectors[0] means an empty sequence raises
    IndexError, and a one-element sequence returns that element itself.
    """
    running_total = vectors[0]
    for position in range(1, len(vectors)):
        running_total = vector_add(running_total, vectors[position])
    return running_total

In [4]:
def vector_sum(vectors):
    """Componentwise sum of a sequence of vectors, via reduce.

    Folds vector_add over the vectors from left to right. This is the
    compact equivalent of the loop-based definition in the previous cell,
    and replaces that definition once this cell has run.

    Raises TypeError (from reduce) when vectors is empty, because no
    initial value is supplied.
    """
    # reduce is a builtin only on Python 2; functools exposes it on both
    # Python 2.6+ and Python 3, so a local import keeps this cell runnable.
    from functools import reduce
    return reduce(vector_add, vectors)

In [5]:
# We’ll also need to be able to multiply a vector by a scalar,
# which we do simply by multiplying each element of the vector by that number

def scalar_multiply(c, v):
    """Scale vector v by the number c, returning a new list."""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled

In [6]:
def vector_mean(vectors):
    """Compute the vector whose ith element is the mean of the ith
    elements of the input vectors.

    The vectors are summed componentwise with vector_sum and the result is
    scaled by 1/n, where n is the number of input vectors. An empty input
    raises ZeroDivisionError.
    """
    n = len(vectors)
    # Use 1.0 / n rather than 1 / n: under Python 2 the all-integer form
    # truncates to 0 for n > 1, which would zero out every mean. The float
    # form gives the same value as 1 / n on Python 3.
    return scalar_multiply(1.0 / n, vector_sum(vectors))

In [1]:
# A less obvious tool is the dot product.
# The dot product of two vectors is the sum of their componentwise products:

def dot(v, w):
    """Dot product: v_1 * w_1 + ... + v_n * w_n.

    Elements are paired with zip; the sum starts from 0, so two empty
    vectors give 0.
    """
    total = 0
    for left, right in zip(v, w):
        total = total + left * right
    return total

# The dot product measures how far the vector v extends in the w direction. 
# For example, if w = [1, 0] then dot(v, w) is just the first component of v 

In [2]:
# Using this, it's easy to compute a vector's sum of squares:

def sum_of_squares(v):
    """Sum of the squared components of v: v_1 * v_1 + ... + v_n * v_n.

    Computed as the dot product of v with itself.
    """
    squares_total = dot(v, v)
    return squares_total

In [3]:
# Which we can use to compute its magnitude (or length):

import math

def magnitude(v):
    """Magnitude (length) of v: the square root of its sum of squares."""
    squares_total = sum_of_squares(v)
    return math.sqrt(squares_total)  # math.sqrt is the square root function

In [4]:
# We now have all the pieces we need to compute the distance between two vectors, defined as:
#     sqrt((v_1 - w_1) ** 2 + ... + (v_n - w_n) ** 2)

def squared_distance(v, w):
    """Squared distance between v and w:
    (v_1 - w_1) ** 2 + ... + (v_n - w_n) ** 2.

    Computed as the sum of squares of the componentwise difference.
    """
    difference = vector_subtract(v, w)
    return sum_of_squares(difference)

def distance(v, w):
    """Distance between v and w: the square root of their squared distance."""
    gap_squared = squared_distance(v, w)
    return math.sqrt(gap_squared)

In [1]:
# A matrix is a two-dimensional collection of numbers. We will represent matrices as lists of lists, 
# with each inner list having the same size and representing a row of the matrix

# A has 2 rows and 3 columns
A = [
    [1, 2, 3],
    [4, 5, 6],
]

# B has 3 rows and 2 columns
B = [
    [1, 2],
    [3, 4],
    [5, 6],
]

In [2]:
# Given this list-of-lists representation, the matrix A has len(A) rows and len(A[0]) columns, 
# which we consider its shape

def shape(A):
    """Return (number of rows, number of columns) for matrix A.

    The column count is read from the first row; a matrix with no rows
    is reported as having 0 columns.
    """
    row_count = len(A)
    if A:
        col_count = len(A[0])  # number of elements in the first row
    else:
        col_count = 0
    return row_count, col_count

In [3]:
def get_row(A, i):
    """Return row i of matrix A.

    The row is the inner list A[i] itself, not a copy.
    """
    row = A[i]
    return row

def get_column(A, j):
    """Return column j of matrix A as a new list, one entry per row."""
    column = []
    for row in A:
        column.append(row[j])  # jth element of this row
    return column

In [4]:
# We’ll also want to be able to create a matrix given its shape and a function for generating its elements.
# We can do this using a nested list comprehension:

def make_matrix(num_rows, num_cols, entry_fn):
    """Build a num_rows x num_cols matrix as a list of row lists.

    The (i, j)th entry is entry_fn(i, j). Entries are generated row by
    row, and left to right within each row.
    """
    matrix = []
    for i in range(num_rows):
        # Row i is [entry_fn(i, 0), entry_fn(i, 1), ...]
        row = [entry_fn(i, j) for j in range(num_cols)]
        matrix.append(row)
    return matrix

In [5]:
def is_diagonal(i, j):
    """Return 1 when (i, j) lies on the 'diagonal' (i equals j), else 0."""
    if i == j:
        return 1
    return 0

identity_matrix = make_matrix(num_rows=5, num_cols=5, entry_fn=is_diagonal)

# The resulting 5 x 5 matrix:
# [[1, 0, 0, 0, 0],
#  [0, 1, 0, 0, 0],
#  [0, 0, 1, 0, 0],
#  [0, 0, 0, 1, 0],
#  [0, 0, 0, 0, 1]]

In [6]:
# We can use a matrix to represent a data set consisting of multiple vectors, 
# simply by considering each vector as a row of the matrix

# Matrices can be used to represent binary relationships. In Chapter 1, we represented the edges of a network 
# as a collection of pairs (i, j). An alternative representation would be to create a matrix A 
# such that A[i][j] is 1 if nodes i and j are connected and 0 otherwise.

# 10 x 10 adjacency matrix, one row and one column per user (0 through 9):
# friendships[i][j] is 1 when users i and j are connected and 0 otherwise.
# The matrix is symmetric and its diagonal is all zeros.
friendships = [[0, 1, 1, 0, 0, 0, 0, 0, 0, 0], # user 0
               [1, 0, 1, 1, 0, 0, 0, 0, 0, 0], # user 1
               [1, 1, 0, 1, 0, 0, 0, 0, 0, 0], # user 2
               [0, 1, 1, 0, 1, 0, 0, 0, 0, 0], # user 3
               [0, 0, 0, 1, 0, 1, 0, 0, 0, 0], # user 4
               [0, 0, 0, 0, 1, 0, 1, 1, 0, 0], # user 5
               [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], # user 6
               [0, 0, 0, 0, 0, 1, 0, 0, 1, 0], # user 7
               [0, 0, 0, 0, 0, 0, 1, 1, 0, 1], # user 8
               [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]] # user 9

In [7]:
# With the matrix representation it is much quicker to check whether two nodes are connected:
# you just have to do a matrix lookup instead of (potentially) inspecting every edge.

# Two direct lookups in the adjacency matrix. Only the value of the last
# expression is echoed as the cell's output; the first comparison is
# evaluated and its result discarded.
friendships[0][2] == 1   # True, 0 and 2 are friends
friendships[0][8] == 1   # False, 0 and 8 are not friends

False

In [8]:
# Similarly, to find the connections a node has, you only need to inspect the column (or the row) 
# corresponding to that node:

# Indices of user 5's friends: only need to look at one row of the matrix.
friends_of_five = [
    other_user
    for other_user, connected in enumerate(friendships[5])
    if connected
]