## SCC.403 Data Mining Lab 1: Introduction to Linear Algebra in Python

### 1.1 Vector Operations

In [248]:
def sum_vectors(a, b):
    ''' Adds 2 vectors (as Python lists) position by position and returns the sums as a new list.
    Pairing is done with zip, so the result is as long as the shorter input. '''
    totals = []
    for left, right in zip(a, b):
        totals.append(left + right)
    return totals

In [249]:
def mult_vector(v, lam):
    ''' Scales every element of a vector, v (as a Python list), by a scalar, lam, and returns a new list '''
    scaled = []
    for element in v:
        scaled.append(element * lam)
    return scaled

In [250]:
def dot_product(a, b):
    ''' Returns the dot product of 2 vectors a and b (as Python lists): the sum of their pairwise products '''
    pairwise_products = [left * right for left, right in zip(a, b)]
    return sum(pairwise_products)

In [14]:
# Two sample 4-element vectors used as inputs by the vector-operation calls below
a = [5, 2, 10, 7]
b = [2, 3, 4, 9]

In [15]:
# Element-wise sum of the two sample vectors
sum_vectors(a, b)

[7, 5, 14, 16]

In [16]:
# Scale every element of a by 5
mult_vector(a, 5)

[25, 10, 50, 35]

In [18]:
# Dot product of the two sample vectors: 5*2 + 2*3 + 10*4 + 7*9
dot_product(a, b)

119

### 1.2 Matrix Operations

In [251]:
def mat_mult(a, b):
    ''' Performs matrix multiplication of 2 matrices a and b (using a Python list of lists)

    a is read as an m x n matrix and b as an n x p matrix (b is assumed to be
    rectangular). The result is a new m x p list of lists where entry [i][j]
    is the dot product of row i of a with column j of b. An empty a gives [].

    Raises ValueError if a row of a does not have exactly len(b) entries.
    '''
    inner_size = len(b)
    for row in a:
        if len(row) != inner_size:
            raise ValueError(
                "cannot multiply: a row of a has %d entries but b has %d rows"
                % (len(row), inner_size)
            )

    # zip(*b) walks b column by column, so the width of the result comes
    # from b rather than from a (the two only coincide for square inputs)
    columns = list(zip(*b))

    result = []
    for row in a:
        result_row = []
        for column in columns:
            cell = 0
            for left, right in zip(row, column):
                cell += left * right
            result_row.append(cell)
        result.append(result_row)
    return result

In [116]:
# Two sample 2x2 matrices (lists of rows) for the matrix multiplication call below
a = [[2, 1], [3, 4]]
b = [[4, 6], [5, 7]]

In [117]:
# Matrix product a * b of the two sample matrices
mat_mult(a, b)

[[13, 19], [32, 46]]

### 1.3 Transpose and Inverse

In [252]:
def transpose(a):
    ''' Computes the transpose of a vector/matrix a (as a Python list/list of lists)

    A matrix given as a list of m rows with n entries each comes back as a new
    list of n rows with m entries each. A flat list of numbers is treated as a
    row vector and comes back as a column: one single-element row per entry.
    An empty list gives [].
    '''
    if not a:
        return []
    try:
        num_cols = len(a[0])
    except TypeError:
        # The first element has no length, so a is a flat vector, not a list of rows
        return [[value] for value in a]
    return [[row[col_idx] for row in a] for col_idx in range(num_cols)]

In [151]:
# Sample 3x3 matrix (list of rows) for the transpose call below
a = [
    [1, 4, 5],
    [6, 3, 9], 
    [4, 3, 1]
]

In [152]:
# Rows of the sample matrix become the columns of the result
transpose(a)

[[1, 6, 4], [4, 3, 3], [5, 9, 1]]

In [254]:
def is_inverse(a, b, tol=1e-9):
    ''' Returns True if b is the inverse matrix of a (as Python lists), False otherwise

    b counts as the inverse only when both matrices are square with the same
    size and every entry of the product a * b is within tol of the identity
    matrix: 1 on the diagonal and 0 everywhere else.

    tol is the largest absolute deviation allowed per entry. It absorbs
    floating-point rounding; with integer entries any mismatch is at least 1
    and therefore always exceeds the default.
    '''
    size = len(a)
    # Only square matrices of the same size can be inverses of each other
    if len(b) != size:
        return False
    if any(len(row) != size for row in a) or any(len(row) != size for row in b):
        return False

    product = mat_mult(a, b)
    for row_idx, row in enumerate(product):
        for col_idx, value in enumerate(row):
            expected = 1 if row_idx == col_idx else 0
            # Written as "not <=" so that a NaN entry is rejected too
            if not abs(value - expected) <= tol:
                return False
    return True

In [255]:
# Sample 2x2 matrix a and a candidate inverse b for the is_inverse call below
a = [[2, 1], [1, 1]]
b = [[1, -1], [-1, 2]]

In [256]:
# Check whether b is the inverse of a
is_inverse(a, b)

True

### 1.4 Distance Metrics

In [257]:
def euclidean(a, b):
    ''' Returns the euclidean distance between 2 vectors a and b (as Python lists):
    the square root of the sum of the squared element-wise differences '''
    squared_gaps = [(left - right) ** 2 for left, right in zip(a, b)]
    return sum(squared_gaps) ** 0.5

In [258]:
# Two sample 3-element vectors for the distance call below
a = [1, 2, 3]
b = [2, 3, 4]

In [259]:
# Each coordinate differs by 1, so the distance is the square root of 3
euclidean(a, b)

1.7320508075688772

In [260]:
def low_dist(a):
    ''' Returns the pair of list indices with the minimum euclidean distance for rows in matrix a (as a Python list of lists)

    Every unordered pair of rows is compared once. The result is a tuple
    (i, j) with i < j. When several pairs share the minimum distance the
    first one encountered is kept, because only a strictly smaller distance
    replaces the current best. With fewer than two rows there is nothing to
    compare and (-1, -1) is returned.
    '''
    from itertools import combinations

    closest_pair = (-1, -1)
    smallest = float("inf")

    # combinations() yields each (earlier row, later row) pairing exactly once,
    # in the same order as a nested "for j in range(i + 1, n)" loop
    for (first_idx, first_row), (second_idx, second_row) in combinations(enumerate(a), 2):
        dist = euclidean(first_row, second_row)
        if dist < smallest:
            smallest = dist
            closest_pair = (first_idx, second_idx)

    return closest_pair

In [241]:
# Sample matrix with five 3-element rows for the closest-pair search below
a = [
    [4, 6, 10],
    [1, 2, 3],
    [4, 5, 1],
    [7, 7, 5],
    [1, 9, 10]
]

In [242]:
# Indices of the two rows of the sample matrix that lie closest together
low_dist(a)

(0, 4)

In [261]:
def norm(a):
    ''' Returns the euclidean (L2) norm of vector a (as a Python list): the square root of the sum of its squared elements '''
    squares = [element ** 2 for element in a]
    return sum(squares) ** 0.5

In [223]:
def cos_similarity(a, b):
    ''' Returns the cosine similarity between 2 vectors a and b (as Python lists):
    their dot product divided by the product of their norms '''
    numerator = dot_product(a, b)
    denominator = norm(a) * norm(b)
    return numerator / denominator

In [236]:
# Two sample 3-element vectors for the cosine similarity call below
a = [1, 2, 3]
b = [2, 3, 4]

In [237]:
# Cosine similarity of the two sample vectors
cos_similarity(a, b)

0.9925833339709302