In [1]:
from math import sqrt

In [2]:
def get_shape(a):
    """Return the shape of a 2-D nested list as ``[n_rows, n_cols]``.

    The column count is read from the first row only, so ``a`` must contain
    at least one row; rows are not checked for equal length.
    """
    n_rows = len(a)
    n_cols = len(a[0])
    return [n_rows, n_cols]


# Example: a matrix with 3 rows and 2 columns.
a = [[1, 2],
     [4, 5],
     [7, 8]]

get_shape(a)

[3, 2]

In [3]:
def distance_from_origin(a):
    """Return the Euclidean (L2) length of the vector ``a``.

    The squares of the components are summed and the square root of that
    total is returned as a float.
    """
    squared_total = 0
    for component in a:
        squared_total = squared_total + component ** 2
    return sqrt(squared_total)


# Example: length of the vector (1, 2, 3), i.e. sqrt(14).
a = [1, 2, 3]
distance_from_origin(a)

3.7416573867739413

In [4]:
def distance_between(a, b):
    """Return the Euclidean distance between the vectors ``a`` and ``b``.

    Args:
        a: sequence of numbers.
        b: sequence of numbers with the same length as ``a``.

    Returns:
        The square root of the sum of squared component-wise differences.

    Raises:
        AssertionError: if the vectors differ in length. Without this check
            ``zip`` would silently ignore the surplus components and return
            the distance between truncated vectors.
    """
    assert len(a) == len(b), "Dimensions do not match!"
    return sqrt(sum((ia - ib) ** 2 for ia, ib in zip(a, b)))


a = [1, 2, 3]
b = [4, 5, 6]
distance_between(a, b)

5.196152422706632

In [5]:
def unit_vector(w):
    """Return ``w`` scaled to unit Euclidean length.

    Args:
        w: sequence of numbers.

    Returns:
        A new list with every component of ``w`` divided by the length of
        ``w`` (as computed by ``distance_from_origin``).

    Raises:
        ZeroDivisionError: if ``w`` is non-empty and has zero length.
    """
    # The norm is the same for every component, so compute it once instead
    # of once per element.
    norm = distance_from_origin(w)
    return [iw / norm for iw in w]


w = [1, 2, 3]
unit_vector(w)

[0.2672612419124244, 0.5345224838248488, 0.8017837257372732]

In [6]:
def Minkowski_distance(k, a, b):
    """
    Return the Minkowski distance of order ``k`` between two vectors.

    The distance is ``(sum(|a_i - b_i| ** k)) ** (1 / k)``; ``k = 1`` gives
    the Manhattan distance and ``k = 2`` the Euclidean distance.

    Args:
        k: order of the distance; must be positive.
        a: sequence of numbers.
        b: sequence of numbers with the same length as ``a``.

    Raises:
        AssertionError: if the vectors differ in length.
        ValueError: if ``k`` is not positive.
    """
    assert len(a) == len(b), "Dimensions do not match!"
    if k <= 0:
        raise ValueError("k must be positive")
    # Pair up the components of the two input vectors (a, b).
    return sum(abs(comp_a - comp_b) ** k for comp_a, comp_b in zip(a, b)) ** (1 / k)

In [7]:
def hamming_distance(a, b):
    """Count the positions at which ``a`` and ``b`` hold different values.

    Both sequences must have the same length; otherwise an AssertionError
    is raised.
    """
    assert len(a) == len(b), "Dimensions do not match!"
    return sum(1 for left, right in zip(a, b) if left != right)

In [8]:
def transpose(a):
    """Return the transpose of the 2-D nested list ``a`` as a new list of lists.

    The number of columns is read from the first row, so ``a`` must contain
    at least one row.
    """
    n_rows = len(a)
    n_cols = len(a[0])
    # Column c of the input becomes row c of the output.
    return [[a[r][c] for r in range(n_rows)] for c in range(n_cols)]


# Example: a 3 x 2 matrix becomes a 2 x 3 matrix.
a = [[1, 2],
     [4, 5],
     [7, 8]]

transpose(a)

[[1, 4, 7], [2, 5, 8]]


<h3> Q: Given two matrices please print the product of those two matrices </h3>
<pre>

Ex 1: A   = [[1 3 4]
             [2 5 7]
             [5 9 6]]
      B   = [[1 0 0]
             [0 1 0]
             [0 0 1]]
      A*B = [[1 3 4]
             [2 5 7]
             [5 9 6]]

     
Ex 2: A   = [[1 2]
             [3 4]]
      B   = [[1 2 3 4 5]
             [5 6 7 8 9]]
      A*B = [[11 14 17 20 23]
             [23 30 37 44 51]]
             
Ex 3: A   = [[1 2]
             [3 4]]
      B   = [[1 4]
             [5 6]
             [7 8]
             [9 6]]
      A*B = Not possible (A has 2 columns but B has 4 rows)
</pre>

In [9]:
def dot_product(a, b):
    """
    Return the dot product of two vectors of equal length.

    Raises:
        AssertionError: ("Dot product not possible") if the lengths differ.
    """
    assert len(a) == len(b), "Dot product not possible"
    return sum(ia * ib for ia, ib in zip(a, b))


def matmul(a, b):
    """
    Return the matrix product ``a @ b`` of two 2-D nested lists.

    Entry ``[i][j]`` of the result is the dot product of row ``i`` of ``a``
    with column ``j`` of ``b``.

    Args:
        a: matrix as a list of rows.
        b: matrix as a list of rows; the column count is read from ``b[0]``.

    Returns:
        A new list of rows; an empty list when ``a`` has no rows.

    Raises:
        AssertionError: raised by ``dot_product`` when a row of ``a`` and a
            column of ``b`` differ in length, i.e. the shapes are
            incompatible.
    """
    # No rows in ``a`` means no output rows; return before touching ``b``.
    if len(a) == 0:
        return []
    # The columns of ``b`` are the same for every row of ``a``, so build
    # them once up front instead of once per row.
    b_cols = [[b_row[jcol] for b_row in b] for jcol in range(len(b[0]))]
    return [[dot_product(a_row, b_col) for b_col in b_cols] for a_row in a]

In [10]:
# Ex 1: multiplying by the 3 x 3 identity matrix returns A unchanged.
A = [[1, 3, 4],
     [2, 5, 7],
     [5, 9, 6]]
B = [[1, 0, 0],
     [0, 1, 0],
     [0, 0, 1]]

matmul(A, B)

[[1, 3, 4], [2, 5, 7], [5, 9, 6]]

In [11]:
# Ex 2: a 2 x 2 matrix times a 2 x 5 matrix gives a 2 x 5 result.
A = [[1, 2],
     [3, 4]]
B = [[1, 2, 3, 4, 5],
     [5, 6, 7, 8, 9]]

matmul(A, B)

[[11, 14, 17, 20, 23], [23, 30, 37, 44, 51]]

In [12]:
# Ex 3: A is 2 x 2 and B is 4 x 2, so the product is undefined.
A = [
    [1, 2],
    [3, 4]
]
B = [
    [1, 4],
    [5, 6],
    [7, 8],
    [9, 6]
]

# The failure is the point of this example: catch it and show the message
# so the notebook still runs top to bottom under "Restart & Run All".
try:
    product = matmul(A, B)
except AssertionError as error:
    print("AssertionError:", error)
else:
    print(product)

AssertionError: Dot product not possible

<h3> Q: Proportional Sampling - Select a number randomly with probability proportional to its magnitude from the given array of n elements</h3>

Consider an experiment: select one element from the list A at random, with probability proportional to its magnitude.
Assume we repeat this experiment 100 times with replacement; in each experiment, print the number that was selected from A.

<pre>
Ex 1: A = [0 5 27 6 13 28 100 45 10 79]
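Let f(x) denote the number of times x is selected in 100 experiments. On average we expect:
f(100) > f(79) > f(45) > f(28) > f(27) > f(13) > f(10) > f(6) > f(5) > f(0)
(The counts are random, so values of similar magnitude may occasionally swap places in a single run.)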
</pre>

In [13]:
def get_prefix_sum(arr):
    """Return the running (cumulative) sums of ``arr`` as a new list.

    Element ``i`` of the result is ``arr[0] + arr[1] + ... + arr[i]``.

    Args:
        arr: iterable of numbers; may be empty.

    Returns:
        A list with one running total per input element, or ``[]`` for
        empty input.
    """
    prefix_sum = []
    for value in arr:
        # The first element starts the running total; every later element
        # is added to the previous total.
        prefix_sum.append(prefix_sum[-1] + value if prefix_sum else value)
    return prefix_sum

In [14]:
from numpy.random import uniform

# Proportional sampling is used in k-means++ for initializing the centroids
def proportional_sampling(rv, k):
    """
    Draw ``k`` values from ``rv`` with replacement, each with probability
    proportional to its magnitude.

    The weights are normalised to sum to 1 and turned into cumulative
    sums. For every uniform draw ``r`` the first element whose cumulative
    sum exceeds ``r`` is selected.

    Args:
        rv: sequence of non-negative numbers with a positive total.
        k: number of samples to draw.

    Returns:
        A list of exactly ``k`` elements taken from ``rv``.

    Raises:
        ValueError: if ``rv`` contains a negative value or its total is not
            positive (this includes empty input).
    """
    if any(irv < 0 for irv in rv):
        raise ValueError("rv must not contain negative values")
    summation = sum(rv)
    if summation <= 0:
        raise ValueError("rv must contain at least one positive value")
    normalized_rv = [irv / summation for irv in rv]
    cumul = get_prefix_sum(normalized_rv)
    # Rounding can leave the final cumulative sum slightly below 1.0, so a
    # draw close to 1.0 may match nothing; such draws go to the last element
    # with a positive weight, so no sample is lost.
    last_positive = max(index for index, irv in enumerate(rv) if irv > 0)
    r = uniform(0.0, 1.0, k)
    samples = []
    for ir in r:
        for index, icum in enumerate(cumul):
            # Strict comparison: a draw of exactly 0.0 must not select a
            # leading element whose weight (and cumulative sum) is 0.
            if ir < icum:
                samples.append(rv[index])
                break
        else:
            samples.append(rv[last_positive])
    return samples