In [1]:
import pyspark
# Spark configuration for this notebook: application name and a local master.
conf = pyspark.SparkConf().setAppName('appName').setMaster('local')
# getOrCreate reuses a context that is already running in this kernel, so
# re-executing this cell does not fail with
# "Cannot run multiple SparkContexts at once".
sc = pyspark.SparkContext.getOrCreate(conf=conf)


In [None]:
# Stops the SparkContext created above. The loaders below call sc.textFile, so
# run this cell only once you are finished with the notebook; executing it
# earlier (e.g. via "Run All") leaves `sc` unusable for the following cells.
sc.stop()

In [2]:
def load_vector(file: str):
    """Load a sparse vector from a text file as an RDD of ``(index, value)`` pairs.

    Every non-blank line must hold two whitespace-separated fields: an integer
    index followed by a float value. Blank or whitespace-only lines are
    skipped instead of failing the whole job.

    Uses the module-level SparkContext ``sc``.
    """
    def parse_entry(line):
        index, value = line.split()
        return int(index), float(value)

    # A blank line splits into an empty list, which would make the two-way
    # unpacking in parse_entry raise ValueError; drop such lines first.
    non_blank = sc.textFile(file).filter(lambda line: line.strip())
    return non_blank.map(parse_entry)


def load_matrix(file: str):
    """Load a sparse matrix from a text file as an RDD of ``(i, j, value)`` triples.

    Every non-blank line must hold three whitespace-separated fields: two
    integer indices followed by a float value. Blank or whitespace-only lines
    are skipped instead of failing the whole job.

    Uses the module-level SparkContext ``sc``.
    """
    def parse_entry(line):
        row, col, value = line.split()
        return int(row), int(col), float(value)

    # A blank line splits into an empty list, which would make the three-way
    # unpacking in parse_entry raise ValueError; drop such lines first.
    non_blank = sc.textFile(file).filter(lambda line: line.strip())
    return non_blank.map(parse_entry)


# Inputs used throughout the notebook: two sparse vectors and one sparse matrix.
V, W = (load_vector(path) for path in ("data/V.txt", "data/W.txt"))
M = load_matrix("data/M.txt")


In [3]:
# Bring all three RDDs to the driver to inspect the parsed input.
V.collect(), W.collect(), M.collect()

([(1, 1.5), (2, 5.0), (4, 1.3), (7, 3.0)],
 [(1, -1.5), (2, 2.0), (3, 2.3), (4, 2.0), (6, 2.5)],
 [(1, 1, 3.2),
  (1, 2, 2.4),
  (1, 3, 7.0),
  (1, 4, 2.0),
  (2, 2, 7.1),
  (2, 3, -1.0),
  (3, 3, 1.0)])

In [4]:
from operator import add
def map_func(value):
    """Return the square of the value in an ``(index, value)`` pair."""
    _index, component = value
    return component * component


# Euclidean norm of V: square root of the sum of the squared components.
normV = (
    V.map(map_func)
     .reduce(add)
) ** 0.5


In [5]:
# Show the norm of V (square root of the sum of squared components).
normV


6.159545437773797

In [12]:
from operator import add
def filter_func(value):
    """Keep an ``(index, value)`` pair only when its value is non-zero."""
    _index, component = value
    return component != 0


# Element-wise sum of V and W: entries sharing an index are added together,
# then entries whose sum is exactly zero are dropped.
sumU = (
    V.union(W)
     .reduceByKey(add)
     .filter(filter_func)
)

sumU.collect()

[(2, 7.0), (4, 3.3), (6, 2.5), (7, 3.0), (3, 2.3)]

In [9]:
from operator import add


def map_func(value):
    """Multiply the two joined components of an ``(index, (v_i, w_i))`` record."""
    _index, (left, right) = value
    return left * right


# Dot product of V and W: join on the shared index, multiply, then sum.
(
    V.join(W)
     .map(map_func)
     .reduce(add)
)

10.35

In [13]:
from operator import add

def map_func(value):
    """Re-key a matrix entry ``(i, j, m)`` by its column: ``(j, (i, m))``."""
    row, col, entry = value
    return col, (row, entry)


# Pair every vector component v_j with each matrix entry in column j.
sumVM = V.join(M.map(map_func))


def map_func(value):
    """Turn a joined record ``(j, (v_j, (i, m)))`` into the partial product ``(i, m * v_j)``."""
    _col, (component, (row, entry)) = value
    return row, (entry * component)


def filter_func(value):
    """Keep an ``(index, value)`` pair only when its value is non-zero."""
    _index, total = value
    return total != 0


# Sum the partial products per row and drop rows whose total is exactly zero.
sumVM = sumVM.map(map_func).reduceByKey(add).filter(filter_func)


In [11]:
# Bring the matrix-vector product to the driver; rows whose total is zero were
# filtered out, so only non-zero rows appear.
sumVM.collect()

[(2, 35.5), (1, 19.4)]

In [14]:
from operator import add


def filter_func(value):
    """Predicate for ``(index, value)`` pairs: true when the value is non-zero."""
    _index, component = value
    return component != 0
    
def norm(V) -> float:
    """Return the Euclidean norm of a sparse vector RDD of ``(index, value)`` pairs."""
    def squared(entry):
        _index, component = entry
        return component * component

    sum_of_squares = V.map(squared).reduce(add)
    return sum_of_squares ** 0.5

def matrix_mult(M, V):
    """Multiply a sparse matrix RDD by a sparse vector RDD.

    M holds ``(i, j, value)`` triples and V holds ``(j, value)`` pairs. Returns
    an RDD of ``(i, total)`` pairs where ``total`` is the sum over j of
    ``M[i, j] * V[j]``; rows whose total is exactly zero are removed via the
    module-level ``filter_func``. Only transformations are applied here, so no
    action is triggered inside this function.
    """
    def key_by_column(entry):
        i, j, m = entry
        return j, (i, m)

    def partial_product(joined):
        _, (v, (i, m)) = joined
        return i, (m * v)

    # Join on the column index so each v_j meets every M[i, j] in that column.
    products = V.join(M.map(key_by_column)).map(partial_product)
    # Deliberately no collect() on the intermediate RDD: it would run an extra
    # job and ship every partial product to the driver only to discard it.
    return products.reduceByKey(add).filter(filter_func)

def dot_product(V, W):
    """Return the dot product of two sparse vector RDDs of ``(index, value)`` pairs."""
    def multiply_pair(joined):
        _index, (left, right) = joined
        return left * right

    # Only indices present in both vectors survive the join.
    paired = V.join(W)
    return paired.map(multiply_pair).reduce(add)



# Power iteration: repeatedly normalise the current vector, multiply it by M,
# and print l = (M . n) . n for the normalised vector n, which estimates the
# dominant eigenvalue of M.
Vi = V

for i in range(10):
    normV = norm(Vi)

    def map_func(value, scale=normV):
        # `scale` is bound as a default argument so this function keeps the
        # current iteration's norm even though the RDD is evaluated lazily.
        index, v = value
        return index, v / scale

    NVi = Vi.map(map_func)
    # cache() keeps each iterate once it has been computed; without it every
    # action re-runs the whole chain of joins back to the input files, and
    # that chain grows with each iteration.
    Vi = matrix_mult(M, NVi).cache()
    l = dot_product(Vi, NVi)
    print("For i =", i,"  l =", l)
    


For i = 0   l = 5.4454401686874
For i = 1   l = 7.21308497442885
For i = 2   l = 7.15275911918617
For i = 3   l = 7.124109421391881
For i = 4   l = 7.110930432120519
For i = 5   l = 7.104939169125513
For i = 6   l = 7.102228675598136
For i = 7   l = 7.101004993592052
For i = 8   l = 7.10045306030242
For i = 9   l = 7.1002042175933955
