In [1]:
K = 5

In [2]:
import findspark
findspark.init()
import pyspark
sc = pyspark.SparkContext(appName='spectral')

In [3]:
spark = pyspark.sql.SparkSession(sc)

In [4]:
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.linalg.distributed import IndexedRow, IndexedRowMatrix, MatrixEntry, CoordinateMatrix

In [5]:
from pyspark.ml.clustering import KMeans
from pyspark.ml.feature import VectorAssembler

In [6]:
# spark.conf.set('spark.sql.pivotMaxValues', 335000)

## Get adjacency matrix

In [7]:
txt = sc.textFile('./data/com-amazon.ungraph.txt')
txt.take(5)

['# Undirected graph: ../../data/output/amazon.ungraph.txt',
 '# Amazon',
 '# Nodes: 334863 Edges: 925872',
 '# FromNodeId\tToNodeId',
 '1\t88160']

In [8]:
txt = txt.sample(False, 0.001, 1)

In [8]:
txt = txt.zipWithIndex().filter(lambda x: int(x[1]) >= 4).map(lambda x: x[0].split('\t'))

In [9]:
txt.take(10)

[['1', '88160'],
 ['1', '118052'],
 ['1', '161555'],
 ['1', '244916'],
 ['1', '346495'],
 ['1', '444232'],
 ['1', '447165'],
 ['1', '500600'],
 ['2', '27133'],
 ['2', '62291']]

In [10]:
N = txt.flatMap(lambda x: [int(xx) for xx in x]).max()
N

548551

In [11]:
upper_entries = txt.map(lambda x: MatrixEntry(int(x[0])-1, int(x[1])-1, 1.0))
lower_entries = txt.map(lambda x: MatrixEntry(int(x[1])-1, int(x[0])-1, 1.0))
type(upper_entries) # rdd
type(lower_entries) # rdd

pyspark.rdd.PipelinedRDD

In [12]:
W = CoordinateMatrix(upper_entries.union(lower_entries), numCols=N, numRows=N)
print(W.numCols())
print(W.numRows())
print(type(W))

548551
548551
<class 'pyspark.mllib.linalg.distributed.CoordinateMatrix'>


In [151]:
# def toSparseRow(N):
#     return lambda val: Vectors.sparse(N, [(int(ii), 1) for ii in val])

# txt.flatMap(lambda x: [int(xx) for xx in x]).max()
# N = txt.flatMap(lambda x: [int(xx) for xx in x]).max()
# rows = txt.map(lambda x: tuple(x)).groupByKey().mapValues(toSparse(N))
# W = IndexedRowMatrix(rows)
# W.numCols()
# W.numRows()

## Graph Laplacian

In [13]:
degrees = upper_entries.map(lambda entry: (entry.i, entry.value)).reduceByKey(lambda a, b: a + b)
entries = degrees.map(lambda x: MatrixEntry(x[0], x[0], x[1]))
D = CoordinateMatrix(entries, numCols=N, numRows=N)

- Ordinay: $$L = D - W$$
- Norlaized: $$L = I - D^{-1}W$$
- Symmetric: $$L = I - D^{-1/2}WD^{-1/2}$$

**Calculating the Laplacian could be expensive.**

In [14]:
L = D.toBlockMatrix().subtract(W.toBlockMatrix()).toCoordinateMatrix()
type(L)

pyspark.mllib.linalg.distributed.CoordinateMatrix

## First k eigenvalues and eigen vectors

In [15]:
svd = L.toRowMatrix().computeSVD(k=K, computeU=False)

----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 56534)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socketserver.py", line 696, in __init__
    self.handle()
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/accumulators.py", line 268, in handle
    poll(accum_updates)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/accumulators.py",

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

--- Logging error ---
ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/pyt

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:56527)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-d737ca6fa3ab>", line 1, in <module>
    svd = L.toRowMatrix().computeSVD(k=K, computeU=False)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/linalg/distributed.py", line 344, in computeSVD
    "computeSVD", int(k), bool(computeU), float(rCond))
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 146, in call
    return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
  File "/usr/local/opt/apache-spark/libexec/python/pyspark/mllib/common.py", line 123, in callJavaFunc
    return _java2py(sc, func(*args))
  File "/usr/local/opt/apache-spark/libexec/python/lib/py4j-0.10.7-sr

Py4JNetworkError: An error occurred while trying to connect to the Java server (127.0.0.1:56527)

In [17]:
type(svd.s)
type(svd.V)

pyspark.mllib.linalg.DenseMatrix

In [17]:
## The PCA method is not that scalable and has a fixed limit of columns (65535)
V = L.computePrincipalComponents(k=K)

IllegalArgumentException: 'Argument with more than 65535 cols: 548551'

## K-means on rows of transformed data

In [23]:
V = svd.V.toArray()
type(V)

numpy.ndarray

In [24]:
VV = spark.createDataFrame(V.tolist())
type(VV)

pyspark.sql.dataframe.DataFrame

In [28]:
VV.schema.names

['_1', '_2', '_3', '_4', '_5', '_6', '_7', '_8', '_9', '_10']

In [29]:
kmeans = KMeans().setK(K).setSeed(1)
vecAssembler = VectorAssembler(inputCols=VV.schema.names, outputCol='features')
VV = vecAssembler.transform(VV)

In [31]:
model = kmeans.fit(VV.select('features'))
clusters = model.transform(VV)

In [33]:
clusters.select('prediction').show()

+----------+
|prediction|
+----------+
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
|         0|
+----------+
only showing top 20 rows



In [35]:
clusters.describe('prediction').show()

+-------+--------------------+
|summary|          prediction|
+-------+--------------------+
|  count|              548091|
|   mean|0.006900313998952729|
| stddev|  0.2116750644838857|
|    min|                   0|
|    max|                   9|
+-------+--------------------+



In [36]:
sc.stop()