# Spark ML - Data structures

## Prepare the Spark Session

In [None]:
# Make the local Spark installation importable before touching pyspark
import findspark

findspark.init()

# Classes needed to configure and build the session
# (imported only after findspark has prepared the environment)
from pyspark import SparkConf
from pyspark.sql import SparkSession

# Build the configuration in a single chained expression:
# application name plus a local master ('local[*]')
conf = SparkConf().setAppName('mds-session').setMaster('local[*]')

# Reuse an existing session if one is active, otherwise create it
spark = SparkSession.builder.config(conf=conf).getOrCreate()

## Import data structures

In [None]:
# Import the required elements from Spark ML
from pyspark.ml.linalg import Vectors, Matrices

## DenseVectors

In [None]:
# Create a dense vector: all five entries are given explicitly
dense = Vectors.dense([1, 2, 3, 4, 5])
# Bare expression on the last line so the notebook displays the vector
dense

In [None]:
# Check the data type of the dense vector
type(dense)

## SparseVectors

In [None]:
# Create a sparse vector of size 10: only the entries at indices 1, 3 and 5
# are stored, with values 9.0, 3.5 and 1.3 respectively
sparse = Vectors.sparse(10, (1, 3, 5), (9.0, 3.5, 1.3))
# Bare expression on the last line so the notebook displays the vector
sparse

In [None]:
# Check the data type of the sparse vector
type(sparse)

## Vector operations

In [None]:
# Convert the dense vector to a NumPy array
dense.toArray()

In [None]:
# Convert the sparse vector to a NumPy array
sparse.toArray()

In [None]:
# Dot product between a dense and a sparse vector of the same size (4).
# vector2 holds 5 at index 1 and 6 at index 3, so the result is
# 2*5 + 4*6 = 34
vector1 = Vectors.dense([1, 2, 3, 4])
vector2 = Vectors.sparse(4, [1, 3], [5, 6])
vector1.dot(vector2)

In [None]:
# Squared distance between a dense and a sparse vector
# (note: squared_distance returns the squared value, not its square root).
# Here: (1-0)^2 + (2-5)^2 + (3-0)^2 + (4-6)^2 = 23
vector1 = Vectors.dense([1, 2, 3, 4])
vector2 = Vectors.sparse(4, [1, 3], [5, 6])
vector1.squared_distance(vector2)

## DenseMatrices

In [None]:
# Create a 2x2 dense matrix from the values 0, 1, 2, 3.
# Values are laid out in column-major order, i.e. [[0, 2], [1, 3]].
# NOTE: this rebinds `dense`, which previously held the dense vector.
dense = Matrices.dense(2, 2, range(4))
# Bare expression on the last line so the notebook displays the matrix
dense

In [None]:
# Check the data type of the dense matrix
type(dense)

## SparseMatrices

In [None]:
# Create a 3x3 sparse matrix in CSC (compressed sparse column) format.
# Arguments after the shape: column pointers, row indices, values.
#   column 0 -> rows 0, 1    with values 1, 3
#   column 1 -> row 1        with value 4
#   column 2 -> rows 0, 1, 2 with values 2, 5, 6
# i.e. [[1, 0, 2], [3, 4, 5], [0, 0, 6]]
# NOTE: this rebinds `sparse`, which previously held the sparse vector.
sparse = Matrices.sparse(3, 3, [0, 2, 3, 6], [0, 1, 1, 0, 1, 2], [1, 3, 4, 2, 5, 6])
# Bare expression on the last line so the notebook displays the matrix
sparse

In [None]:
# Check the data type of the sparse matrix
type(sparse)

## Matrix operations

In [None]:
# Convert the dense matrix to a NumPy array
dense.toArray()

In [None]:
# Convert the sparse matrix to a NumPy array
sparse.toArray()

## Close the session

In [None]:
# Stop the Spark session created at the start of the notebook
spark.stop()