In [None]:
!pip install pyspark
!pip install neo4j
!pip install pandas

import pandas as pd



In [None]:
from pyspark.sql import SparkSession

# Create the Spark session for this notebook (or reuse one that already exists).
spark = (
    SparkSession.builder
    .appName("Ejemplo de PySpark en Jupyter Notebook")
    .getOrCreate()
)

# Keep a handle on the SparkContext, which exposes the RDD API used below.
sc = spark.sparkContext

# Parte 2
## Problemas a resolver

### Funciones para implementar PySpark

In [None]:
def hash(x, B):
    """Return the bucket of ``x`` among ``B`` buckets, computed as ``x % B``.

    Note: this definition shadows Python's built-in ``hash`` for every cell
    that runs after it.
    """
    bucket = x % B
    return bucket

In [None]:
def map_pdm(x, arista, y, B):
    """Emit the ``(key, edge)`` pairs for a single edge ``(x, arista, y)``.

    Both endpoints are hashed into ``B`` buckets. For every bucket index
    ``i`` in ``range(B)`` the edge is emitted under three keys:
    ``(h(x), h(y), i)``, ``(i, h(x), h(y))`` and ``(h(y), i, h(x))``.

    Args:
        x: first endpoint of the edge.
        arista: edge payload, carried along unchanged.
        y: second endpoint of the edge.
        B: number of hash buckets.

    Returns:
        A set of ``(key, (x, arista, y))`` pairs. Keys that coincide (for
        example when both endpoints fall in the same bucket) appear once.
    """
    hx, hy = hash(x, B), hash(y, B)
    edge = (x, arista, y)

    pairs = set()
    for wildcard in range(B):
        # One key per position the "free" bucket index can take.
        pairs.update((
            ((hx, hy, wildcard), edge),
            ((wildcard, hx, hy), edge),
            ((hy, wildcard, hx), edge),
        ))

    return pairs

In [None]:
def triangulo(nodos):
    """Find directed triangles (3-cycles) among a collection of edges.

    Args:
        nodos: iterable of edges shaped ``(source, payload, target)``; only
            positions 0 and 2 of each edge are read.

    Returns:
        A list with one ``(a[0], b[0], c[0])`` tuple for every unordered
        triple of edges ``a, b, c`` (taken in input order) that closes a cycle
        ``a -> b -> c -> a`` or ``a -> c -> b -> a`` over three distinct
        nodes. Triples that only close through a self-loop or a repeated node
        are not triangles and are skipped.
    """
    # Imported locally so the function stays self-contained when it is
    # shipped to worker processes.
    from itertools import combinations

    triangulos_detectados = []

    # combinations() yields the same i < j < k triples, in the same order,
    # as three nested index loops would.
    for a, b, c in combinations(nodos, 3):
        src_a, src_b, src_c = a[0], b[0], c[0]

        # In a 3-cycle the three edge sources are exactly its vertices, so a
        # repeated source means the "cycle" collapses onto fewer than 3 nodes.
        if src_a == src_b or src_b == src_c or src_a == src_c:
            continue

        forward = a[2] == src_b and b[2] == src_c and c[2] == src_a
        backward = a[2] == src_c and c[2] == src_b and b[2] == src_a
        if forward or backward:
            triangulos_detectados.append((src_a, src_b, src_c))

    return triangulos_detectados

*Conexión a Neo4j*

In [None]:
import getpass
import os

from neo4j import GraphDatabase

# Connection settings are read from the environment so that no secret is
# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the instance
# this notebook was written against; the password has no fallback and is
# prompted for when NEO4J_PASSWORD is not set.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated on the Neo4j instance.
URI = os.environ.get("NEO4J_URI", "neo4j+s://06ae1fa1.databases.neo4j.io")
AUTH = (
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: "),
)

driver = GraphDatabase.driver(URI, auth=AUTH)

# Run a trivial query to check that the server is reachable and the
# credentials are accepted; failures are reported rather than raised.
with driver.session() as session:
    try:
        session.run("RETURN 1")
        print("Connection to Neo4j established successfully!")
    except Exception as e:
        print(f"Failed to connect to Neo4j: {e}")

Connection to Neo4j established successfully!


### Cargar el grafo




In [None]:
def get_data_from_neo4j():
    """Load every ``(:Node)-[:RELATED]->(:Node)`` relationship as a tuple.

    Uses the module-level ``driver`` to open a session and run the query.

    Returns:
        A list of ``(n1.id, r.weight, n2.id)`` tuples, one per record
        returned by the query, in result order.
    """
    edges = []
    with driver.session() as session:
        records = session.run("""
        MATCH (n1:Node)-[r:RELATED]->(n2:Node)
        RETURN n1.id AS id_form, n2.id AS id_to, r.weight AS weight
        """)
        for record in records:
            row = record.data()
            # Reorder the columns into (from, weight, to).
            edges.append((row['id_form'], row['weight'], row['id_to']))
    return edges

# Fetch the edge tuples from Neo4j
neo4j_data = get_data_from_neo4j()

In [None]:
# Distribute the edge tuples fetched from Neo4j as a Spark RDD.
rdd = sc.parallelize(neo4j_data)

In [None]:
# Preview the first five edge tuples.
rdd.take(5)

[(1, 1, 2), (1, 7, 3), (4, 3, 5), (6, 5, 7), (6, 14137, 8)]

Función para la búsqueda de triángulos

In [None]:
# Number of hash buckets; passed to buscar_triangulos as its `b` argument.
B = 100

In [None]:
def buscar_triangulos(rdd, b):
    """Search for triangles in an RDD of edges using ``b`` hash buckets.

    Each edge is expanded into keyed copies by ``map_pdm``, the copies are
    grouped by key, and ``triangulo`` is run on the edges of every group.

    Args:
        rdd: RDD whose elements unpack as the first three arguments of
            ``map_pdm``.
        b: number of hash buckets, forwarded to ``map_pdm``.

    Returns:
        A dict mapping every key produced by ``map_pdm`` to the list returned
        by ``triangulo`` for that key's edges (possibly an empty list).
    """
    keyed_edges = rdd.flatMap(lambda dato: map_pdm(*dato, b))
    grouped = keyed_edges.groupByKey()
    per_key = grouped.mapValues(lambda aristas: triangulo(list(aristas)))

    # Bring everything back to the driver as a plain dictionary.
    return dict(per_key.collect())


In [None]:
# Build an RDD from the Neo4j edge tuples and run the triangle search on it
# with B hash buckets.
test = sc.parallelize(neo4j_data)

buscar_triangulos(test, B)

{(89, 1, 3): [],
 (6, 8, 23): [],
 (14, 80, 13): [],
 (20, 22, 79): [],
 (29, 5, 67): [],
 (30, 31, 66): [],
 (47, 30, 34): [(147, 1034, 730)],
 (90, 44, 45): [],
 (58, 97, 44): [],
 (62, 79, 44): [(44, 62, 79)],
 (72, 75, 44): [],
 (44, 75, 94): [],
 (76, 51, 44): [],
 (44, 78, 21): [],
 (44, 79, 24): [],
 (76, 44, 85): [(44, 85, 376)],
 (92, 27, 44): [],
 (44, 94, 61): [],
 (76, 44, 95): [(44, 95, 376)],
 (44, 5, 4): [],
 (44, 5, 14): [(44, 314, 105)],
 (7, 8, 10): [(407, 408, 410)],
 (67, 7, 11): [],
 (26, 24, 29): [],
 (24, 49, 98): [],
 (24, 49, 88): [(124, 49, 88), (124, 49, 188)],
 (32, 24, 31): [(124, 131, 132)],
 (52, 24, 47): [(124, 147, 152)],
 (24, 75, 10): [],
 (24, 97, 58): [(124, 158, 197)],
 (85, 24, 4): [(124, 185, 204)],
 (15, 69, 17): [],
 (23, 72, 16): [],
 (90, 13, 16): [],
 (95, 16, 90): [],
 (33, 39, 31): [(339, 731, 733)],
 (41, 96, 76): [],
 (74, 45, 46): [(245, 46, 174), (245, 46, 274), (46, 145, 174)],
 (74, 45, 56): [(245, 56, 174), (245, 56, 274)],
 (45, 62

In [None]:
# Close the Neo4j driver now that the notebook no longer queries the database.
driver.close()