# Northwind Trades ® - Enhancing Product Recommendations and Warehouse Efficiency through Data-Driven Analytics
## Part 5: PageRank Algorithm

In [None]:
import os

import pandas as pd
import psycopg2

# Connect to PostgreSQL.
# Each setting is read from the matching libpq-style environment variable when
# it is set, so credentials can be supplied from outside the notebook; the
# fallbacks are the values this notebook originally hardcoded.
conn = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "ucb"),
    host=os.environ.get("PGHOST", "postgres"),
    port=os.environ.get("PGPORT", "5432"),
    database=os.environ.get("PGDATABASE", "postgres"),
)

# Cursor on the same connection for ad-hoc statements.
cur = conn.cursor()


### Count co-purchases between products

In [7]:
# Self-join order_details on order_id to pair every two distinct products that
# share an order, then count the joined rows per ordered pair. Because the
# join condition is symmetric, both (a, b) and (b, a) appear in the result.
co_purchase_query = """
SELECT p1.product_id AS product_a, 
       p2.product_id AS product_b, 
       COUNT(*) AS co_purchase_count
FROM order_details od1
JOIN order_details od2 
  ON od1.order_id = od2.order_id AND od1.product_id <> od2.product_id
JOIN products p1 ON od1.product_id = p1.product_id
JOIN products p2 ON od2.product_id = p2.product_id
GROUP BY p1.product_id, p2.product_id
"""

# Load the pair counts into a DataFrame over the open PostgreSQL connection.
co_purchase_df = pd.read_sql(sql=co_purchase_query, con=conn)

# Preview the first five rows (the query has no ORDER BY, so which rows come
# first is up to the database).
print(co_purchase_df.head(n=5))


   product_a  product_b  co_purchase_count
0         18         36                  3
1         55         76                  2
2         16         62                  6
3          1         28                  1
4          4         64                  1


### Connect to Neo4j and create product graph

In [8]:
from neo4j import GraphDatabase

# Neo4j connection settings. Environment variables take precedence so the
# credentials can be supplied from outside the notebook; the fallbacks are the
# values this notebook originally hardcoded.
uri = os.environ.get("NEO4J_URI", "neo4j://neo4j:7687")
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USERNAME", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "ucb_mids_w205"),
    ),
)

def create_product_graph(tx, product_a, product_b, count):
    """Upsert two Product nodes and the CO_PURCHASED relationship a -> b.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        product_a: ``id`` property of the source Product node.
        product_b: ``id`` property of the target Product node.
        count: Value stored in the relationship's ``count`` property.

    The relationship is merged on its endpoints and type only, and ``count``
    is assigned with SET. Putting ``count`` inside the MERGE pattern would
    make the property value part of the match, so calling this again for the
    same pair with a different count would add a second parallel relationship
    instead of updating the existing one.
    """
    tx.run("""
    MERGE (a:Product {id: $product_a})
    MERGE (b:Product {id: $product_b})
    MERGE (a)-[r:CO_PURCHASED]->(b)
    SET r.count = $count
    """, product_a=product_a, product_b=product_b, count=count)

In [9]:
with driver.session() as session:
    # Start from an empty database. Note that this removes EVERY node and
    # relationship, not only Product nodes.
    session.run("MATCH (n) DETACH DELETE n")

    # One managed write transaction per co-purchase pair.
    for _, pair in co_purchase_df.iterrows():
        # Product ids are stored as strings, the count as a plain int.
        source_id = str(pair['product_a'])
        target_id = str(pair['product_b'])
        times_bought_together = int(pair['co_purchase_count'])
        session.execute_write(
            create_product_graph, source_id, target_id, times_bought_together
        )

In [10]:
with driver.session() as session:
    # Drop any projection left over from an earlier run so this cell can be
    # re-executed and always reflects the graph that was just loaded. The
    # second argument (failIfMissing = false) makes this a no-op on first run.
    session.run("CALL gds.graph.drop('productGraph', false)").consume()

    # Project Product nodes and CO_PURCHASED relationships (with their 'count'
    # property) into the in-memory graph named 'productGraph'.
    # consume() forces the call to finish here, so a failure is raised from
    # this cell rather than possibly going unnoticed when the session closes.
    session.run("""
    CALL gds.graph.project(
      'productGraph',
      'Product',
      {
        CO_PURCHASED: {
          properties: 'count'
        }
      }
    )
    """).consume()


### Run PageRank and query the top 20 most influential products

In [11]:
with driver.session() as session:
    # Run PageRank on the 'productGraph' projection and write each node's
    # score back to the database as the 'pagerank' property.
    # NOTE(review): the call passes no relationshipWeightProperty, so the
    # projected 'count' values are presumably not used as weights — confirm
    # that an unweighted ranking is what is intended.
    write_pagerank_query = """
    CALL gds.pageRank.write('productGraph', {
      maxIterations: 20,
      dampingFactor: 0.85,
      writeProperty: 'pagerank'
    })
    """
    session.run(write_pagerank_query)

    # Read back the 20 products with the highest stored score.
    top_products_query = """
    MATCH (p:Product)
    RETURN p.id AS productId, p.pagerank AS influence
    ORDER BY influence DESC
    LIMIT 20
    """
    top_products = session.run(top_products_query)

    # Materialise the records while the session is still open.
    top_product_rows = [dict(record) for record in top_products]
    pagerank_results = pd.DataFrame(top_product_rows)
    print("PageRank Results (Most Influential Products):")
    print(pagerank_results)

# Release the Neo4j driver and the PostgreSQL connection; earlier cells must
# be re-run before either can be used again.
driver.close()

conn.close()


PageRank Results (Most Influential Products):
   productId  influence
0         60   1.429131
1         41   1.384868
2          2   1.376856
3         16   1.349988
4         56   1.317747
5         13   1.309049
6         64   1.305328
7         77   1.302974
8         24   1.284609
9         75   1.269225
10        31   1.244224
11        10   1.200998
12        39   1.191036
13        59   1.184514
14        40   1.178762
15        19   1.175179
16        46   1.171751
17        55   1.165826
18        62   1.160397
19        72   1.135368
