# Find All Levels of an Employee–Manager Hierarchy | Recursion

In [1]:
import os

# Raw string literal: in a regular string "\p" is an invalid escape sequence,
# which newer Python versions warn about. The resulting path is unchanged.
# NOTE(review): machine-specific absolute path — adjust for your environment.
PROJECT_DIR = r"H:\pyspark_advanced-coding_interview"

# Switch the kernel's working directory to the project folder and display it
# as the cell output to confirm the change took effect.
os.chdir(PROJECT_DIR)
os.getcwd()

'H:\\pyspark_advanced-coding_interview'

In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Obtain a Spark session named "EmployeeHierarchy" (getOrCreate returns the
# already-running session when there is one).
spark = (
    SparkSession.builder
    .appName("EmployeeHierarchy")
    .getOrCreate()
)

# Explicit schema: three nullable columns. ManagerID is left null for the
# employee at the top of the hierarchy.
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("EmployeeID", IntegerType()),
        ("EmployeeName", StringType()),
        ("ManagerID", IntegerType()),
    )
])

# Sample rows as (EmployeeID, EmployeeName, ManagerID).
data = [
    (1, "Alice", None),   # root of the hierarchy: no manager
    (2, "Bob", 1),        # reports to Alice
    (3, "Charlie", 1),    # reports to Alice
    (4, "David", 2),      # reports to Bob
    (5, "Eve", 2),        # reports to Bob
    (6, "Frank", 3),      # reports to Charlie
    (7, "Grace", 3),      # reports to Charlie
    (8, "Heidi", 4),      # reports to David
    (9, "Ivan", 5),       # reports to Eve
    (10, "Judy", 6),      # reports to Frank
]

# Materialise the rows as a DataFrame and print it without truncating values.
df = spark.createDataFrame(data, schema=schema)
df.show(truncate=False)


+----------+------------+---------+
|EmployeeID|EmployeeName|ManagerID|
+----------+------------+---------+
|1         |Alice       |null     |
|2         |Bob         |1        |
|3         |Charlie     |1        |
|4         |David       |2        |
|5         |Eve         |2        |
|6         |Frank       |3        |
|7         |Grace       |3        |
|8         |Heidi       |4        |
|9         |Ivan        |5        |
|10        |Judy        |6        |
+----------+------------+---------+



----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 64092)
Traceback (most recent call last):
  File "c:\Users\lpdda\AppData\Local\Programs\Python\Python311\Lib\socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "c:\Users\lpdda\AppData\Local\Programs\Python\Python311\Lib\socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "c:\Users\lpdda\AppData\Local\Programs\Python\Python311\Lib\socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "c:\Users\lpdda\AppData\Local\Programs\Python\Python311\Lib\socketserver.py", line 755, in __init__
    self.handle()
  File "C:\spark\python\pyspark\accumulators.py", line 281, in handle
    poll(accum_updates)
  File "C:\spark\python\pyspark\accumulators.py", line 253, in poll
    if func():
       ^^^^^^
  File "C:\spark\python\py

# Spark SQL

In [4]:
# Register the DataFrame as a temporary view named "Employee" so the
# spark.sql(...) queries in the following cells can refer to it by name.
df.createOrReplaceTempView("Employee")


In [5]:
# Anchor step of the hierarchy: select the employees that have no manager
# (ManagerID IS NULL) and tag them with Level = 1.
# With the sample data this yields a single row (Alice).
query1 = spark.sql("""                    
 WITH Level1 AS (
    SELECT EmployeeID, EmployeeName, ManagerID, 1 AS Level
    FROM Employee
    WHERE ManagerID IS NULL
)
SELECT * FROM Level1;                  
                   
                   """)

# Print the Level 1 rows.
query1.show()

+----------+------------+---------+-----+
|EmployeeID|EmployeeName|ManagerID|Level|
+----------+------------+---------+-----+
|         1|       Alice|     null|    1|
+----------+------------+---------+-----+



In [6]:
# Expand the hierarchy with one CTE per level: each LevelN joins Employee to
# the previous level on ManagerID = EmployeeID and adds 1 to that level's
# Level value. The three levels are then stacked with UNION ALL.
# NOTE(review): the chain is hard-coded and stops at Level3, so employees
# deeper than three levels are not returned (with the sample data,
# EmployeeIDs 8, 9 and 10 are missing from the result).
query = spark.sql("""
 WITH Level1 AS (
    SELECT EmployeeID, EmployeeName, ManagerID, 1 AS Level
    FROM Employee
    WHERE ManagerID IS NULL
),
Level2 AS (
    SELECT e.EmployeeID, e.EmployeeName, e.ManagerID, l1.Level + 1 AS Level
    FROM Employee e
    INNER JOIN Level1 l1 ON e.ManagerID = l1.EmployeeID
),
Level3 AS (
    SELECT e.EmployeeID, e.EmployeeName, e.ManagerID, l2.Level + 1 AS Level
    FROM Employee e
    INNER JOIN Level2 l2 ON e.ManagerID = l2.EmployeeID
)
SELECT * FROM Level1
UNION ALL
SELECT * FROM Level2
UNION ALL
SELECT * FROM Level3;

                  
                  """)

# Print levels 1-3 of the hierarchy.
query.show()

+----------+------------+---------+-----+
|EmployeeID|EmployeeName|ManagerID|Level|
+----------+------------+---------+-----+
|         1|       Alice|     null|    1|
|         2|         Bob|        1|    2|
|         3|     Charlie|        1|    2|
|         4|       David|        2|    3|
|         5|         Eve|        2|    3|
|         6|       Frank|        3|    3|
|         7|       Grace|        3|    3|
+----------+------------+---------+-----+



# PySpark

In [7]:
# Re-display the source employee DataFrame before building the hierarchy
# with the DataFrame API.
df.show()

+----------+------------+---------+
|EmployeeID|EmployeeName|ManagerID|
+----------+------------+---------+
|         1|       Alice|     null|
|         2|         Bob|        1|
|         3|     Charlie|        1|
|         4|       David|        2|
|         5|         Eve|        2|
|         6|       Frank|        3|
|         7|       Grace|        3|
|         8|       Heidi|        4|
|         9|        Ivan|        5|
|        10|        Judy|        6|
+----------+------------+---------+



In [10]:
# `expr` stays imported in case other cells rely on it; `lit` is the idiomatic
# way to build a constant column.
from pyspark.sql.functions import col, expr, lit

# Upper bound on the hierarchy depth explored below (the root is level 1).
MAX_LEVELS = 10

# Step 1: seed the traversal with the top-level managers (no ManagerID),
# tagged as Level 1.
current_level = df.filter(col("ManagerID").isNull()).withColumn("Level", lit(1))
hierarchy = current_level

# Step 2: expand one level per pass. Each pass joins every employee to the
# managers found in the previous pass, so pass `level` yields exactly the
# employees at that depth. `range(2, MAX_LEVELS + 1)` makes the cap inclusive,
# so up to MAX_LEVELS levels are really explored.
for level in range(2, MAX_LEVELS + 1):
    next_level = df.alias("e").join(
        current_level.alias("c"),
        col("e.ManagerID") == col("c.EmployeeID")
    ).select(
        col("e.EmployeeID"),
        col("e.EmployeeName"),
        col("e.ManagerID"),
        lit(level).alias("Level")  # previous pass holds level - 1
    )

    # Stop as soon as a pass finds nobody. take(1) only needs to fetch a
    # single row, whereas count() has to count every row of the join.
    if not next_level.take(1):
        break

    # Append this level to the result and use it as the next join input.
    hierarchy = hierarchy.union(next_level)
    current_level = next_level
else:
    # The loop ran out of levels without hitting an empty pass, so deeper
    # employees may exist; say so instead of truncating silently.
    print(
        f"Warning: stopped after {MAX_LEVELS} levels; "
        "deeper levels (if any) are not included."
    )

# Step 3: show the complete hierarchy
hierarchy.show(truncate=False)



+----------+------------+---------+-----+
|EmployeeID|EmployeeName|ManagerID|Level|
+----------+------------+---------+-----+
|1         |Alice       |null     |1    |
|2         |Bob         |1        |2    |
|3         |Charlie     |1        |2    |
|4         |David       |2        |3    |
|5         |Eve         |2        |3    |
|6         |Frank       |3        |3    |
|7         |Grace       |3        |3    |
|8         |Heidi       |4        |4    |
|9         |Ivan        |5        |4    |
|10        |Judy        |6        |4    |
+----------+------------+---------+-----+

