In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, sum as spark_sum

# Get (or create) the Spark session used by every cell below.
spark = (
    SparkSession.builder
    .appName("MobEfficiency")
    .getOrCreate()
)

# Load the three Minecraft CSV tables. Every file has a header row, and Spark
# is asked to infer the column types instead of reading everything as strings.
csv_options = {"header": True, "inferSchema": True}
mobs = spark.read.csv('../learning_spark_data/minecraft/Mobs.csv', **csv_options)
food = spark.read.csv('../learning_spark_data/minecraft/Food.csv', **csv_options)
mob_food = spark.read.csv('../learning_spark_data/minecraft/MobFoodDrops.csv', **csv_options)

In [3]:
# Drop the columns that the rest of this notebook never reads.
unused_mob_columns = [
    "behaviorTypes",
    "spawnBehavior",
    "debutDate",
    "minecraftVersion",
    "reproductiveRequirement",
]
unused_food_columns = ["debutDate", "minecraftVersion"]

mobs = mobs.drop(*unused_mob_columns)
food = food.drop(*unused_food_columns)

In [4]:
# Cast the numeric columns to double and derive the hunting difficulty.
# A missing maxDamage is replaced with 0 before the cast.
# NOTE: healthPoints is only cast, not null-filled, so a NULL healthPoints
# yields a NULL difficultyScore.
max_damage_or_zero = when(col("maxDamage").isNull(), 0).otherwise(col("maxDamage"))

mobs = (
    mobs
    .withColumn("healthPoints", col("healthPoints").cast("double"))
    .withColumn("maxDamage", max_damage_or_zero.cast("double"))
    # difficulty = health + 2 * damage (damage is weighted twice as heavily)
    .withColumn("difficultyScore", col("healthPoints") + col("maxDamage") * 2)
)

# hunger is used directly as the value of a food drop.
food = food.withColumn("hunger", col("hunger").cast("double"))

In [5]:
# Preview the first 10 rows of each table to check the cleaned data.
# DataFrame.show() prints the table itself and returns None, so wrapping it
# in print() only adds a stray "None" line after every table.
mobs.show(10)
food.show(10)
mob_food.show(10)

+---+--------------+------------+---------+---------------+
| ID|          name|healthPoints|maxDamage|difficultyScore|
+---+--------------+------------+---------+---------------+
|  1|   sniffer_egg|         0.0|      0.0|            0.0|
|  2|    turtle_egg|         0.0|      0.0|            0.0|
|  3|           bat|         6.0|      0.0|            6.0|
|  4|         blaze|        20.0|      6.0|           32.0|
|  5|       chicken|         4.0|      0.0|            4.0|
|  6|       dolphin|        10.0|      3.0|           16.0|
|  7|     endermite|         8.0|      2.0|           12.0|
|  8|      guardian|        30.0|      6.0|           42.0|
|  9|guardian_elder|        80.0|      8.0|           96.0|
| 10|       phantom|        20.0|      2.0|           24.0|
+---+--------------+------------+---------+---------------+
only showing top 10 rows

None
+---+---------------+------+------+
| ID|           name|  type|hunger|
+---+---------------+------+------+
| 13| cooked_chicken|

In [6]:
# mob_food links mobs to foods: mobID -> mob ID, foodID -> food ID.
# First attach each drop's hunger value by matching mob_food.foodID to food.ID.
# The left join keeps every mob_food row, even when no food row matches
# (hunger is NULL for those rows).
mob_food_value = (
    mob_food
    .join(food, on=mob_food["foodID"] == food["ID"], how="left")
    .select(mob_food["mobID"], food["hunger"])
)

# Total food-drop value per mob: the sum of hunger over all of its drops.
mob_drop_value = (
    mob_food_value
    .groupBy("mobID")
    .agg(spark_sum("hunger").alias("totalDropValue"))
)

In [7]:
# Mobs without any food drop have no row in mob_drop_value; after the left
# join their totalDropValue is NULL, which is replaced with 0.
drop_value_or_zero = (
    when(col("totalDropValue").isNull(), 0)
    .otherwise(col("totalDropValue"))
)

mob_efficiency = (
    mobs
    # Match mobs.ID against mob_drop_value.mobID, keeping every mob.
    .join(mob_drop_value, on=mobs["ID"] == mob_drop_value["mobID"], how="left")
    .withColumn("totalDropValue", drop_value_or_zero)
    # Efficiency = drop value / (difficulty + 1); the +1 guards against
    # dividing by zero when difficultyScore is 0.
    .withColumn(
        "efficiencyScore",
        col("totalDropValue") / (col("difficultyScore") + 1),
    )
)


In [8]:
# Show the 10 most efficient mobs (highest drop value per unit of difficulty).
# Several mobs share the same efficiencyScore, and Spark does not define the
# relative order of tied rows, so "name" is added as a secondary sort key to
# make the ranking (and which rows land in the top 10) reproducible.
mob_efficiency.select("name", "healthPoints", "maxDamage", "difficultyScore", "totalDropValue", "efficiencyScore") \
              .orderBy(col("efficiencyScore").desc(), col("name").asc()) \
              .show(10, truncate=False)


+---------------+------------+---------+---------------+--------------+------------------+
|name           |healthPoints|maxDamage|difficultyScore|totalDropValue|efficiencyScore   |
+---------------+------------+---------+---------------+--------------+------------------+
|cod            |3.0         |0.0      |3.0            |2.0           |0.5               |
|salmon         |3.0         |0.0      |3.0            |2.0           |0.5               |
|chicken        |4.0         |0.0      |4.0            |2.0           |0.4               |
|husk           |20.0        |3.0      |26.0           |8.0           |0.2962962962962963|
|zombie         |20.0        |3.0      |26.0           |8.0           |0.2962962962962963|
|zombie_villager|20.0        |3.0      |26.0           |8.0           |0.2962962962962963|
|cow            |10.0        |0.0      |10.0           |3.0           |0.2727272727272727|
|mooshroom      |10.0        |0.0      |10.0           |3.0           |0.2727272727272727|