In [29]:
from pyspark.sql import functions as F, SparkSession
import functools
import json

In [10]:
# Reuse the active Spark session if one exists, otherwise create it.
spark = SparkSession.builder.getOrCreate()

# Base directory of the dumps (relative path).
wzpath = "../../wzedits/dumps"

# multiLine lets a single JSON document span several lines of a file.
df = spark.read.option("multiLine", True).json(f"{wzpath}/Map.wz/Map/*/*.json")

In [11]:
# Number of rows Spark parsed from the matched JSON files.
df.count()

176

* https://github.com/johncintron/nodin/blob/master/docs/map.md
* https://github.com/johncintron/nodin/blob/master/docs/tile.md

In [9]:
# Display the schema Spark inferred for the `tile` field under payload key "0".
df.select("payload.0.tile")

DataFrame[tile: array<struct<index:bigint,item:struct<no:bigint,u:string,x:bigint,y:bigint,zM:bigint>>>]

In [34]:
# Stack the `info` field found under payload keys "0" through "7" into a single
# one-column DataFrame, then discard rows whose info is the literal string '[]'.
info = functools.reduce(
    lambda stacked, nxt: stacked.union(nxt),
    (df.select(f"payload.{layer}.info") for layer in range(8)),
).filter("info <> '[]'")

In [44]:
# Show the five longest `info` strings alongside their character length.
(
    info
    .select("info", F.length("info").alias("len"))
    .orderBy(F.col("len").desc())
    .show(n=5, truncate=80)
)

+-----------------------------------+---+
|                               info|len|
+-----------------------------------+---+
|{"tS":"greenCrystalCave","tSMag":1}| 35|
|{"tS":"greenCrystalCave","tSMag":1}| 35|
|{"tS":"greenCrystalCave","tSMag":1}| 35|
|{"tS":"greenCrystalCave","tSMag":1}| 35|
|{"tS":"greenCrystalCave","tSMag":1}| 35|
+-----------------------------------+---+
only showing top 5 rows



In [45]:
def getTS(x):
    """Return a string Column holding the ``tS`` field of a JSON-encoded column.

    Uses Spark's built-in JSON path extraction instead of a Python UDF, so rows
    are not serialized to Python workers, and a null, malformed or ``tS``-less
    value yields null rather than failing the whole job.

    Args:
        x: Column (or column name) whose values are JSON objects stored as strings.

    Returns:
        A Column of strings; null where ``tS`` cannot be extracted.
    """
    return F.get_json_object(x, "$.tS")

# find the counts of each referenced tile
tiles = (
    info
    .select(getTS("info").alias("tile"))
    .groupBy("tile")
    .count()
    .orderBy("tile")
)
# Mark the aggregate for caching so later uses do not recompute it.
tiles.cache()
tiles.show()

+----------------+-----+
|            tile|count|
+----------------+-----+
|        blueCave|   28|
|       blueCave2|    6|
|        darkCave|   14|
|        darkWood|   82|
|        deepCave|   34|
|         dryRock|   21|
|greenCrystalCave|   15|
|  snowyLightrock|    5|
|         wetWood|    3|
|      woodMarble|   23|
| yellowToyCastle|   18|
+----------------+-----+



In [54]:
# Preview the object entries under payload key "1": one row per array element,
# with the fields of each element's `item` struct spread into columns.
(
    df
    .select(F.explode("payload.1.obj").alias("entry"))
    .select("entry.item.*")
    .show()
)

+---+--------------+--------+-----+---+-------+----+-------+----+----+---+---+
|  f|forbidFallDown|      l0|   l1| l2|     oS|   r|reactor|   x|   y|  z| zM|
+---+--------------+--------+-----+---+-------+----+-------+----+----+---+---+
|  0|          null|blueCave|  acc|  0|dungeon|null|   null| 272|-318|  4|209|
|  0|          null|blueCave|  acc|  1|dungeon|null|   null| 244|-346|  3|209|
|  0|          null|blueCave|  acc|  2|dungeon|null|   null| 296|-350|  3|209|
|  0|          null|blueCave|  acc|  3|dungeon|null|   null| 316|-335|  5|209|
|  0|          null|blueCave|  acc|  4|dungeon|null|   null| 331|-349|  3|209|
|  0|          null|darkCave| foot|  4|dungeon|null|   null| 323|-211|  4| 89|
|  0|          null|  house3|basic|  0|houseDW|null|   null| 112|-186|  2| 89|
|  0|          null|darkCave|  acc|  9|dungeon|null|   null| 218|-187|  3| 89|
|  0|          null|blueCave| foot|  1|dungeon|null|   null|1512|1643|  3|  3|
|  0|          null|blueCave| foot|  1|dungeon|null|

In [60]:
def _objects_in_layer(layer):
    """Return the oS/l0/l1/l2 fields of every object under payload key `layer`.

    The columns are renamed to obj, level_0, level_1 and level_2.
    """
    entries = df.select(F.explode(f"payload.{layer}.obj").alias("obj"))
    fields = entries.select("obj.item.*")
    return fields.select(
        F.col("oS").alias("obj"),
        F.col("l0").alias("level_0"),
        F.col("l1").alias("level_1"),
        F.col("l2").alias("level_2"),
    )


# Stack the per-layer object tables for payload keys "0" through "7".
obj = functools.reduce(
    lambda stacked, nxt: stacked.union(nxt),
    map(_objects_in_layer, range(8)),
)

obj.show(n=3)

+-------+--------+-------+-------+
|    obj| level_0|level_1|level_2|
+-------+--------+-------+-------+
|dungeon|blueCave|    acc|      0|
|dungeon|blueCave|    acc|      1|
|dungeon|blueCave|    acc|      2|
+-------+--------+-------+-------+
only showing top 3 rows



In [65]:
# Hierarchy columns, from coarsest to finest.
cols = ["obj", "level_0", "level_1", "level_2"]

# Count rows per value of the first hierarchy column only.
group_keys = cols[:1]
obj.groupBy(*group_keys).count().orderBy(*group_keys).show(n=100)

+---------+-----+
|      obj|count|
+---------+-----+
| AmoriaGL|    1|
|     acc1| 2393|
|     acc2|  213|
|     acc3|    6|
|     acc4|   72|
|     acc9|  344|
|  connect| 4218|
|     door|   37|
|  dungeon| 2876|
|    guide|   20|
|    guild|   60|
|    hotel|   80|
|    house|   17|
|  houseDW|   57|
|  houseGS|    7|
| houseSLR|    5|
|signboard|  242|
|     trap|  633|
|  upgrade|  525|
+---------+-----+

