In [20]:
from pyspark.sql import functions as F, SparkSession
import functools
import json
from pathlib import Path

In [12]:
# Reuse the active Spark session if one exists, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

# Directory containing the JSON files read throughout this notebook.
wzpath = "../../wz-testing-json"

# Every JSON file two directory levels below Map.wz/Map is loaded into a single
# DataFrame. multiLine=True lets a JSON record span multiple lines of a file.
map_json_glob = f"{wzpath}/Map.wz/Map/*/*.json"
df = spark.read.json(map_json_glob, multiLine=True)

In [13]:
# Number of rows loaded from the map JSON files (sanity check on the glob above).
df.count()

1438

* https://github.com/johncintron/nodin/blob/master/docs/map.md
* https://github.com/johncintron/nodin/blob/master/docs/tile.md

In [14]:
# Build `portals`: one row per element of each map's `payload.portal` collection,
# keeping the map's file name and its `returnMap` next to the portal's fields.
map_portal_lists = df.select("name", "payload.info.returnMap", "payload.portal")

# explode() emits one output row for every element of the `portal` column.
portal_rows = map_portal_lists.withColumn("portal", F.explode("portal"))

# Promote the fields nested under `portal.item` to top-level columns.
portals = portal_rows.select("name", "returnMap", "portal.item.*")

portals.show(10)

+-------------+---------+-----+-----------+-----+--------+------+---+------+---------+------+----+----+
|         name|returnMap|delay|hideTooltip|image|onlyOnce|    pn| pt|script|       tm|    tn|   x|   y|
+-------------+---------+-----+-----------+-----+--------+------+---+------+---------+------+----+----+
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |-343| -81|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |-277| -80|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |-217| -80|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |  -6| -51|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |  38|  -4|
|105090000.img|105040300| null|       null| null|    null|west00|  2|  null|105080000|east01|-324|  17|
|105090000.img|105040300| null|       null| null|    null|east00

In [15]:
# For each map, count the portals whose `pt` field equals 6.
pt6_counts_by_map = portals.filter("pt = 6").groupBy("name").count()
pt6_counts_by_map.show()

+-------------+-----+
|         name|count|
+-------------+-----+
|104000000.img|    5|
|103000000.img|    6|
|200000000.img|    6|
|800000000.img|    6|
|240000000.img|    6|
|000060000.img|    6|
|221000000.img|    6|
|540000000.img|    6|
|105040300.img|    6|
|001010000.img|    6|
|801000000.img|    6|
|222000000.img|    6|
|260000000.img|    6|
|110000000.img|    6|
|251000000.img|    6|
|680000000.img|    6|
|100000000.img|    6|
|220000000.img|    6|
|541000000.img|    6|
|101000000.img|    6|
+-------------+-----+
only showing top 20 rows



In [18]:
# Show only the maps that have fewer than six portals with `pt` equal to 6.
pt6_counts_per_map = portals.filter("pt = 6").groupBy("name").count()
pt6_counts_per_map.filter("count < 6").show()

+-------------+-----+
|         name|count|
+-------------+-----+
|104000000.img|    5|
|682000000.img|    3|
+-------------+-----+



In [54]:
# Load String.wz/Map.img.json and flatten it into a list of flat records.
# The file is decoded as UTF-8 explicitly so that parsing does not depend on
# the platform's default locale encoding.
data = json.loads(
    Path(f"{wzpath}/String.wz/Map.img.json").read_text(encoding="utf-8")
)

# Every value under "payload" is a list of entries. Each entry contributes one
# record made of its "index" plus all key/value pairs of its "item" mapping.
# The keys of "payload" themselves are not used.
res = [
    {"index": item["index"], **item["item"]}
    for entries in data["payload"].values()
    for item in entries
]

In [55]:
# Turn the flattened records into a Spark DataFrame; no schema is passed, so
# Spark infers the columns from the dictionaries in `res`.
strings = spark.createDataFrame(res)
# Print the inferred schema to check which columns were found.
strings.printSchema()

root
 |-- index: long (nullable = true)
 |-- mapName: string (nullable = true)
 |-- streetName: string (nullable = true)
 |-- mapDesc: string (nullable = true)
 |-- help0: string (nullable = true)
 |-- help1: string (nullable = true)



In [66]:
# Attach display names to the maps that have fewer than six `pt = 6` portals.
# The text before the first "." in `name` is cast to long so it can be joined
# against `strings.index`.
(
    portals.where("pt = 6")
    .groupBy("name").count()
    .where("count < 6")
    # Raw string: Spark must receive the two characters `\\` followed by `.`,
    # which its SQL parser unescapes to the regex `\.` (a literal dot). Writing
    # this in a non-raw Python string requires the invalid escape sequence `\.`.
    .selectExpr(r"cast(split(name, '\\.')[0] as long) as index", "count")
    .join(strings.select("index", "mapName"), on="index")
).show()

+---------+-----+-------------+
|    index|count|      mapName|
+---------+-----+-------------+
|104000000|    5|  Lith Harbor|
|682000000|    3|Haunted House|
+---------+-----+-------------+

