In [17]:
from pyspark.sql import functions as F

spark

In [18]:
# Root directory of the JSON dump; the String.wz load further down reuses it.
wzpath = "../../wz-testing-json"

# multiLine allows a single JSON record to span several lines of a file.
df = spark.read.option("multiLine", True).json(f"{wzpath}/Map.wz/Map/*/*.json")

In [19]:
# Print the schema Spark inferred for the map files: a top-level `name`
# string plus a nested `payload` struct.
df.printSchema()

root
 |-- name: string (nullable = true)
 |-- payload: struct (nullable = true)
 |    |-- 0: struct (nullable = true)
 |    |    |-- info: string (nullable = true)
 |    |    |-- obj: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- index: long (nullable = true)
 |    |    |    |    |-- item: struct (nullable = true)
 |    |    |    |    |    |-- cx: string (nullable = true)
 |    |    |    |    |    |-- cy: string (nullable = true)
 |    |    |    |    |    |-- f: long (nullable = true)
 |    |    |    |    |    |-- flow: long (nullable = true)
 |    |    |    |    |    |-- forbidFallDown: long (nullable = true)
 |    |    |    |    |    |-- l0: string (nullable = true)
 |    |    |    |    |    |-- l1: string (nullable = true)
 |    |    |    |    |    |-- l2: string (nullable = true)
 |    |    |    |    |    |-- oS: string (nullable = true)
 |    |    |    |    |    |-- r: long (nullable = true)
 |    |    |    |    |    |--

In [20]:
# Preview the descriptive fields kept under each map's `payload.info` struct.
(
    df
    .select("name", "payload.info.*")
    .select("name", "help", "mapDesc", "mapMark", "mapName", "streetName", "town")
    .show(n=100)
)

+-------------+----+-------+-------------+--------+----------+----+
|         name|help|mapDesc|      mapMark| mapName|streetName|town|
+-------------+----+-------+-------------+--------+----------+----+
|105090000.img|null|       |      Dungeon|    null|      null|   0|
|103000900.img|null|       |        Quest|2??<1??>|   ???????|   0|
|680000000.img|null|   null|      Wedding|    null|      null|   1|
|104010000.img|null|       |         Rith|    null|      null|   0|
|109010100.img|null|       |        Event|    null|      null|   0|
|670010600.img|null|   null|      Wedding|    null|      null|   0|
|682000700.img|null|   null|    Halloween|    null|      null|   0|
|229030400.img|null|   null|HuntedMansion|    null|      null|   0|
|229030700.img|null|   null|HuntedMansion|    null|      null|   0|
|229030000.img|null|   null|HuntedMansion|    null|      null|   0|
|990000440.img|null|   null|        Guild|    null|      null|   1|
|105070001.img|null|       |      Dungeon|    nu

In [25]:
# One row per portal: explode each map's `payload.portal` array, then
# flatten the `item` struct of every element into top-level columns.
portals = (
    df
    .select(
        "name",
        "payload.info.returnMap",
        F.explode("payload.portal").alias("portal"),
    )
    .select("name", "returnMap", "portal.item.*")
)
portals.show(10)

+-------------+---------+-----+-----------+-----+--------+------+---+------+---------+------+----+----+
|         name|returnMap|delay|hideTooltip|image|onlyOnce|    pn| pt|script|       tm|    tn|   x|   y|
+-------------+---------+-----+-----------+-----+--------+------+---+------+---------+------+----+----+
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |-343| -81|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |-277| -80|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |-217| -80|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |  -6| -51|
|105090000.img|105040300| null|       null| null|    null|    sp|  0|  null|999999999|      |  38|  -4|
|105090000.img|105040300| null|       null| null|    null|west00|  2|  null|105080000|east01|-324|  17|
|105090000.img|105040300| null|       null| null|    null|east00

Portal field reference (HaCreator `PortalInfo.cs`, lines 42-87): https://github.com/lastbattle/Harepacker-resurrected/blob/c8e3a7ce4cd9f9735230369a8af6605de241077e/HaCreator/MapEditor/Info/PortalInfo.cs#L42-L87

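* `pn` - the portal's name. Spawn points are the rows where it is `sp`, which is short for start point (found by looking at the particular element in HaCreator).
* `pt` - appears to be the portal type (unconfirmed); see the `PortalType` names listed further down.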


In [26]:
# For every `pt` value: how many portals use it and how many distinct `tm`
# values they carry.
(
    portals
    .groupBy("pt")
    .agg(F.count("*"), F.countDistinct("tm"))
    .orderBy("pt")
    .show()
)

+---+--------+---------+
| pt|count(1)|count(tm)|
+---+--------+---------+
|  0|    4245|        1|
|  1|    2573|      511|
|  2|    1525|      667|
|  3|     651|       98|
|  4|       9|        9|
|  5|      20|       16|
|  6|     158|        1|
|  7|     250|        1|
|  8|     209|        1|
|  9|      38|        1|
| 10|      79|       33|
| 11|       2|        1|
+---+--------+---------+



Portal type names from MapleLib's `PortalType.cs`: https://github.com/lastbattle/Harepacker-resurrected/blob/59335ea6fa699fd2f78bf2aee5169535b1093485/MapleLib/WzLib/WzStructure/Data/PortalType.cs
```
PORTALTYPE_STARTPOINT = "sp",
PORTALTYPE_INVISIBLE = "pi",
PORTALTYPE_VISIBLE = "pv",
PORTALTYPE_COLLISION = "pc",
PORTALTYPE_CHANGABLE = "pg",
PORTALTYPE_CHANGABLE_INVISIBLE = "pgi",
PORTALTYPE_TOWNPORTAL_POINT = "tp",
PORTALTYPE_SCRIPT = "ps",
PORTALTYPE_SCRIPT_INVISIBLE = "psi",
PORTALTYPE_COLLISION_SCRIPT = "pcs",
PORTALTYPE_HIDDEN = "ph",
PORTALTYPE_SCRIPT_HIDDEN = "psh",
PORTALTYPE_COLLISION_VERTICAL_JUMP = "pcj",
PORTALTYPE_COLLISION_CUSTOM_IMPACT = "pci",
PORTALTYPE_COLLISION_UNKNOWN_PCIG = "pcig",
PORTALTYPE_SCRIPT_HIDDEN_UNG = "pshg";
```

In [27]:
# Ten most frequent portal names (`pn`), most common first.
(
    portals
    .groupBy("pn")
    .count()
    .orderBy(F.col("count").desc())
    .show(n=10)
)

+-------+-----+
|     pn|count|
+-------+-----+
|     sp| 4240|
|  out00|  547|
|   st00|  321|
| west00|  307|
| east00|  287|
|   in00|  225|
|     tp|  163|
|  top00|  155|
|under00|  145|
|   h001|   92|
+-------+-----+
only showing top 10 rows



In [28]:
# Portals that are not spawn points (`sp`) and whose `tm` is not the
# 999999999 value seen on the spawn-point rows.
(
    portals
    .filter(F.col("pn") != "sp")
    .filter("tm <> 999999999")
    .show()
)

+-------------+---------+-----+-----------+-----+--------+-------+---+------+---------+-------+-----+----+
|         name|returnMap|delay|hideTooltip|image|onlyOnce|     pn| pt|script|       tm|     tn|    x|   y|
+-------------+---------+-----+-----------+-----+--------+-------+---+------+---------+-------+-----+----+
|105090000.img|105040300| null|       null| null|    null| west00|  2|  null|105080000| east01| -324|  17|
|105090000.img|105040300| null|       null| null|    null| east00|  2|  null|105090100| west00| 2199|3369|
|103000900.img|103000000| null|       null| null|    null|   in00|  2|  null|103000900|  out00|  252|-230|
|103000900.img|103000000| null|       null| null|    null|  out00|  2|  null|103000900|   in00| -544|-521|
|103000900.img|103000000| null|       null| null|    null|   in02|  2|  null|103000900|  out02| 1159|-950|
|103000900.img|103000000| null|       null| null|    null|   in04|  2|  null|103000900| last00| 3281|-859|
|103000900.img|103000000| null|      

In [29]:
# Collapse individual portals into distinct (map, portal target, returnMap)
# rows describing which map links to which other map.
portalEdges = (
    portals
    # Drop spawn points; they are named `sp` and are not links between maps.
    .where(F.col("pn") != "sp")
    # 999999999 is the `tm` value seen on rows without a real destination.
    .where("tm <> 999999999")
    .select(
        # `name` looks like "105090000.img"; keep the part before the first
        # dot. The pattern is a regex, so it is written as a raw string:
        # "\." is an invalid Python escape sequence and triggers a
        # DeprecationWarning/SyntaxWarning on recent interpreters.
        F.split("name", r"\.")[0].alias("map"),
        F.col("tm").alias("portal"),
        "returnMap",
    )
    # Ignore portals that lead back into the same map.
    .where("map <> portal")
    .distinct()
)
portalEdges.count()

2087

In [30]:
# Show the first 20 edge rows (the default row count, spelled out).
portalEdges.show(n=20)

+---------+---------+---------+
|      map|   portal|returnMap|
+---------+---------+---------+
|680000000|680000001|680000000|
|104010000|104010002|104000000|
|229030000|229030100|229030300|
|105090100|105090200|105040300|
|101000000|100050000|101000000|
|103000000|103000001|103000000|
|103000000|103000005|103000000|
|103000000|103000100|103000000|
|103000000|103010000|103000000|
|280020001|280020000|211000000|
|105050200|105050300|105040300|
|600020000|600020100|600000000|
|221022200|221022100|221000000|
|600010400|600010500|600000000|
|109030203|109050000|109050001|
|109010200|109010201|109050001|
|250000000|250000001|999999999|
|250000000|250000002|999999999|
|107000400|107000500|103000000|
|600000000|600010100|600000000|
+---------+---------+---------+
only showing top 20 rows



In [31]:
# Count how many distinct maps share each `returnMap`, largest groups first.
(
    portalEdges
    .groupBy("returnMap")
    .agg(F.countDistinct("map").alias("n"))
    .orderBy(F.col("n").desc())
    .show()
)

+---------+---+
|returnMap|  n|
+---------+---+
|220000000| 97|
|682000000| 66|
|240000000| 58|
|200000000| 58|
|211000000| 57|
|105040300| 52|
|102000000| 50|
|221000000| 49|
|103000000| 45|
|101000000| 39|
|109050001| 38|
|100000000| 36|
|260000000| 35|
|230000000| 33|
|280090000| 30|
|910000000| 23|
|250000000| 21|
|922010000| 20|
|222000000| 20|
|990000200| 20|
+---------+---+
only showing top 20 rows



In [32]:
# Typed edge list: "portal" edges go from a map to the map its portal
# targets, "return" edges go from a map's returnMap to that map.
edges = (
    portalEdges
    .select(
        F.col("map").alias("src"),
        F.col("portal").alias("dst"),
        F.lit("portal").alias("type"),
    )
    .union(
        portalEdges
        # 999999999 shows up in `returnMap` as well; it is presumably the same
        # "no map" placeholder already excluded for `tm`, so it must not
        # become a node. This comparison also drops NULL returnMap values.
        .where("returnMap <> 999999999")
        .select(
            F.col("returnMap").alias("src"),
            F.col("map").alias("dst"),
            F.lit("return").alias("type"),
        )
        # portalEdges holds one row per (map, portal, returnMap), so a map
        # with several portals would otherwise repeat the same return edge.
        .distinct()
    )
)

# Write the edge list with Source/Target/Label headers for external graph tools.
(
    edges
    .selectExpr("src as Source", "dst as Target", "type as Label")
    .toPandas()
    .to_csv("../data/processed/map-portals.csv", index=False)
)

In [34]:
# Load the map string tables (map and street names grouped by region).
mapStrings = spark.read.option("multiLine", True).json(f"{wzpath}/String.wz/Map*.json")
mapStrings.printSchema()

root
 |-- name: string (nullable = true)
 |-- payload: struct (nullable = true)
 |    |-- HalloweenGL: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- index: long (nullable = true)
 |    |    |    |-- item: struct (nullable = true)
 |    |    |    |    |-- mapName: string (nullable = true)
 |    |    |    |    |-- streetName: string (nullable = true)
 |    |-- MasteriaGL: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- index: long (nullable = true)
 |    |    |    |-- item: struct (nullable = true)
 |    |    |    |    |-- mapName: string (nullable = true)
 |    |    |    |    |-- streetName: string (nullable = true)
 |    |-- etc: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- index: long (nullable = true)
 |    |    |    |-- item: struct (nullable = true)
 |    |    |    |    |-- mapName: string (nullable = true)
 |    |    |    |    |-- 

In [44]:
# One row per entry of the `victoria` array: its index plus the flattened
# `item` fields.
victoria = (
    mapStrings
    .select(F.explode("payload.victoria").alias("entry"))
    .select("entry.index", "entry.item.*")
)
victoria.show()

+---------+-----+-----+--------------------+--------------------+-------------+
|    index|help0|help1|             mapDesc|             mapName|   streetName|
+---------+-----+-----+--------------------+--------------------+-------------+
|100000000| null| null|It's a bowman tow...|             Henesys|Victoria Road|
|100000001| null| null|                null|  Henesys Townstreet|Victoria Road|
|100000002| null| null|                null|      An Empty House|Hidden Street|
|100000003| null| null|                null|            Pig Park|Hidden Street|
|100000004| null| null|                null|         Pig Park II|Hidden Street|
|100000005| null| null|                null|Someone Else's House|Hidden Street|
|100000006| null| null|                null|The Resting Spot,...|Hidden Street|
|100000100| null| null|                null|      Henesys Market|Victoria Road|
|100000101| null| null|                null|Henesys Weapon Store|Victoria Road|
|100000102| null| null|                n

In [51]:
# Keep only edges whose source and target both appear in the `victoria`
# table, then export them with Source/Target/Label headers.
(
    edges
    .join(victoria.selectExpr("index as src"), "src")
    .join(victoria.selectExpr("index as dst"), "dst")
    .select(
        F.col("src").alias("Source"),
        F.col("dst").alias("Target"),
        F.col("type").alias("Label"),
    )
    .toPandas()
    .to_csv("../data/processed/victoria-portal-edges.csv", index=False)
)

# Export the node list: one row per `victoria` entry, labelled
# "<streetName>: <mapName>".
(
    victoria
    .select(
        F.col("index").alias("Id"),
        # concat_ws skips NULL inputs, whereas concat would return a NULL label
        # whenever either streetName or mapName is missing (both are nullable).
        F.concat_ws(": ", "streetName", "mapName").alias("Label"),
    )
    .toPandas()
    .to_csv("../data/processed/victoria-portal-nodes.csv", index=False)
)