# Initial Prep

## Library Import

In [None]:
from ast import literal_eval
import math
import os
import re
import warnings

import pandas as pd
import numpy as np

# Let DataFrame displays show up to 500 rows and 500 columns before truncating
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# NOTE(review): this silences every warning category for the rest of the session,
# including deprecation and pandas warnings — confirm that is intended
warnings.filterwarnings(action= 'ignore')

## Connect to Drive

In [None]:
# from google.colab import drive

# drive.mount('/content/drive', force_remount=True)

### Copy Files from Drive

In [None]:
# !mkdir update_changes/
# !mkdir update_changes/tw/
# !mkdir update_changes/tw/20220808
# !mkdir update_changes/tw/20220808/cleaned
# !mkdir update_changes/20220808/
# !mkdir update_changes/20220808/parsed

# !cp -r /content/drive/MyDrive/update_changes/tw/20220808/cleaned/* ./update_changes/tw/20220808/cleaned/

## Folder Prep

In [None]:
# Date tag used in the input/output directory names and in the output file names
APK_DATE = "20220808"
MAIN_DIR = "./update_changes/tw"
# Directory the cleaned dump files are read from
MAIN_CLEANED_DIR = f"{MAIN_DIR}/{APK_DATE}/cleaned"
# Directory the parsed CSV files are written to
MAIN_PARSED_DIR = f"./update_changes/{APK_DATE}/parsed"

# makedirs also creates the missing parent directories (os.mkdir would fail on
# a fresh checkout) and exist_ok makes re-running the cell harmless
os.makedirs(MAIN_PARSED_DIR, exist_ok=True)

## Function Initialization

In [None]:
def get_id_value(text: str, context: str, with_underscore: bool = False) -> tuple:
  """
  Extract the id and the value from one line of a language data dump.

  The id is used to connect the information to the other tables
  (basically the primary key for the information provided).

  Parameters
  ----------
  text
    One line of the data dump, mostly it looks like this
    `["AttrPool_11010023"]="Refine to +9, ATK +5%",`
  context
    The name of the key before the id, for example for the text
    above, the context will be `AttrPool`
  with_underscore
    If True, an underscore is expected right after the context and the
    id is the run of word characters that follows it. If False, the id
    is whatever run of digits, spaces and underscores directly follows
    the context (so a leading underscore stays part of the id).

  Returns
  -------
  id, value
    tuple containing id and value of the attr, both as strings

  Raises
  ------
  IndexError
    If `text` does not match the expected pattern
  """

  # Escape the context so it is always matched literally
  prefix = re.escape(context)

  # Raw strings: `\d` / `\w` are regex escapes, not Python string escapes
  if with_underscore:
    regex = rf'"{prefix}_(\w*)"\]="(.*)"'
  else:
    regex = rf'"{prefix}([\d _]*)"\]="(.*)"'

  return re.findall(regex, text)[0]

def create_and_add_entry_tw(context: str, key: str, with_underscore: bool) -> None:
  """
  Parse the current dump line and append the entry to `tw_parsed_data`.

  The line is not passed in: it is read from the module-level variable
  `text`, so this must be called from a loop that binds `text`.

  Parameters
  ----------
  context
    The name of the key before the id, for example for
    `["AttrPool_11010023"]="..."` the context will be `AttrPool`
  key
    The name of the key in `tw_parsed_data` to append to. A key that
    does not exist yet is created instead of dropping the entry.
  with_underscore
    Whether the attr contains underscore in its name
    or not

  Returns
  -------
  None
  """

  try:
    entry_id, value = get_id_value(text, context, with_underscore=with_underscore)
  except IndexError:
    # The line starts with the context but does not have the expected
    # `["<context>..."]="..."` shape; skip it
    return

  tw_parsed_data.setdefault(key, []).append({"id": entry_id, "value": value})

def create_and_add_entry_en(context: str, key: str, with_underscore: bool) -> None:
  """
  Parse the current dump line and append the entry to `en_parsed_data`.

  The line is not passed in: it is read from the module-level variable
  `text`, so this must be called from a loop that binds `text`.

  Parameters
  ----------
  context
    The name of the key before the id, for example for
    `["AttrPool_11010023"]="..."` the context will be `AttrPool`
  key
    The name of the key in `en_parsed_data` to append to. A key that
    does not exist yet is created instead of dropping the entry.
  with_underscore
    Whether the attr contains underscore in its name
    or not

  Returns
  -------
  None
  """

  try:
    entry_id, value = get_id_value(text, context, with_underscore=with_underscore)
  except IndexError:
    # The line starts with the context but does not have the expected
    # `["<context>..."]="..."` shape; skip it
    return

  en_parsed_data.setdefault(key, []).append({"id": entry_id, "value": value})

In [None]:
def parse_and_convert(texts):
  """
  Convert the lines of a table dump into a Python dictionary.

  The lines are joined with spaces, rewritten into Python literal
  syntax and evaluated with `ast.literal_eval`.

  Parameters
  ----------
  texts
    Iterable of strings written as `[key]=value,` / `["key"]=value,`
    where braces are used for both tables and lists

  Returns
  -------
  dict
    The parsed content

  Raises
  ------
  ValueError, SyntaxError
    Raised by `literal_eval` when the rewritten text is not a valid
    Python literal
  """
  joined = " ".join(texts)

  # `[key]=` / `["key"]=`  ->  `key:` / `"key":`
  joined = re.sub(r'\[([\w\"]+)\]=', r'\g<1>:', joined)
  # `{ 1,2,3 }`  ->  `[1,2,3]`
  joined = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', joined)
  # `:{1, 2, 3}`  ->  `:[1, 2, 3]`
  joined = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', joined)

  # Empty tables and tables of tables become lists
  joined = joined.replace("{}", "[]")
  joined = joined.replace("{ {", "[ {")
  joined = joined.replace("} }", "} ]")

  return literal_eval("{" + joined + "}")

# Parser

## En_langs

### Declare RE for cleaning Chinese chars

In [None]:
# Character class of CJK ranges, meant for cleaning Chinese characters out of text
# (per the heading above); it is not referenced in the visible part of this notebook.
# NOTE(review): the ranges are written as literal characters — keep the file encoding
# and Unicode normalization untouched when editing this line
RE = re.compile(u'[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]', re.UNICODE)

### Parse data

In [None]:
# One list of {"id", "value"} entries per category parsed from tw_langs
tw_parsed_data = {
    "area_name": [],
    "area_name_new": [],
    "attr_pool": [],
    "attr": [],
    "battle_pass_quest_desc": [],
    "battle_pass_quest_name": [],
    "size": [],
    "buff_desc": [],
    "buff_name": [],
    "card_attr_desc": [],
    "card_coordinates": [],
    "equip_desc": [],
    "equip_name": [],
    "equip_type": [],
    "goods_desc": [],
    "item_desc": [],
    "item_name": [],
    "item_type": [],
    "instance_description": [],
    "job_name": [],
    "mvp_desc": [],
    "mvp_name": [],
    "map_npc_name": [],
    "monster_desc": [],
    "monster_name": [],
    "monster_type": [],
    "ox_exam_question": [],
    "property": [],
    "race": [],
    "scene_name": [],
    "shadow_weapon": [],
    "shadow_weapon_name": [],
    "shadow_weapon_task": [],
    "shadow_weapon_task_name": [],
    "shadow_weapon_prop_des": [],
    "skill_name": [],
    "skill_desc": [],
    "suit_name": [],
    "title": [],
    "weather": [],
    "weather_desc": [],
    "mount_name": [],
    "mount_desc": [],
    "mount_job": [],
}

# (context, destination key, with_underscore), one row per kind of line.
# A line is handled by the FIRST row whose context it starts with, so a longer
# context must stay above any shorter context that is its prefix
# (e.g. AttrPool before Attr, WeatherDes before Weather).
LANG_LINE_RULES = [
    ("areaname", "area_name", False),
    ("Areaname", "area_name_new", False),
    ("AttrPool", "attr_pool", True),
    ("Attr", "attr", True),
    ("BattlePassQuestDesc", "battle_pass_quest_desc", True),
    # Quest names used to be appended to the description list by mistake
    ("BattlePassQuestName", "battle_pass_quest_name", True),
    # "body" is not a declared key, so these entries used to be dropped;
    # "size" is the key declared in that slot of the dictionary
    ("Body", "size", True),
    ("BuffDes", "buff_desc", True),
    ("BuffName", "buff_name", True),
    ("CardAttributeDescription", "card_attr_desc", True),
    ("CardCoordinates", "card_coordinates", True),
    ("EquipDesc", "equip_desc", True),
    ("EquipName", "equip_name", True),
    ("equipmentType", "equip_type", True),
    ("GoodsDes", "goods_desc", False),
    ("ItemDes", "item_desc", True),
    ("ItemName", "item_name", True),
    ("ItemType", "item_type", False),
    ("InstanceDescription", "instance_description", False),
    ("JobName", "job_name", True),
    ("MVPDes", "mvp_desc", True),
    ("MVPName", "mvp_name", True),
    ("MapNpcName", "map_npc_name", False),
    ("MonsterCollection", "monster_desc", False),
    ("MonsterName", "monster_name", False),
    ("MonsterType", "monster_type", True),
    ("OXExam", "ox_exam_question", True),
    ("Property", "property", False),
    ("Race", "race", True),
    # A second, unreachable SceneName rule (with_underscore=True) was removed;
    # this is the one that was actually applied
    ("SceneName", "scene_name", False),
    ("ShadowWeaponName", "shadow_weapon_name", True),
    ("ShadowWeaponTaskName", "shadow_weapon_task_name", True),
    ("ShadowWeaponPropDes", "shadow_weapon_prop_des", True),
    ("ShadowWeaponTask", "shadow_weapon_task", True),
    ("SkillDesc", "skill_desc", True),
    ("SkillName", "skill_name", True),
    ("SuitName", "suit_name", False),
    ("Title", "title", True),
    ("WeatherDes", "weather_desc", True),
    ("Weather", "weather", True),
]

with open(f"{MAIN_CLEANED_DIR}/tw_langs.bytes", "r", encoding="utf8") as lang_file:
  # The loop variable must be called `text`: create_and_add_entry_tw reads
  # the current line from that module-level name
  for text in lang_file:
    for context, key, with_underscore in LANG_LINE_RULES:
      if text.startswith(f'["{context}'):
        create_and_add_entry_tw(context, key, with_underscore)
        break

In [None]:
# One list of {"id", "value"} entries per category parsed from en_langs
en_parsed_data = {
    "area_name": [],
    "area_name_new": [],
    "attr_pool": [],
    "attr": [],
    "battle_pass_quest_desc": [],
    "battle_pass_quest_name": [],
    "size": [],
    "buff_desc": [],
    "buff_name": [],
    "card_attr_desc": [],
    "card_coordinates": [],
    "equip_desc": [],
    "equip_name": [],
    "equip_type": [],
    "goods_desc": [],
    "item_desc": [],
    "item_name": [],
    "item_type": [],
    "instance_description": [],
    "job_name": [],
    "mvp_desc": [],
    "mvp_name": [],
    "map_npc_name": [],
    "monster_desc": [],
    "monster_name": [],
    "monster_type": [],
    "ox_exam_question": [],
    "property": [],
    "race": [],
    "scene_name": [],
    "shadow_weapon": [],
    "shadow_weapon_name": [],
    "shadow_weapon_task": [],
    "shadow_weapon_task_name": [],
    "shadow_weapon_prop_des": [],
    "skill_name": [],
    "skill_desc": [],
    "suit_name": [],
    "title": [],
    "weather": [],
    "weather_desc": [],
    "mount_name": [],
    "mount_desc": [],
    "mount_job": [],
}

# (context, destination key, with_underscore), one row per kind of line.
# A line is handled by the FIRST row whose context it starts with, so a longer
# context must stay above any shorter context that is its prefix
# (e.g. AttrPool before Attr, WeatherDes before Weather).
LANG_LINE_RULES = [
    ("areaname", "area_name", False),
    ("Areaname", "area_name_new", False),
    ("AttrPool", "attr_pool", True),
    ("Attr", "attr", True),
    ("BattlePassQuestDesc", "battle_pass_quest_desc", True),
    # Quest names used to be appended to the description list by mistake
    ("BattlePassQuestName", "battle_pass_quest_name", True),
    # "body" is not a declared key, so these entries used to be dropped;
    # "size" is the key declared in that slot of the dictionary
    ("Body", "size", True),
    ("BuffDes", "buff_desc", True),
    ("BuffName", "buff_name", True),
    ("CardAttributeDescription", "card_attr_desc", True),
    ("CardCoordinates", "card_coordinates", True),
    ("EquipDesc", "equip_desc", True),
    ("EquipName", "equip_name", True),
    ("equipmentType", "equip_type", True),
    ("GoodsDes", "goods_desc", False),
    ("ItemDes", "item_desc", True),
    ("ItemName", "item_name", True),
    ("ItemType", "item_type", False),
    ("InstanceDescription", "instance_description", False),
    ("JobName", "job_name", True),
    ("MVPDes", "mvp_desc", True),
    ("MVPName", "mvp_name", True),
    ("MapNpcName", "map_npc_name", False),
    ("MonsterCollection", "monster_desc", False),
    ("MonsterName", "monster_name", False),
    ("MonsterType", "monster_type", True),
    ("OXExam", "ox_exam_question", True),
    ("Property", "property", False),
    ("Race", "race", True),
    # A second, unreachable SceneName rule (with_underscore=True) was removed;
    # this is the one that was actually applied
    ("SceneName", "scene_name", False),
    ("ShadowWeaponName", "shadow_weapon_name", True),
    ("ShadowWeaponTaskName", "shadow_weapon_task_name", True),
    ("ShadowWeaponPropDes", "shadow_weapon_prop_des", True),
    ("ShadowWeaponTask", "shadow_weapon_task", True),
    ("SkillDesc", "skill_desc", True),
    ("SkillName", "skill_name", True),
    ("SuitName", "suit_name", False),
    ("Title", "title", True),
    ("WeatherDes", "weather_desc", True),
    ("Weather", "weather", True),
]

with open(f"{MAIN_CLEANED_DIR}/en_langs.bytes", "r", encoding="utf8") as lang_file:
  # The loop variable must be called `text`: create_and_add_entry_en reads
  # the current line from that module-level name
  for text in lang_file:
    for context, key, with_underscore in LANG_LINE_RULES:
      if text.startswith(f'["{context}'):
        create_and_add_entry_en(context, key, with_underscore)
        break

## Skill

### Get information from `en_langs`

In [None]:
# Skill names and descriptions parsed from tw_langs; the "value" column is
# renamed so it stays distinguishable after merging with the en_langs frames
tw_skill_name_df = pd.DataFrame(data=tw_parsed_data["skill_name"])
tw_skill_name_df = tw_skill_name_df.rename(columns={"value": "tw_name"})
tw_skill_desc_df = pd.DataFrame(data=tw_parsed_data["skill_desc"])
tw_skill_desc_df = tw_skill_desc_df.rename(columns={"value": "tw_description"})

In [None]:
# Keep every skill name; attach its description when one exists for the same id
tw_skill_df = tw_skill_name_df.merge(tw_skill_desc_df, how="left", on="id")

In [None]:
# Spot check: rows whose id contains "15320"
id_matches = tw_skill_df["id"].str.contains("15320")
tw_skill_df[id_matches]

Unnamed: 0,id,tw_name,tw_description


In [None]:
# Skill names and descriptions parsed from en_langs
en_skill_name_df = pd.DataFrame(data=en_parsed_data["skill_name"])
en_skill_name_df = en_skill_name_df.rename(columns={"value": "name"})
en_skill_desc_df = pd.DataFrame(data=en_parsed_data["skill_desc"])
en_skill_desc_df = en_skill_desc_df.rename(columns={"value": "description"})

In [None]:
# Keep every skill name; attach its description when one exists for the same id
en_skill_df = en_skill_name_df.merge(en_skill_desc_df, how="left", on="id")

In [None]:
# The tw frame drives the result: every tw skill is kept, en columns are added where the id matches
skill_df = tw_skill_df.merge(en_skill_df, how="left", on="id")

In [None]:
# Use the tw name wherever no en name is available
fallback_names = skill_df["tw_name"]
skill_df["name"] = skill_df["name"].fillna(fallback_names)

In [None]:
# Use the tw description wherever no en description is available
fallback_descriptions = skill_df["tw_description"]
skill_df["description"] = skill_df["description"].fillna(fallback_descriptions)

In [None]:
# The tw columns were only needed as fallbacks
skill_final_df = skill_df.drop(columns=["tw_name", "tw_description"])

In [None]:
# Display the merged skill table (id, name, description)
skill_final_df

Unnamed: 0,id,name,description
0,100001,Normal Attack,
1,100002,Monster spawned,
2,100003,Gather!!,
3,100004,Test Buff Skill,
4,100010,Ignore defense attack,
...,...,...,...
2051,84020000,Nield's Power,"When attacked, there is a %s chance to cast a ..."
2052,85020000,Moonlight Gift,Removes self debuffs every 6 seconds and becom...
2053,86000000,Wind Revival,"When taking fatal damage, you are saved from d..."
2054,89005000,Hurricane,"In PVP, each skill cast increases your PVP Fin..."


In [None]:
# Spot check one skill. The ids in this frame are strings (they come from the
# regex captures), so the literal must be quoted: comparing against the integer
# 1532001 can never match a row.
skill_final_df.query('id == "1532001"')

Unnamed: 0,id,name,description


### Save data for database

In [None]:
# Write the skill table without the DataFrame index
skill_csv_path = f"{MAIN_PARSED_DIR}/skill_tw_{APK_DATE}.csv"
skill_final_df.to_csv(skill_csv_path, index=False)

## Skill Advanced

### Parse data

In [None]:
texts = []
# Maximum number of lines read from the dump (handy for debugging on a prefix)
LIMIT = 999999

with open(f"{MAIN_CLEANED_DIR}/data_skill_Skill.bytes", "r", encoding="utf8") as skill_file:
  # Iterate the file lazily instead of materialising every line with readlines()
  for i, text in enumerate(skill_file):
    if i == LIMIT:
      break
    texts.append(text.strip())

# The file is no longer needed from here on: everything below works on the joined text
texts = " ".join(texts)

# `[key]=` / `["key"]=`  ->  `key:` / `"key":`
texts = re.sub(r'\[([\w\"]+)\]=', r'\g<1>:', texts)
# `{ 1,2,3 }`  ->  `[1,2,3]`
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
# `:{1, 2, 3}`  ->  `:[1, 2, 3]`
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

texts = texts.replace("{}", "[]")
texts = texts.replace("{ {", "[ {")

# Runs of closing braces are first swapped for temporary markers, the more
# specific patterns before the shorter ones they contain, and then expanded
# with `]` inserted where a list is closed. The order of these calls matters.
texts = texts.replace("} } }, {", "AAAAA")
texts = texts.replace("} } } }", "BBBBB")
texts = texts.replace("} } }", "XXXXX")
texts = texts.replace("} }, {", "ZZZZZ")
texts = texts.replace("} }", "YYYYY")
texts = texts.replace("XXXXX", "} } ]")
texts = texts.replace("ZZZZZ", "} }, {")
texts = texts.replace("YYYYY", "} ]")
texts = texts.replace("AAAAA", "} ] }, {")
texts = texts.replace("BBBBB", "} ] } ]")

texts = "{" + texts + "}"
# Mapping of skill id -> raw attribute dictionary
skills = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column name -> key in the raw skill dictionary. Built once instead of
# on every loop iteration.
SKILL_COLUMN_TO_RAW_KEY = {
    "is_mount_combat": "IsMountCombat",
    "range": "range",
    "is_damage_skill": "IsDamageSkill",
    "require_mount_combat": "RequireMountCombat",
    "skill_group_id": "SkillGroupID",
    "max_level": "MaxLevel",
    "pre_skill": "PreSkill",
    "extra_range_skill_id": "ExtraRangeSkillId",
    "buff_list": "BuffList",
    "type": "Type",
    "cooldown": "CoolDown",
    "related_buff": "RelatedBuff",
    "pet_skill_type": "PetSkillType",
    "must_equip": "mustEquip",
    "job": "Job",
    "max_hp_cost": "MaxHpCost",
    "skill_id": "SkillId",
    "is_pet_skill_can_use_when_master_die": "IsPetSkillCanUseWhenMasterDie",
    "auto_battle_type": "AutoBattleType",
    "skill_weapon": "SkillWeapon",
    "cost_zeny": "CostZeny",
    "name": "Name",
    "desc_args": "Desc_args",
    "combo": "combo",
    "skill_sketch": "SkillSketch",
    "res_id": "ResID",
    "require_mount_id": "RequireMountID",
    "pet_skill_element": "PetSkillElement",
    "desc": "Desc",
    "fixed_cooldown": "FixedCoolDown",
    "suit_skills_or_not": "SuitSkillsOrNot",
    "pre_item": "PreItem",
    "cost_item": "CostItem",
    "cost": "Cost",
    "is_show_skill_tree": "isShowSkillTree",
}

skill_entries = []

for skill_id, raw_skill in skills.items():
  skill_entry = {"id": skill_id}
  for col, raw_key in SKILL_COLUMN_TO_RAW_KEY.items():
    try:
      skill_entry[col] = raw_skill[raw_key]
    except (KeyError, TypeError):
      # The attribute is absent for this skill (or the raw value is not a
      # dictionary); the column is left out and becomes NaN in the DataFrame
      continue
  skill_entries.append(skill_entry)

skill_df = pd.DataFrame(skill_entries)

### Merge with information from `en_langs`

In [None]:
# Replace every missing value in the frame with the string placeholder "-9999".
# NOTE(review): after this call no cell of these columns is NaN, so the later
# `isnull()` masks and `fillna(...)` calls on them look like no-ops, and the
# list-valued columns now mix lists with the string "-9999" — confirm that this
# ordering is intended.
skill_df = skill_df.fillna("-9999")

#### Skill name


In [None]:
# Skill names from both language dumps, keyed by id
tw_skill_name_df = pd.DataFrame(data=tw_parsed_data["skill_name"])
tw_skill_name_df = tw_skill_name_df.rename(columns={"value": "tw_name"})
en_skill_name_df = pd.DataFrame(data=en_parsed_data["skill_name"])
en_skill_name_df = en_skill_name_df.rename(columns={"value": "name"})

# Every tw name is kept; the en name is preferred and the tw name is the fallback
skill_name_df = tw_skill_name_df.merge(en_skill_name_df, how="left", on="id")
fallback_names = skill_name_df["tw_name"]
skill_name_df["name"] = skill_name_df["name"].fillna(fallback_names)

skill_final_df = skill_name_df.drop(columns=["tw_name"])

In [None]:
# Lookup table: skill_name_id -> readable name
skill_name_df = skill_final_df.loc[:, ["id", "name"]].rename(columns={"id": "skill_name_id"})

# The raw "name" column holds a reference such as "SkillName_<id>"; keep only the id part
skill_df["skill_name_id"] = skill_df["name"].str.replace("SkillName_", "")
# Swap the raw reference column for the readable name from the lookup table
skill_df = skill_df.drop(columns=["name"]).merge(skill_name_df, how="left", on="skill_name_id")

#### Skill description

In [None]:
# Skill descriptions from both language dumps, keyed by id
tw_skill_desc_df = pd.DataFrame(data=tw_parsed_data["skill_desc"])
tw_skill_desc_df = tw_skill_desc_df.rename(columns={"value": "tw_desc"})
en_skill_desc_df = pd.DataFrame(data=en_parsed_data["skill_desc"])
en_skill_desc_df = en_skill_desc_df.rename(columns={"value": "desc"})

# Every tw description is kept; the en text is preferred and the tw text is the fallback
skill_desc_df = tw_skill_desc_df.merge(en_skill_desc_df, how="left", on="id")
fallback_descs = skill_desc_df["tw_desc"]
skill_desc_df["desc"] = skill_desc_df["desc"].fillna(fallback_descs)

skill_final_df = skill_desc_df.drop(columns=["tw_desc"])

In [None]:
# Put a one-element list [None] into every missing "desc" cell so that all
# cells can be unwrapped the same way below
isnull = skill_df["desc"].isnull()
skill_df.loc[isnull, 'desc'] = pd.Series([[None]] * isnull.sum()).values
# Keep only the first element of each cell.
# NOTE(review): `list(...)` of a string cell is its list of characters, so a
# string placeholder would be reduced to its first character here — confirm
# that no string values reach this line
skill_df["desc"] = skill_df["desc"].apply(list).str[0]

# Strip the "SkillDesc_" prefix so the remainder can be matched against the description ids
skill_df["skill_desc_id"] = skill_df["desc"].str.replace("SkillDesc_", "")
skill_df = skill_df.drop("desc", axis=1)

# Lookup table skill_desc_id -> description text, left-joined onto the skills
skill_desc_df = skill_final_df[["id", "desc"]].rename(columns={"id": "skill_desc_id"})
skill_df = pd.merge(skill_df, skill_desc_df, how="left", left_on=["skill_desc_id"], right_on=["skill_desc_id"])

In [None]:
# Spot check one skill after the name and description merges
skill_df.query("id == 1532001")

Unnamed: 0,id,is_mount_combat,is_damage_skill,require_mount_combat,skill_group_id,max_level,pre_skill,buff_list,type,cooldown,pet_skill_type,must_equip,job,max_hp_cost,skill_id,is_pet_skill_can_use_when_master_die,auto_battle_type,skill_weapon,cost_zeny,desc_args,combo,skill_sketch,res_id,require_mount_id,pet_skill_element,fixed_cooldown,suit_skills_or_not,pre_item,cost_item,cost,is_show_skill_tree,related_buff,range,skill_name_id,name,skill_desc_id,desc
1406,1532001,1,1,0,0,10,[],[],1,"[9000, 12000, 15000, 18000, 21000, 24000, 2700...",1,[],1532,[],1532001,0,9,[{'skillweapon': 311}],[],[],-9999,SkillSketch1522001,1522001,[],0,[6000],0,[],[],"[40, 56, 72, 88, 104, 120, 136, 152, 168, 184]",1,-9999,-9999,1522001,金属摇滚,1532001,對指定目標發出高音，對其造成物理攻擊*<color=#44afee>%s%%</color>...


### Fix missing values

In [None]:
def _first_or_nan(value):
  """Return the first item of a list/tuple (NaN when it is empty); leave any other value untouched."""
  if isinstance(value, (list, tuple)):
    return value[0] if value else np.nan
  return value


skill_df["is_damage_skill"] = skill_df["is_damage_skill"].fillna(1)

skill_df["type"] = skill_df["type"].fillna(1)

# Keep only the first element of the list-valued columns. Values that are not
# lists are passed through unchanged: the previous `apply(list).str[0]` split a
# string placeholder such as "-9999" into characters and stored just "-".
skill_df["cooldown"] = skill_df["cooldown"].apply(_first_or_nan)
skill_df["cost"] = skill_df["cost"].apply(_first_or_nan)

skill_df["is_mount_combat"] = skill_df["is_mount_combat"].fillna(0)

skill_df["is_show_skill_tree"] = skill_df["is_show_skill_tree"].fillna(0)

### Select columns for database

In [None]:
# Columns exported to the database, in output order
skill_export_columns = [
    "id",
    "name",
    "desc",
    "skill_group_id",
    "is_damage_skill",
    "max_level",
    "type",
    "cooldown",
    "job",
    "res_id",
    "cost",
    "is_mount_combat",
    "is_show_skill_tree",
    "skill_weapon",
    "fixed_cooldown",
    "combo",
    "range",
    "require_mount_combat",
    "pet_skill_type",
    "is_pet_skill_can_use_when_master_die",
    "pet_skill_element",
    "suit_skills_or_not",
]
skill_final_df = skill_df[skill_export_columns]

In [None]:
# Column names expected in the exported file
export_names = {"desc": "description", "skill_group_id": "group_id"}
skill_final_df = skill_final_df.rename(columns=export_names)

In [None]:
def _empty_list_to_nan(value):
  """Return NaN for an empty list and the value itself otherwise."""
  if value == []:
    return np.nan
  return value


skill_final_df["skill_weapon"] = skill_final_df["skill_weapon"].apply(_empty_list_to_nan)

In [None]:
def _first_fixed_cooldown(value):
  """Return the first item of a list (NaN when it is empty); leave any other value untouched."""
  if isinstance(value, list):
    return value[0] if value else np.nan
  # Not a list (e.g. a scalar placeholder): indexing it with [0] would either
  # fail or keep only the first character of a string
  return value


skill_final_df["fixed_cooldown"] = skill_final_df["fixed_cooldown"].apply(_first_fixed_cooldown)

### Save for database

In [None]:
# skill_weapon is left out of the exported table
skill_complex_final_df = skill_final_df.drop(columns=["skill_weapon"])

In [None]:
# Write the detailed skill table without the DataFrame index
skill_complex_csv_path = f"{MAIN_PARSED_DIR}/skill_complex_tw_{APK_DATE}.csv"
skill_complex_final_df.to_csv(skill_complex_csv_path, index=False)

## Item


### Get item information from `en_langs`

In [None]:
# One frame per item category parsed from tw_langs
tw_item_name_df, tw_item_desc_df, tw_item_type_df = (
    pd.DataFrame(tw_parsed_data[category])
    for category in ("item_name", "item_desc", "item_type")
)

In [None]:
# One frame per item category parsed from en_langs
en_item_name_df, en_item_desc_df, en_item_type_df = (
    pd.DataFrame(en_parsed_data[category])
    for category in ("item_name", "item_desc", "item_type")
)

### Parse data

In [None]:
texts = []
# Maximum number of lines read from the dump (handy for debugging on a prefix)
LIMIT = 9999999

with open(f"{MAIN_CLEANED_DIR}/data_item_Item.bytes", "r", encoding="utf8") as item_file:
  # Iterate the file lazily instead of materialising every line with readlines()
  for i, text in enumerate(item_file):
    if i == LIMIT:
      break
    texts.append(text.strip())

# The file is no longer needed from here on: everything below works on the joined text
texts = " ".join(texts)

# `[key] =` / `["key"] =`  ->  `key:` / `"key":`
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
# Brace-delimited runs of digits, commas and spaces become lists
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Brace runs are first swapped for temporary markers, the more specific
# patterns before the shorter ones they contain, and then expanded with
# `[` / `]` inserted where a list opens or closes. The order of these calls matters.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

texts = "{" + texts + "}"

# Mapping of item id -> raw attribute dictionary
items = literal_eval(texts)
print(len(items))

4711


### Create entries for DataFrame creation

In [None]:
# Output column name -> key in the raw item dictionary. Built once instead of
# on every loop iteration; entries that were listed twice (with identical
# values) are kept only at their first position so the column order is unchanged.
ITEM_COLUMN_TO_RAW_KEY = {
    "cd": "cd",
    "item_desc": "itemDesc",
    "item_type": "itemType",
    "max_stack": "maxStack",
    "page": "page",
    "res_id": "resId",
    "stackable": "stackable",
    "weight": "weight",
    "static_id": "staticId",
    "card_attrs": "CardAttrs",
    "card_quality": "cardQuality",
    "card_slots": "cardSlots",
    "deposite_attrs": "DepositeAttrs",
    "is_mvp_card": "IsMvpCard",
    "item_subtype": "itemSubType",
    "min_level": "minLevel",
    "monster_id": "monster_id",
    "name": "name",
    "unlock_adventure_exp": "UnlockAdventureExp",
    "expired_date": "ExpiredDate",
    "is_bind": "isBind",
    "item_expired_type": "itemExpiredType",
    "sub_page": "subPage",
    "is_hide": "isHide",
    "use": "use",
    "show_in_ui": "showInUi",
    "acquire": "Acquire",
    "card_coordinate_point": "CardCoordinatePoint",
    "is_in_collection": "IsInCollection",
    "monster": "Monster",
    "sell_price": "SellPrice",
    "area_id": "AreaId",
    "cd_group_id": "CdGroupId",
    "cd_type": "CdType",
    "element": "Element",
    "exp": "Exp",
    "fish_rod_type": "FishRodType",
    "fish_tool_type": "FishToolType",
    "gift_send_limit": "GiftSendLimit",
    "if_can_quick_use": "IfCanQuickUse",
    "if_combined_for_life": "IfCombinedForLife",
    "if_get_off_mount": "IfGetOffMount",
    "if_stop_navigation": "IfStopNavigation",
    "interface_id": "InterfaceId",
    "is_gift_item": "IsGiftItem",
    "item_quality": "ItemQuality",
    "item_static_id": "ItemStaticId",
    "max_use": "MaxUse",
    "mine_tool_type": "MineToolType",
    "npc_id": "NpcId",
    "npc_navigation": "NpcNavigation",
    "oon_box_loot_bind_status": "OONBoxLootBindStatus",
    "oon_box_loot_id": "OONBoxLootId",
    "oon_box_loot_number": "OONBoxLootNumber",
    "oon_box_loot_type": "OONBoxLootType",
    "pet_pill_bullet_id": "PetPillBulletId",
    "pet_pill_fixed_damage_rate": "PetPillFixedDamageRate",
    "pet_pill_max_damage": "PetPillMaxDamage",
    "related_activity_type": "RelatedActivityType",
    "related_pet_skill": "RelatedPetSkill",
    "scene_id": "SceneId",
    "sell_navigation": "SellNavigation",
    "stall_currency_type": "StallCurrencyType",
    "stall_item_level": "StallItemLevel",
    "stall_price_lower_limit": "StallPriceLowerLimit",
    "stall_price_type": "StallPriceType",
    "stall_price_upper_limit": "StallPriceUpperLimit",
    "stall_type": "StallType",
    "stall_zeny_child_label": "StallZenyChildLabel",
    "is_all_job": "isAllJob",
    "item_subtype_task": "itemSubTypeTask",
    "job_limit": "jobLimit",
    "pet_cage_type": "petcagetype",
    "related_gm_activity_type": "relatedGMActivityType",
}

item_entries = []

for item_id, raw_item in items.items():
  item_entry = {"id": item_id}
  for col, raw_key in ITEM_COLUMN_TO_RAW_KEY.items():
    try:
      item_entry[col] = raw_item[raw_key]
    except (KeyError, TypeError):
      # The attribute is absent for this item (or the raw value is not a
      # dictionary); the column is left out and becomes NaN in the DataFrame
      continue
  item_entries.append(item_entry)

item_df = pd.DataFrame(item_entries)

### Merge with information from `en_langs`

#### Item name


In [None]:
# tw item names with their descriptions attached by id
tw_item_name_df = pd.DataFrame(data=tw_parsed_data["item_name"])
tw_item_name_df = tw_item_name_df.rename(columns={"value": "tw_name"})
tw_item_desc_df = pd.DataFrame(data=tw_parsed_data["item_desc"])
tw_item_desc_df = tw_item_desc_df.rename(columns={"value": "tw_description"})

tw_item_df = tw_item_name_df.merge(tw_item_desc_df, how="left", on="id")

# Not the last expression of the cell, so nothing is displayed for this preview
tw_item_df.head()

# en item names with their descriptions attached by id
en_item_name_df = pd.DataFrame(data=en_parsed_data["item_name"])
en_item_name_df = en_item_name_df.rename(columns={"value": "name"})
en_item_desc_df = pd.DataFrame(data=en_parsed_data["item_desc"])
en_item_desc_df = en_item_desc_df.rename(columns={"value": "description"})

en_item_df = en_item_name_df.merge(en_item_desc_df, how="left", on="id")

In [None]:
# Every tw item is kept; en texts are attached where the id matches
item_all_df = tw_item_df.merge(en_item_df, how="left", on="id")

# Prefer the en text and fall back to the tw text when it is missing
for en_col, tw_col in (("name", "tw_name"), ("description", "tw_description")):
  item_all_df[en_col] = item_all_df[en_col].fillna(item_all_df[tw_col])

In [None]:
# The tw columns were only needed as fallbacks
item_all_df = item_all_df.drop(columns=["tw_name", "tw_description"])

In [None]:
# Both frames must hold integer ids before they can be merged on "id"
for frame in (item_all_df, item_df):
  frame["id"] = frame["id"].apply(int)

In [None]:
# Attach the translated texts to the raw item rows; every raw item is kept
item_df = item_df.merge(item_all_df, how="left", on="id")

In [None]:
# The raw description reference is no longer needed once the texts are merged in
item_df = item_df.drop(columns=["item_desc"])

#### Item type

In [None]:
# Missing type / subtype values count as 0 and both columns become integers
for type_col in ("item_type", "item_subtype"):
  item_df[type_col] = item_df[type_col].fillna(0).apply(int)

# "<type>_<subtype>" is the key used to look up the item type text
item_df["item_type_subtype"] = [
    f"{main_type}_{sub_type}"
    for main_type, sub_type in zip(item_df["item_type"], item_df["item_subtype"])
]

item_type_df = en_item_type_df.rename(columns={"id": "item_type_subtype", "value": "item_type_en"})
item_df = item_df.merge(item_type_df, how="left", on="item_type_subtype")

### Save raw data

In [None]:
# Write the full item table (all parsed columns) without the DataFrame index
item_raw_csv_path = f"{MAIN_PARSED_DIR}/item_raw_{APK_DATE}.csv"
item_df.to_csv(item_raw_csv_path, index=False)

### Save data for database

In [None]:
# "name_y" is the translated name: the raw frame already had its own "name"
# column, so the merge suffixed the two columns with _x / _y
export_source_columns = ["id", "name_y", "description", "res_id", "item_type_en"]
item_final_df = item_df[export_source_columns].rename(columns={"name_y": "item_name"})

# Items without a resource id use their own id instead
item_final_df["res_id"] = item_final_df["res_id"].fillna(item_final_df["id"])

# Only items that have a name are exported
has_name = ~item_final_df["item_name"].isnull()
item_final_df = item_final_df[has_name]

# Column names expected in the exported file; names that are not present in
# the frame are ignored by rename
export_names = {
    "item_name": "name",
    "item_desc_en": "description",
    "res_id": "res_id",
    "item_type_en": "type",
}
item_final_df = item_final_df.rename(columns=export_names)

# Every item whose name contains " Card" gets the same resource id
# (checked before the names are lower-cased)
is_card = item_final_df["name"].str.contains(" Card")
item_final_df.loc[is_card, 'res_id'] = 99999

item_final_df["type"] = item_final_df["type"].fillna("Uncategorized")
item_final_df["name"] = item_final_df["name"].str.lower()


def _visibility_flag(item_id):
  """Return 0 for ids below 20000 and 1 otherwise."""
  if item_id < 20000:
    return 0
  return 1


item_final_df["is_visible"] = item_final_df["id"].apply(_visibility_flag)

item_csv_path = f"{MAIN_PARSED_DIR}/item_tw_{APK_DATE}.csv"
item_final_df.to_csv(item_csv_path, index=False)

## Skill Preitem

### Create entries for dataframe creation

In [None]:
# Records ({"id", "pre_item"}) for every skill with a non-null `pre_item`.
has_pre_item = skill_df["pre_item"].notnull()
skill_pre_item_records = skill_df.loc[has_pre_item, ["id", "pre_item"]].to_dict(orient="records")

In [None]:
# One flat row per (skill, pre-item) pair.
skill_pre_item_fins = [
    {
        "skill_id": record["id"],
        "item_id": pre_item["ItemId"],
        "item_num": pre_item["Number"],
    }
    for record in skill_pre_item_records
    if record["pre_item"] != {}
    for pre_item in record["pre_item"]
]

In [None]:
# Columns are pinned so that an empty record list still yields a frame with
# `skill_id` / `item_id` / `item_num`; otherwise the filtering cell below
# fails with a KeyError when no skill has a pre-item.
skill_pre_item_df = pd.DataFrame(skill_pre_item_fins, columns=["skill_id", "item_id", "item_num"])

In [None]:
# Keep only rows whose skill id appears in `skill_complex_final_df` ...
pre_item_skill_ids = skill_pre_item_df["skill_id"].astype(float)
intersections = set(pre_item_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_pre_item_df = skill_pre_item_df[pre_item_skill_ids.isin(intersections)]

# ... and whose item id appears in `item_final_df`.
pre_item_item_ids = skill_pre_item_df["item_id"].astype(float)
intersections = set(pre_item_item_ids) & set(item_final_df["id"].astype(float))
skill_pre_item_df = skill_pre_item_df[pre_item_item_ids.isin(intersections)]

In [None]:
# The surviving row labels become the `id` column.
skill_pre_item_final_df = skill_pre_item_df.rename_axis("id").reset_index()

### Save for database

In [None]:
# Write the skill -> required item rows for the database import.
skill_pre_item_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_pre_item_tw_{APK_DATE}.csv", index=False)

## Skill Buff

### Create entries for dataframe creation

In [None]:
# Records ({"id", "buff_list"}) for every skill with a non-null `buff_list`.
has_buff_list = skill_df["buff_list"].notnull()
skill_buff_list_records = skill_df.loc[has_buff_list, ["id", "buff_list"]].to_dict(orient="records")

In [None]:
# One flat row per (skill, buff) pair.
skill_buff_list_fins = [
    {"skill_id": record["id"], "buff": buff_list}
    for record in skill_buff_list_records
    if record["buff_list"] != {}
    for buff_list in record["buff_list"]
]

In [None]:
# Columns are pinned so an empty record list still produces `skill_id` / `buff`
# instead of a column-less frame that breaks the filtering cell below.
skill_buff_df = pd.DataFrame(skill_buff_list_fins, columns=["skill_id", "buff"])

In [None]:
# Keep only buffs whose skill id appears in `skill_complex_final_df`.
buff_skill_ids = skill_buff_df["skill_id"].astype(float)
intersections = set(buff_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_buff_df = skill_buff_df[buff_skill_ids.isin(intersections)]

In [None]:
# The surviving row labels become the `id` column.
skill_buff_final_df = skill_buff_df.rename_axis("id").reset_index()

### Save for database

In [None]:
# Write the skill -> buff rows for the database import.
skill_buff_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_buff_tw_{APK_DATE}.csv", index=False)

## Skill Cost Item

### Create entries for dataframe creation

In [None]:
# Records ({"id", "cost_item"}) for every skill with a non-null `cost_item`.
has_cost_item = skill_df["cost_item"].notnull()
skill_cost_item_records = skill_df.loc[has_cost_item, ["id", "cost_item"]].to_dict(orient="records")

In [None]:
# One flat row per (skill, consumed item) pair.
skill_cost_item_fins = [
    {
        "skill_id": record["id"],
        "item_id": cost_item["ItemId"],
        "item_num": cost_item["Number"],
    }
    for record in skill_cost_item_records
    if record["cost_item"] != {}
    for cost_item in record["cost_item"]
]

In [None]:
# Columns are pinned so an empty record list still produces the three expected
# columns instead of a column-less frame that breaks the filtering cell below.
skill_cost_item_df = pd.DataFrame(skill_cost_item_fins, columns=["skill_id", "item_id", "item_num"])

In [None]:
# Display the frame to eyeball the parsed cost items before filtering.
skill_cost_item_df

Unnamed: 0,skill_id,item_id,item_num
0,199994,10202072,1
1,199995,10202070,1
2,199995,10202071,1
3,199996,10202157,1
4,199997,10202158,1
5,199997,10202159,1
6,199997,10202160,1


In [None]:
# Keep only rows whose skill id appears in `skill_complex_final_df` ...
cost_item_skill_ids = skill_cost_item_df["skill_id"].astype(float)
intersections = set(cost_item_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_cost_item_df = skill_cost_item_df[cost_item_skill_ids.isin(intersections)]

# ... and whose item id appears in `item_final_df`.
cost_item_item_ids = skill_cost_item_df["item_id"].astype(float)
intersections = set(cost_item_item_ids) & set(item_final_df["id"].astype(float))
skill_cost_item_df = skill_cost_item_df[cost_item_item_ids.isin(intersections)]

In [None]:
# The surviving row labels become the `id` column.
skill_cost_item_final_df = skill_cost_item_df.rename_axis("id").reset_index()

### Save for database

In [None]:
# Write the skill -> consumed item rows for the database import.
skill_cost_item_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_cost_item_tw_{APK_DATE}.csv", index=False)

## Skill Factor

### Parse data

In [None]:
# Read the SkillFactor dump and rewrite it into text `literal_eval` can parse.
texts = []

# Cap on the number of lines read; the value is large enough that it is
# presumably never reached and only serves as a debugging switch.
LIMIT = 9029399292
with open(f"{MAIN_CLEANED_DIR}/data_SkillFactor.bytes", "r", encoding="utf8") as skill_factor_file:
  for i, text in enumerate(skill_factor_file):
    if i == LIMIT:
      break
    texts.append(text.strip())

texts = " ".join(texts)
# `Key =` becomes `"Key" :` so every record reads as a dict literal.
# (Raw replacement string: `\g` is not a valid escape in a normal string.)
texts = re.sub(r'([A-Za-z]+) =', r'"\g<1>" :', texts)
# Positional `[n] = ` prefixes are dropped, leaving a plain sequence of records.
texts = re.sub(r'\[[\d]+\] = ', '', texts)

skill_factors = literal_eval(texts)

In [None]:
# Peek at the first parsed record to check which keys a record carries.
skill_factors[0]

{'FactorName': 'FactorName18',
 'FinalFactor': 2,
 'Precent': 0,
 'SkillId': 1200128}

### Create entries for DataFrame

In [None]:
# Output column -> key in a parsed SkillFactor record.
default_key_dict = {
    "id": "Id",
    "factor_name": "FactorName",
    "factor_order": "FactorOrder",
    "final_factor": "FinalFactor",
    "skill_id": "SkillId",
    "skill_level": "SkillLevel"
}

skill_factor_entries = []

for skill_factor in skill_factors:
  # Copy only the keys a record actually has; the missing ones surface as
  # NaN in the DataFrame and are filled in by the next section.
  skill_factor_entry = {
      col: skill_factor[def_key]
      for col, def_key in default_key_dict.items()
      if def_key in skill_factor
  }
  skill_factor_entries.append(skill_factor_entry)

skill_factor_df = pd.DataFrame(skill_factor_entries)

### Fix missing values

In [None]:
# Default used for each column when a record did not provide a value.
skill_factor_defaults = {
    "factor_order": 1,
    "id": 1,
    "skill_level": 1,
    "skill_id": 0,
    "final_factor": 20,
}
for factor_col, default_value in skill_factor_defaults.items():
  skill_factor_df[factor_col] = skill_factor_df[factor_col].fillna(default_value)

### Save for database

In [None]:
# Keep only factors whose skill id appears in `skill_complex_final_df`, then save.
factor_skill_ids = skill_factor_df["skill_id"].astype(int)
intersections = set(factor_skill_ids) & set(skill_complex_final_df["id"].astype(int))
skill_factor_final_df = skill_factor_df[factor_skill_ids.isin(intersections)]

skill_factor_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_factor_tw_{APK_DATE}.csv", index=False)

## Skill Description Arguments

### Create entries for dataframe creation

In [None]:
# Records ({"id", "desc_args"}) for every skill whose `desc_args` length is not 0.
has_desc_args = skill_df["desc_args"].str.len().ne(0)
skill_desc_args_records = skill_df.loc[has_desc_args, ["id", "desc_args"]].to_dict(orient="records")

In [None]:
# One flat row per (skill, description argument) pair.
skill_desc_args_fins = []

for record in skill_desc_args_records:
  if record["desc_args"] != []:
    for desc_args in record["desc_args"]:
      skill_desc_args_fin = {
          "skill_id": record["id"],
          "factor": desc_args["Factor"],
          "factor_bit": desc_args["FactorBit"],
          "level_type": desc_args["LevelType"],
      }

      # `type` is optional; an explicit key test replaces the bare `except:`
      # that used to hide every other kind of error as well.
      if "type" in desc_args:
        skill_desc_args_fin["type"] = desc_args["type"]

      skill_desc_args_fins.append(skill_desc_args_fin)

skill_desc_args_df = pd.DataFrame(skill_desc_args_fins)

In [None]:
# Keep only arguments whose skill id appears in `skill_complex_final_df`.
# The result must stay in `skill_desc_args_df`: assigning it to
# `skill_factor_final_df` left the saved desc-args table unfiltered and
# overwrote the skill-factor frame.
desc_args_skill_ids = skill_desc_args_df["skill_id"].astype(int)
intersections = set(desc_args_skill_ids).intersection(set(skill_complex_final_df["id"].astype(int)))
skill_desc_args_df = skill_desc_args_df[desc_args_skill_ids.isin(intersections)]

In [None]:
# The row labels become the `id` column.
skill_desc_args_final_df = skill_desc_args_df.rename_axis("id").reset_index()

### Save for database

In [None]:
# Write the skill description-argument rows for the database import.
skill_desc_args_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_desc_args_tw_{APK_DATE}.csv", index=False)

## Skill Preskill

### Create entries for dataframe creation

In [None]:
# Records for every skill with a non-null `pre_skill`.
has_pre_skill = skill_df["pre_skill"].notnull()
skill_pre_skill_records = skill_df.loc[has_pre_skill, ["id", "pre_skill"]].to_dict(orient="records")

# One flat row per (skill, prerequisite skill) pair.
skill_pre_skill_fins = [
    {
        "skill_id": record["id"],
        "pre_skill": pre_skill["SkillId"],
        "pre_skill_level": pre_skill["SkillLevel"],
    }
    for record in skill_pre_skill_records
    if record["pre_skill"] != []
    for pre_skill in record["pre_skill"]
]

# The positional row number becomes the `id` column (assigned before filtering).
skill_pre_skill_df = pd.DataFrame(skill_pre_skill_fins).rename_axis("id").reset_index()

In [None]:
# Both ends of the relation must appear in `skill_complex_final_df`.
known_skill_ids = set(skill_complex_final_df["id"].astype(int))

prerequisite_ids = skill_pre_skill_df["pre_skill"].astype(int)
intersections = set(prerequisite_ids) & known_skill_ids
skill_pre_skill_df = skill_pre_skill_df[prerequisite_ids.isin(intersections)]

dependent_ids = skill_pre_skill_df["skill_id"].astype(int)
intersections = set(dependent_ids) & known_skill_ids
skill_pre_skill_final_df = skill_pre_skill_df[dependent_ids.isin(intersections)]

### Save for database

In [None]:
# Write the skill -> prerequisite skill rows for the database import.
skill_pre_skill_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_pre_skill_tw_{APK_DATE}.csv", index=False)

## Skill Required Mount

### Create entries for dataframe creation

In [None]:
# Records for every skill with a non-null `require_mount_id`.
has_mount = skill_df["require_mount_id"].notnull()
skill_require_mount_id_records = skill_df.loc[has_mount, ["id", "require_mount_id"]].to_dict(orient="records")

# One flat row per (skill, required mount id) pair.
skill_require_mount_id_fins = [
    {"skill_id": record["id"], "require_mount_id": require_mount_id}
    for record in skill_require_mount_id_records
    if record["require_mount_id"] != []
    for require_mount_id in record["require_mount_id"]
]

# The positional row number becomes the `id` column.
skill_require_mount_id_df = pd.DataFrame(skill_require_mount_id_fins).rename_axis("id").reset_index()

In [None]:
# Keep only rows whose skill id appears in `skill_complex_final_df`.
mount_skill_ids = skill_require_mount_id_df["skill_id"].astype(float)
intersections = set(mount_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_require_mount_id_final_df = skill_require_mount_id_df[mount_skill_ids.isin(intersections)]

# The matching filter on `require_mount_id` against `item_final_df` is intentionally left disabled:
# intersections = set(skill_require_mount_id_df["require_mount_id"].astype(float)).intersection(set(item_final_df["id"].astype(float)))
# skill_require_mount_id_final_df = skill_require_mount_id_df[skill_require_mount_id_df["require_mount_id"].astype(float).isin(intersections)]

### Save for database

In [None]:
# Write the skill -> required mount rows for the database import.
skill_require_mount_id_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_require_mount_id_tw_{APK_DATE}.csv", index=False)

## Skill Max HP Cost

### Create entries for dataframe creation

In [None]:
# Records for every skill with a non-null `max_hp_cost`.
has_hp_cost = skill_df["max_hp_cost"].notnull()
skill_max_hp_cost_records = skill_df.loc[has_hp_cost, ["id", "max_hp_cost"]].to_dict(orient="records")

# One flat row per (skill, max-HP cost entry) pair.
skill_max_hp_cost_fins = [
    {"skill_id": record["id"], "max_hp_cost": max_hp_cost}
    for record in skill_max_hp_cost_records
    if record["max_hp_cost"] != []
    for max_hp_cost in record["max_hp_cost"]
]

# The positional row number becomes the `id` column.
skill_max_hp_cost_df = pd.DataFrame(skill_max_hp_cost_fins).rename_axis("id").reset_index()

In [None]:
# Keep only rows whose skill id appears in `skill_complex_final_df`.
hp_cost_skill_ids = skill_max_hp_cost_df["skill_id"].astype(float)
intersections = set(hp_cost_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_max_hp_cost_final_df = skill_max_hp_cost_df[hp_cost_skill_ids.isin(intersections)]

### Save for database

In [None]:
# Write the skill -> max-HP cost rows for the database import.
skill_max_hp_cost_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_max_hp_cost_tw_{APK_DATE}.csv", index=False)

## Skill Zeny Cost

### Create entries for dataframe creation

In [None]:
# Records for every skill with a non-null `cost_zeny`.
has_zeny_cost = skill_df["cost_zeny"].notnull()
skill_cost_zeny_records = skill_df.loc[has_zeny_cost, ["id", "cost_zeny"]].to_dict(orient="records")

# One flat row per (skill, zeny cost entry) pair.
skill_cost_zeny_fins = [
    {"skill_id": record["id"], "cost_zeny": cost_zeny}
    for record in skill_cost_zeny_records
    if record["cost_zeny"] != []
    for cost_zeny in record["cost_zeny"]
]

# The positional row number becomes the `id` column.
skill_cost_zeny_df = pd.DataFrame(skill_cost_zeny_fins).rename_axis("id").reset_index()

In [None]:
# Keep only rows whose skill id appears in `skill_complex_final_df`.
zeny_skill_ids = skill_cost_zeny_df["skill_id"].astype(float)
intersections = set(zeny_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_zeny_cost_final_df = skill_cost_zeny_df[zeny_skill_ids.isin(intersections)]

### Save for database

In [None]:
# Write the skill -> zeny cost rows for the database import.
skill_zeny_cost_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_zeny_cost_tw_{APK_DATE}.csv", index=False)

## Equip Suit

### Parse data

In [None]:
# Read the EquipmentSuit dump and rewrite it into text `literal_eval` can parse.
texts = []

# Cap on the number of lines read; large enough that it is presumably never hit.
LIMIT = 328932992
with open(f"{MAIN_CLEANED_DIR}/data_equip_EquipmentSuit.bytes", "r", encoding="utf8") as equipment_suit_file:
  for i, text in enumerate(equipment_suit_file):
    if i == LIMIT:
      break
    texts.append(text.strip())

texts = " ".join(texts)
# `[key] =` becomes `key:`; brace groups holding only digits/commas become lists.
# Replacement strings are raw because `\g` is not a valid string escape.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# A `{ {` opener and its closer must become list brackets. The brace runs are
# first swapped for placeholder tokens so that the broad `}, },` replacement
# cannot touch the more specific runs, then every token is expanded.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything so the whole dump is a single dict literal.
texts = "{" + texts + "}"

equipment_suits = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key in a parsed EquipmentSuit record.
default_key_dict = {
    "id": "ID",
    "argument_id": "argumentID",
    "argument_order": "argumentOrder",
    "argument_value": "argumentValue",
    "equip_id": "equip_id",
    "name": "name",
    "skill_id": "skillId",
    "suit_id": "suitId",
    "suit_num": "suitNum"
}

equipment_suit_entries = []

# The loop variable is not called `id`, so the builtin stays usable afterwards.
for suit_key, parsed_dict in equipment_suits.items():
  # The dict key is the default id; a record's own "ID" value overrides it.
  equipment_suit_entry = {"id": suit_key}
  # Only dict records can supply values; anything else keeps just the id
  # (an explicit test instead of a bare `except:` that hid every error).
  if isinstance(parsed_dict, dict):
    equipment_suit_entry.update(
        (col, parsed_dict[def_key])
        for col, def_key in default_key_dict.items()
        if def_key in parsed_dict
    )
  equipment_suit_entries.append(equipment_suit_entry)

equip_suit_raw_df = pd.DataFrame(equipment_suit_entries)

In [None]:
# Materialise every `argument_value` as a list.
# NOTE(review): `list()` raises TypeError on NaN, yet nulls in this column are
# only handled in the next section — presumably no row lacks the key; confirm.
equip_suit_raw_df["argument_value"] = equip_suit_raw_df["argument_value"].apply(list)

### Break entries down

In [None]:
# Suits without any equipment are of no use here.
equip_suit_raw_df = equip_suit_raw_df[equip_suit_raw_df["equip_id"].notnull()]

# Null cells become `[None]` so that `[0]` below works on every row.
for list_col in ("argument_value", "argument_order"):
  isnull = equip_suit_raw_df[list_col].isnull()
  equip_suit_raw_df.loc[isnull, list_col] = pd.Series([[None]] * isnull.sum()).values

equip_suit_rec_df = equip_suit_raw_df[["id", "name", "skill_id", "equip_id", "argument_order", "argument_value", "suit_num"]]
equip_suit_recs = equip_suit_rec_df.to_dict(orient="records")

# Explode each suit into one row per equipment id; only the first
# argument value / order of the suit is carried over.
equip_suit_rec_news = [
    {
        "id": equip_suit_rec["id"],
        "equip_id": equip_id,
        "argument_value": equip_suit_rec["argument_value"][0],
        "argument_order": equip_suit_rec["argument_order"][0],
        "name": equip_suit_rec["name"],
        "skill_id": equip_suit_rec["skill_id"],
        "suit_num": equip_suit_rec["suit_num"],
    }
    for equip_suit_rec in equip_suit_recs
    for equip_id in equip_suit_rec["equip_id"]
]

equip_suit_df = pd.DataFrame(equip_suit_rec_news)

### Data manipulation

In [None]:
# One row per distinct (argument_value, name, skill_id, suit_num), ordered by name.
suit_effect_columns = ["argument_value", "name", "skill_id", "suit_num"]
equip_suit_manip_df = equip_suit_df[suit_effect_columns].drop_duplicates()
equip_suit_manip_df = equip_suit_manip_df.sort_values("name")

In [None]:
# Suits without a skill fall back to skill id "81010000".
equip_suit_manip_df["skill_id"] = equip_suit_manip_df["skill_id"].fillna("81010000")

# Drop unnamed suits first, then the `Fashion_ShowEffect...` ones.
has_name = equip_suit_manip_df["name"].notnull()
equip_suit_manip_df = equip_suit_manip_df[has_name]
is_fashion_effect = equip_suit_manip_df["name"].str.startswith("Fashion_ShowEffect")
equip_suit_manip_df = equip_suit_manip_df[~is_fashion_effect]

In [None]:
# Suit codes whose value does not depend on `suit_num`.
_FLAT_SUIT_VALUES = {4: 0.1, 6000: 0.25, 8114: 0.1}

# Suit codes whose value is looked up by `suit_num` (3, 6 or 8).
_SUIT_NUM_VALUES = {
    1020: {3: 0.05, 6: 0.075, 8: 0.1},
    2020: {3: 0.1, 6: 0.15, 8: 0.2},
    2060: {3: 0.05, 6: 0.075, 8: 0.1},
    2070: {3: 0.05, 6: 0.075, 8: 0.1},
    3020: {3: 0.03, 6: 0.04, 8: 0.05},
    4020: {3: 0.1, 6: 0.15, 8: 0.2},
    5020: {3: 0.03, 6: 0.04, 8: 0.05},
}


def get_blue_argument_value(x):
  """
  Look up the hard-coded initial argument value of one suit row.

  Parameters
  ----------
  x
    Row (or mapping) providing `name`, `skill_id` and `suit_num`.
    `skill_id` is only read for suit codes between 100 and 500
    (exclusive); `suit_num` only for the codes listed in
    `_SUIT_NUM_VALUES`.

  Returns
  -------
  float
    The value for the suit code parsed from `name`, or `np.nan` when
    the name is not of the form `SuitName<int>` or no rule covers the
    code / skill / suit_num combination.
  """
  try:
    equipment_suit_code = int(re.match(r"SuitName(\w+)", x["name"]).group(1))
  except (AttributeError, TypeError, ValueError):
    # No match, a non-string name or a non-numeric suffix: there is no
    # suit code to look up. Previously this path fell through and
    # crashed on the unbound `equipment_suit_code`.
    return np.nan

  if 100 < equipment_suit_code < 500:
    # These codes scale with their distance from 99; the formula is
    # selected by the skill id.
    skill_id = int(x["skill_id"])
    step = equipment_suit_code - 99
    if skill_id == 81010000:
      return step * 0.005
    if skill_id == 81010001:
      return 0.20 + (step * 0.05)
    if skill_id == 81010002:
      return step * 0.05
    return np.nan

  if equipment_suit_code in _FLAT_SUIT_VALUES:
    return _FLAT_SUIT_VALUES[equipment_suit_code]

  values_by_suit_num = _SUIT_NUM_VALUES.get(equipment_suit_code)
  if values_by_suit_num is not None:
    return values_by_suit_num.get(int(x["suit_num"]), np.nan)

  return np.nan

In [None]:
# Suits without a `suit_num` are treated as suit_num 3.
equip_suit_manip_df["suit_num"] = equip_suit_manip_df["suit_num"].fillna(3)

# Look up the hard-coded value of every row; every row starts at order 1.
equip_suit_manip_df["init_argument_value"] = equip_suit_manip_df.apply(get_blue_argument_value, axis=1)
equip_suit_manip_df["argument_order"] = 1

# Rows for which no value is known are discarded.
equip_suit_manip_df = equip_suit_manip_df.dropna(subset=["init_argument_value"])

#### Manipulate `SuitName101`

In [None]:
# SuitName101 is cloned from the SuitName102 rows. `.copy()` makes the clone
# an independent frame so the writes below cannot touch (or warn about) the source.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName102'").copy()
equip_suit_manip_copy_df["name"] = 'SuitName101'

# NOTE(review): 0.25 for suit_num 6 sits outside the 0.005 .. 0.05 range of its
# neighbours — possibly meant 0.025; value left unchanged, confirm.
for suit_num_value, init_value in {3: 0.005, 6: 0.25, 8: 0.05}.items():
  suit_num_mask = equip_suit_manip_copy_df["suit_num"] == suit_num_value
  equip_suit_manip_copy_df.loc[suit_num_mask, "init_argument_value"] = init_value

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

#### Manipulate `SuitName2020`

In [None]:
# The existing SuitName2020 rows become argument 2 ...
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName2020", "argument_order"] = 2

# ... and an independent clone of them supplies argument 1.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName2020'").copy()
equip_suit_manip_copy_df["argument_order"] = 1

# Per-suit_num value of argument 1; its `argument_value` is cleared.
for suit_num_value, init_value in {3: 0.3, 6: 0.4, 8: 0.5}.items():
  suit_num_mask = equip_suit_manip_copy_df["suit_num"] == suit_num_value
  equip_suit_manip_copy_df.loc[suit_num_mask, "init_argument_value"] = init_value
  equip_suit_manip_copy_df.loc[suit_num_mask, "argument_value"] = np.nan

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

#### Manipulate `SuitName2060`

In [None]:
# The existing SuitName2060 rows become argument 3 ...
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName2060", "argument_order"] = 3

# ... and an independent clone of them supplies arguments 1 and 2 in turn.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName2060'").copy()

# (argument_order, {suit_num: init_argument_value})
suit_2060_arguments = (
    (1, {3: 2, 6: 3, 8: 4}),
    (2, {3: 0.3, 6: 0.4, 8: 0.5}),
)
for argument_order, init_values in suit_2060_arguments:
  equip_suit_manip_copy_df["argument_order"] = argument_order
  for suit_num_value, init_value in init_values.items():
    suit_num_mask = equip_suit_manip_copy_df["suit_num"] == suit_num_value
    equip_suit_manip_copy_df.loc[suit_num_mask, "init_argument_value"] = init_value
    equip_suit_manip_copy_df.loc[suit_num_mask, "argument_value"] = np.nan

  # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
  # A snapshot is appended because the clone is modified again on the next pass.
  equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df.copy()], ignore_index=True)

#### Manipulate `SuitName2070`

In [None]:
# The existing SuitName2070 rows become argument 2 ...
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName2070", "argument_order"] = 2

# ... and an independent clone of them supplies arguments 1 and 3 in turn.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName2070'").copy()

# (argument_order, {suit_num: init_argument_value})
suit_2070_arguments = (
    (1, {3: 0.2, 6: 0.15, 8: 0.1}),
    (3, {3: 3, 6: 4, 8: 5}),
)
for argument_order, init_values in suit_2070_arguments:
  equip_suit_manip_copy_df["argument_order"] = argument_order
  for suit_num_value, init_value in init_values.items():
    suit_num_mask = equip_suit_manip_copy_df["suit_num"] == suit_num_value
    equip_suit_manip_copy_df.loc[suit_num_mask, "init_argument_value"] = init_value
    equip_suit_manip_copy_df.loc[suit_num_mask, "argument_value"] = np.nan

  # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
  # A snapshot is appended because the clone is modified again on the next pass.
  equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df.copy()], ignore_index=True)

#### Manipulate `SuitName4020`

In [None]:
# SuitName4020 gets an extra argument 2 with fixed values on every row.
# `assign` returns a new frame, so the source rows are left untouched.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName4020'").assign(
    argument_order=2,
    init_argument_value=1,
    argument_value=0.5,
)

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df.copy()], ignore_index=True)

#### Manipulate `SuitName5020`

In [None]:
# The existing SuitName5020 rows become argument 2 ...
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName5020", "argument_order"] = 2

# ... and an independent clone of them supplies argument 1.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName5020'").copy()
equip_suit_manip_copy_df["argument_order"] = 1

# Per-suit_num value of argument 1; its `argument_value` is cleared.
for suit_num_value, init_value in {3: 3, 6: 4, 8: 5}.items():
  suit_num_mask = equip_suit_manip_copy_df["suit_num"] == suit_num_value
  equip_suit_manip_copy_df.loc[suit_num_mask, "init_argument_value"] = init_value
  equip_suit_manip_copy_df.loc[suit_num_mask, "argument_value"] = np.nan

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df.copy()], ignore_index=True)

### Merge with information from `en_langs`

#### Suit name

In [None]:
# "SuitName<n>" -> integer n, stored from here on as `suit_id`.
equip_suit_manip_df["name"] = equip_suit_manip_df["name"].map(
    lambda suit_name: int(suit_name.replace("SuitName", ""))
)
equip_suit_manip_df = equip_suit_manip_df.rename(columns={"name": "suit_id"})

equip_suit_fin_df = equip_suit_manip_df.drop_duplicates().reset_index(drop=True)

# Suit names from the `en` data, keyed by the same integer suit id.
suit_name_df = pd.DataFrame(en_parsed_data["suit_name"]).rename(columns={"id": "suit_id", "value": "name"})
suit_name_df["suit_id"] = suit_name_df["suit_id"].map(int)

equip_suit_fin_df = equip_suit_fin_df.merge(suit_name_df, how="left", on="suit_id")

### Save data for database

#### Equipment suit

In [None]:
# One row per suit: (id, name). This rebinds `equip_suit_df`, which until now
# held the per-equipment breakdown.
equip_suit_df = equip_suit_fin_df[["suit_id", "name"]].rename(columns={"suit_id": "id"})
equip_suit_df = equip_suit_df.drop_duplicates().reset_index(drop=True)

In [None]:
# Independent copy that is saved below and later used to validate suit ids on equipment.
equip_suit_final_df = equip_suit_df.copy()

In [None]:
# Write the suit table (id, name) for the database import.
equip_suit_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_suit_tw_{APK_DATE}.csv", index=False)

#### Equipment Suit Skill

In [None]:
 # One row per distinct (suit_id, skill_id, suit_num); the row label kept by
 # drop_duplicates becomes the `id` column.
 # NOTE(review): this statement carries a stray leading space. A notebook cell
 # presumably tolerates it, but a plain .py export would fail with an
 # IndentationError — consider removing it.
 equip_suit_skill_df = equip_suit_fin_df[["suit_id", "skill_id", "suit_num"]].drop_duplicates(["suit_id", "skill_id", "suit_num"]).reset_index().rename(columns={"index": "id"})

In [None]:
# Keep only suit skills whose skill id appears in `skill_complex_final_df`.
suit_skill_ids = equip_suit_skill_df["skill_id"].astype(float)
intersections = set(suit_skill_ids) & set(skill_complex_final_df["id"].astype(float))
equip_suit_skill_final_df = equip_suit_skill_df[suit_skill_ids.isin(intersections)]

In [None]:
# Write the suit -> skill rows for the database import.
equip_suit_skill_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_suit_skill_tw_{APK_DATE}.csv", index=False)

#### Equipment Suit Skill Argument

In [None]:
# Attach the suit-skill id to every argument row via the
# (suit_id, skill_id, suit_num) triple.
suit_skill_keys = ["suit_id", "skill_id", "suit_num"]
equip_suit_skill_arg_df = equip_suit_fin_df.merge(equip_suit_skill_final_df, how="left", on=suit_skill_keys)
equip_suit_skill_arg_df = equip_suit_skill_arg_df.rename(columns={"id": "suit_skill_id"})

In [None]:
# Keep the argument columns only; the row label becomes the `id` column.
suit_skill_arg_columns = ["suit_skill_id", "argument_order", "init_argument_value", "argument_value"]
equip_suit_skill_arg_df = equip_suit_skill_arg_df[suit_skill_arg_columns].rename_axis("id").reset_index()

In [None]:
# At most one row per (suit skill, argument order); the first occurrence wins.
equip_suit_skill_arg_final_df = equip_suit_skill_arg_df.drop_duplicates(
    subset=["suit_skill_id", "argument_order"]
)

In [None]:
# Write the suit-skill argument rows for the database import.
equip_suit_skill_arg_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_suit_skill_arg_tw_{APK_DATE}.csv", index=False)

## Equip

### Parse data

In [None]:
# Read the Equip dump and rewrite it into text `literal_eval` can parse.
with open(f"{MAIN_CLEANED_DIR}/data_equip_Equip.bytes", "r", encoding="utf-8") as equip_file:
  texts = [text.strip() for text in equip_file]

texts = " ".join(texts)
# `[key] =` becomes `key:`; brace groups holding only digits/commas become lists.
# Replacement strings are raw because `\g` is not a valid string escape.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# A `{ {` opener and its closer must become list brackets. The brace runs are
# first swapped for placeholder tokens so that the broad `}, },` replacement
# cannot touch the more specific runs, then every token is expanded.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything so the whole dump is a single dict literal.
texts = "{" + texts + "}"


equips = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key in a parsed Equip record.
default_key_dict = {
    "sell_price": 'SellPrice',
    "base_prop": 'baseProperty',
    "name": "name",
    "desc": 'desc',
    "type": 'equipmentType',
    "improved_level": 'improvedLevel',
    "init_holes": 'initHoles',
    "is_all_job": 'isAllJob',
    "is_bind": 'isBind',
    "is_fashion": 'isFashion',
    "job_limit": 'jobLimit',
    "max_holes": 'maxHoles',
    "min_level_limit" : 'minLvLimit',
    "prop_level": 'propLevel',
    "quality": 'quality',
    "res_id": 'resId',
    "trade": 'trade',
    "refine_id": "RefineID",
    "static_id": 'staticId',
    "wardrobe_value":'wardrobeValue',
    "decomposition_output_id": "DecompositionOutputId"
}

equip_entries = []

# The loop variable is not called `id`, so the builtin stays usable afterwards.
for equip_key, parsed_dict in equips.items():
  equip_entry = {"id": equip_key}
  # Only dict records can supply values; anything else keeps just the id
  # (an explicit test instead of a bare `except:` that hid every error).
  if isinstance(parsed_dict, dict):
    equip_entry.update(
        (col, parsed_dict[def_key])
        for col, def_key in default_key_dict.items()
        if def_key in parsed_dict
    )
  equip_entries.append(equip_entry)

equip_df = pd.DataFrame(equip_entries)

### Get equipment information from `en_langs`

In [None]:
# Equipment names: prefer the `en` text and fall back to the `tw` one.
tw_equip_name_df = pd.DataFrame(tw_parsed_data["equip_name"]).rename(columns={"value": "tw_value"})
en_equip_name_df = pd.DataFrame(en_parsed_data["equip_name"])
equip_name_df = tw_equip_name_df.merge(en_equip_name_df, how="left", on="id")

# `pop` hands over the fallback column and removes it from the frame in one step.
equip_name_df["value"] = equip_name_df["value"].fillna(equip_name_df.pop("tw_value"))

In [None]:
# Equipment descriptions: prefer the `en` text and fall back to the `tw` one.
tw_equip_desc_df = pd.DataFrame(tw_parsed_data["equip_desc"]).rename(columns={"value": "tw_value"})
en_equip_desc_df = pd.DataFrame(en_parsed_data["equip_desc"])
equip_desc_df = tw_equip_desc_df.merge(en_equip_desc_df, how="left", on="id")

# `pop` hands over the fallback column and removes it from the frame in one step.
equip_desc_df["value"] = equip_desc_df["value"].fillna(equip_desc_df.pop("tw_value"))

In [None]:
# Equipment type labels: prefer the `en` text and fall back to the `tw` one.
tw_equip_type_df = pd.DataFrame(tw_parsed_data["equip_type"]).rename(columns={"value": "tw_value"})
en_equip_type_df = pd.DataFrame(en_parsed_data["equip_type"])
equip_type_df = tw_equip_type_df.merge(en_equip_type_df, how="left", on="id")

# `pop` hands over the fallback column and removes it from the frame in one step.
equip_type_df["value"] = equip_type_df["value"].fillna(equip_type_df.pop("tw_value"))

In [None]:
# Property entries from the `en` data; not used further in this section.
equipment_attr_desc_df = pd.DataFrame(en_parsed_data["property"])

### Select columns for database

In [None]:
# Columns carried forward from the parsed equipment records.
equip_columns = [
    "id", "name", "desc", "type", "init_holes", "is_bind", "max_holes",
    "res_id", "static_id", "improved_level", "min_level_limit", "prop_level",
    "quality", "refine_id", "sell_price", "is_all_job", "is_fashion",
    "wardrobe_value", "decomposition_output_id",
]
equip_all_df = equip_df[equip_columns]

### Merge with information from `en_langs`

#### Equip name

In [None]:
# Equipment rows reference their name by the "EquipName_<id>" key, so the
# lookup table is re-keyed to match before merging.
equip_name_df["id"] = equip_name_df["id"].map("EquipName_{}".format)
equip_name_df = equip_name_df.rename(columns={"id": "name", "value": "equipment_name"})
equip_all_df = equip_all_df.merge(equip_name_df, how="left", on=["name"])

#### Equip description

In [None]:
# Equipment rows reference their description by the "EquipDesc_<id>" key, so
# the lookup table is re-keyed to match before merging.
equip_desc_df["id"] = equip_desc_df["id"].map("EquipDesc_{}".format)
equip_desc_df = equip_desc_df.rename(columns={"id": "desc", "value": "equipment_desc"})
equip_all_df = equip_all_df.merge(equip_desc_df, how="left", on=["desc"])

#### Equip type

In [None]:
# Both `type` columns are cast to float so the merge keys compare equal.
equip_type_df = equip_type_df.rename(columns={"id": "type", "value": "equipment_type"})
equip_type_df["type"] = equip_type_df["type"].astype(float)

equip_all_df["type"] = equip_all_df["type"].astype(float)
equip_all_df = equip_all_df.merge(equip_type_df, how="left", on=["type"])

In [None]:
# Type 308 is labelled "Weapon - Knuckles". The label table has already been
# merged into `equip_all_df` at this point, so the correction is applied to
# both frames; fixing the lookup table alone never reaches the output.
equip_type_df.loc[equip_type_df["type"] == 308, "equipment_type"] = "Weapon - Knuckles"
equip_all_df.loc[equip_all_df["type"] == 308, "equipment_type"] = "Weapon - Knuckles"

In [None]:
# Display the type lookup table to check the labels.
equip_type_df

Unnamed: 0,type,equipment_type
0,100.0,Weapon - Basic Dagger
1,101.0,Weapon - One-Handed Sword
2,102.0,Weapon - One-Handed Axe
3,103.0,Weapon - One-Handed Rod
4,104.0,Weapon - Mace
5,105.0,Weapon - Dagger
6,106.0,Weapon - Knuckles
7,107.0,Weapon - Book
8,201.0,Weapon - Shield
9,301.0,Weapon - Two-Handed Sword


### Filter columns for database

In [None]:
# Columns that go to the database (plus the helpers needed for the corrections below).
equip_final_columns = [
    "id", "equipment_name", "equipment_desc", "equipment_type", "static_id",
    "res_id", "init_holes", "max_holes", "min_level_limit", "prop_level",
    "quality", "refine_id", "sell_price", "is_all_job", "wardrobe_value",
    "improved_level", "decomposition_output_id",
]
equip_final_df = equip_all_df[equip_final_columns]

### Data correction

In [None]:
# Equipment whose type has no label falls back to "Accessory - Decoration".
equip_final_df["equipment_type"] = equip_final_df["equipment_type"].fillna("Accessory - Decoration")
# Equipment without a resolved name is dropped.
equip_final_df = equip_final_df[~equip_final_df["equipment_name"].isnull()]
# NOTE(review): as written this replaces "I" with "I", which is a no-op —
# presumably one side was a look-alike character lost in export; confirm.
equip_final_df["equipment_name"] = equip_final_df["equipment_name"].str.replace("I", "I")
# equipment_group = the name with the Roman-numeral markers " I" .. " VI" removed.
# NOTE(review): these are plain substring replacements, so " V" / " I" at the
# start of an ordinary word are stripped as well — confirm this is acceptable for grouping.
equip_final_df["equipment_group"] = equip_final_df["equipment_name"].str.replace(" III", "").str.replace(" II", "").str.replace(" IV", "").str.replace(" VI", "").str.replace(" V", "").str.replace(" I", "")
# Missing quality counts as 1, missing improved_level as 0.
equip_final_df["quality"] = equip_final_df["quality"].fillna(1)
equip_final_df["improved_level"] = equip_final_df["improved_level"].fillna(0)
# Every member of a group takes the static_id of the group's improved_level == 0
# row(s); groups without such a row end up with a null static_id.
equipment_group_df = equip_final_df.query("improved_level == 0")[["equipment_group", "static_id"]].rename(columns={"static_id":"correct_static_id"})
equip_final_df = pd.merge(equip_final_df, equipment_group_df, how="left", left_on=["equipment_group"], right_on=["equipment_group"])
equip_final_df["static_id"] = equip_final_df["correct_static_id"]
# The merge can multiply rows when a group has several base rows; keep the first per id.
equip_final_df = equip_final_df.drop_duplicates("id", keep="first")

### Add equipment suit info

In [None]:
def get_final_level(x):
  """
  Compute the final level of an equipment row

  Parameters
  ----------
  x
    Row-like object indexable by key, providing
    `prop_level` and `improved_level`

  Returns
  -------
  final_level
    `prop_level` minus ten times `improved_level`
  """
  improvement_offset = x["improved_level"] * 10
  return x["prop_level"] - improvement_offset

# Row-wise: store the result of get_final_level as the final_level column.
equip_final_df["final_level"] = equip_final_df.apply(get_final_level, axis=1)

def assign_equipment_suit(x):
  """
  Map an equipment row to the id of the suit it belongs to

  Parameters
  ----------
  x
    Row-like object indexable by key, providing
    `final_level` and `quality`

  Returns
  -------
  suit_id
    * quality 1: the suit id listed for the final level
      (30 to 120 in steps of 10), `np.nan` for any
      other level
    * quality 2: `100 + (final_level - 20) / 10`
    * any other quality: `np.nan`
  """
  # Suit ids of quality-1 equipment keyed by final level.
  quality_one_suits = {
      30: 1020,
      40: 2020,
      50: 3020,
      60: 4020,
      70: 5020,
      80: 2060,
      90: 2070,
      100: 2080,
      110: 2090,
      120: 2100,
  }

  final_level = x["final_level"]
  quality = x["quality"]

  if quality == 1:
    # Unlisted levels used to fall through and return None implicitly;
    # return np.nan so every "no suit" case yields the same value.
    return quality_one_suits.get(final_level, np.nan)
  if quality == 2:
    return 100 + (final_level - 20) / 10
  return np.nan

# Row-wise: store the result of assign_equipment_suit as the suit_id column.
equip_final_df["suit_id"] = equip_final_df.apply(assign_equipment_suit, axis=1)

In [None]:
# Clear suit id 2080 (the value assign_equipment_suit returns for quality 1,
# final level 100) so those rows carry no suit.
# NOTE(review): the reason for excluding this suit is not visible here - confirm.
equip_final_df.loc[equip_final_df["suit_id"] == 2080, "suit_id"] = np.nan

In [None]:
# Echo the (rows, columns) count as a sanity check.
equip_final_df.shape

(3934, 21)

In [None]:
# Set aside the rows without a suit id; they are appended back after the
# suit-id filter in the following cells.
equip_final_temp_df = equip_final_df[equip_final_df["suit_id"].isnull()]

In [None]:
# Suit ids that occur both on equipment rows and in the suit table.
intersections = set(equip_final_df["suit_id"].astype(float)) & set(equip_suit_final_df["id"].astype(float))

# Keep only the equipment rows whose suit id is one of those shared ids.
equip_final_df = equip_final_df.loc[equip_final_df["suit_id"].astype(float).isin(intersections)]

In [None]:
# Re-attach the suit-less rows that were set aside before the filter.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; with ignore_index=True the result is the same.
equip_final_df = pd.concat([equip_final_df, equip_final_temp_df], ignore_index=True)

### Save data for database

In [None]:
# Switch to the database column names and remove the helper columns that
# were only needed while preparing the data.
equip_final_df = (
    equip_final_df
    .rename(columns={
        "equipment_name": "name",
        "equipment_desc": "description",
        "equipment_type": "type",
        "decomposition_output_id": "decomposition_id"
    })
    .drop(columns=["equipment_group", "correct_static_id", "final_level"])
)

# Names are stored in lower case.
equip_final_df["name"] = equip_final_df["name"].str.lower()

equip_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_tw_{APK_DATE}.csv", index=False)

## Equip Attributes

### Get data from Equip

In [None]:
# Turn the per-equipment `base_prop` mapping into one row per
# (equipment id, attribute, value).
records = equip_df[["id", "base_prop"]].to_dict(orient="records")
new_records = []

for record in records:
  base_prop = record["base_prop"]

  if isinstance(base_prop, dict):
    # One output row for every attribute in the mapping.
    new_records.extend(
        {"id": record["id"], "attr": attr, "value": value}
        for attr, value in base_prop.items()
    )
  else:
    # No usable mapping: keep the equipment id with an empty attribute row.
    new_records.append({"id": record["id"], "attr": np.nan, "value": np.nan})

equipment_attributes_df = pd.DataFrame(new_records)

### Merge with information from `en_langs`

#### Attribute Description

In [None]:
# Rename the description table so its key lines up with the `attr` column of
# the attribute rows, and cast both keys to float before joining.
equipment_attr_desc_df = (
    equipment_attr_desc_df
    .rename(columns={"id": "attr", "value": "attributes"})
    .astype({"attr": float})
)
equipment_attributes_df = equipment_attributes_df.astype({"attr": float})

# Attach the attribute text and keep only the columns that get exported.
equipment_attr_fin_df = equipment_attributes_df.merge(equipment_attr_desc_df, how="left", on="attr")
equipment_attr_fin_df = equipment_attr_fin_df[["id", "attributes", "value"]]

### Save data for database

In [None]:
# Switch to the database column names and renumber the rows from zero.
equipment_attr_fin_df = (
    equipment_attr_fin_df
    .rename(columns={
        "id": "equip_id",
        "attributes": "attribute",
        "value": "attribute_value"
    })
    .reset_index(drop=True)
)
equipment_attr_fin_df["id"] = pd.Series(range(len(equipment_attr_fin_df)))

# Keep only attribute rows whose equipment id is present in equip_final_df.
intersections = set(equipment_attr_fin_df["equip_id"].astype(int)) & set(equip_final_df["id"].astype(int))
equipment_attributes_final_df = equipment_attr_fin_df.loc[equipment_attr_fin_df["equip_id"].astype(int).isin(intersections)]

In [None]:
# Write the filtered equipment attributes to the parsed-output folder.
equipment_attributes_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_attributes_tw_{APK_DATE}.csv", index=False)

## Material

### Get equip and item data

In [None]:
# Items and equipment share the same four columns; `is_item` records which
# table a row came from (1 = item, 0 = equipment).
# The explicit .copy() makes each frame independent of its source, so adding
# `is_item` does not assign on a slice (SettingWithCopyWarning).
material_one_df = item_final_df[["id", "name", "description", "res_id"]].copy()
material_one_df["is_item"] = 1
material_two_df = equip_final_df[["id", "name", "description", "res_id"]].copy()
material_two_df["is_item"] = 0

### Data correction

In [None]:
# NOTE(review): this cell used to append material_two_df (equipment) and then
# immediately overwrite the result with a copy of material_one_df, so only
# item rows were ever exported. That net behaviour is kept; the dead append
# is removed because DataFrame.append no longer exists in pandas 2.x. If
# equipment rows are meant to be included, use
# pd.concat([material_one_df, material_two_df], ignore_index=True) instead.
material_final_df = material_one_df.copy()
material_final_df["name"] = material_final_df["name"].str.lower()

# Every material whose name contains "card" gets res_id 99999.
# na=False: a missing name counts as "no match" instead of producing a NaN in
# the boolean mask, which .loc rejects.
material_final_df.loc[material_final_df["name"].str.contains("card", na=False), 'res_id'] = 99999

### Save data for database

In [None]:
# Write the material table to the parsed-output folder.
material_final_df.to_csv(f"{MAIN_PARSED_DIR}/material_tw_{APK_DATE}.csv", index=False)

In [None]:
# Keep an independent copy of the exported material table under a shorter name.
material_df = material_final_df.copy()

## Job

### Get data from `en_langs`

In [None]:
# Job names from both language tables; the value column is renamed so the
# two do not clash after the join.
tw_job_name_df = pd.DataFrame(tw_parsed_data["job_name"]).rename(columns={"value": "tw_name"})
en_job_name_df = pd.DataFrame(en_parsed_data["job_name"]).rename(columns={"value": "name"})
tw_job_df = tw_job_name_df
en_job_df = en_job_name_df

# Every TW job is kept; where no EN name exists the TW name is used instead.
job_all_df = tw_job_df.merge(en_job_df, how="left", on="id")
job_all_df["name"] = job_all_df["name"].fillna(job_all_df["tw_name"])

In [None]:
# Work on an independent copy so job_all_df stays untouched.
job_final_df = job_all_df.copy()

In [None]:
# The TW name was only needed as a fallback; it is not exported.
job_final_df = job_final_df.drop(columns="tw_name")

### Save data for database

In [None]:
# Write the job table to the parsed-output folder.
job_final_df.to_csv(f"{MAIN_PARSED_DIR}/job_tw_{APK_DATE}.csv", index=False)

## Monster

### Parse data

In [None]:
# Read the monster dump and collapse it onto one line (each line stripped,
# lines joined by a single space).
with open(f"{MAIN_CLEANED_DIR}/data_monster_Monster.bytes", "r", encoding="utf-8") as monster_file:
  texts = " ".join(line.strip() for line in monster_file)

# Rewrite the dump syntax into a Python literal. The replacement strings are
# raw strings so the `\g<1>` back-references are not read as (invalid)
# string escape sequences.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =   ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)     # { 1,2 }   ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)   # {1, 2}    ->  [1, 2]
# No brace group made only of digits, commas and spaces is left after the
# previous substitution; this one is kept for parity with the original steps.
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# print(texts)

# Brace sequences are first swapped for placeholders so that the generic
# "}, }," replacement cannot touch the more specific sequences.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# AAAAA/BBBBB/CCCCC come back with square brackets (list open/close);
# DDDDD/EEEEE/FFFFF are restored to exactly what they replaced.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in one outer dict and evaluate it as a literal.
texts = "{" + texts + "}"

monsters = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Fields copied from every parsed monster record when the record has them.
# Defined once instead of once per monster; the order is the order in which
# the fields are inserted into each entry.
MONSTER_FIELDS = (
    'name', 'Desc', 'DpsTestId', 'DropListKV', 'EffectHang', 'EffectId',
    'EffectScale', 'ExtraEffect', 'HasDieEffect', 'IfCanShowInDpsTest',
    'IfIgnoreInvisibility', 'IsSpecialSkillNotTarget', 'MagicMap',
    'MvpRankDrop', 'MvpRareDrop', 'PatrolPos', 'PetId', 'RareDropType',
    'RingScale', 'Scale', 'ShowRing', 'SkillEffect', 'Weather',
    'alertRange', 'aniSpeed', 'attackSpeedIncrease', 'baseExp', 'bloodNums',
    'bodily', 'bornSkillId', 'bornSound', 'bronEffectId',
    'calDamageToCreator', 'canBattleTeleport', 'cantSelect', 'castSkillRate',
    'chaseRange', 'criticalLevel', 'criticalRate', 'criticalResistanceLevel',
    'criticalResistanceRate', 'criticalResistanceValue', 'criticalValue',
    'dialogueBubbleList', 'dieEffectPath', 'dieSound', 'dieSoundTime',
    'dodgeLevel', 'dodgeRate', 'dropAnnouncementId',
    'finalMagicDefenseIncrease', 'finalPhysicDefenseIncrease',
    'fixedMagicDamage', 'fixedMagicDamageReduce', 'fixedPhysicDamage',
    'fixedPhysicDamageReduce', 'followType', 'forceType', 'hasWhiteEffect',
    'hitIncrease', 'hitLevel', 'id', 'idleSound', 'ifActive',
    'ifChangeTarget', 'ifControlledByPunishment', 'ifSelectPlayerFirst',
    'isBeHitBack', 'isHideBlood', 'isHideName', 'isIgnoreForceAttack',
    'isKeyMonster', 'isLevelEffect', 'isResetStateLeaveBattle',
    'isShowInMap', 'isUnmove', 'jobExp', 'level', 'lootForAll',
    'magicDamageIncrease', 'magicDamagedIncrease', 'magicDefenseLevel',
    'magicDps', 'magicPenetrationIncrease', 'magicPenetrationLevel',
    'magicRebound', 'maxHp', 'monsterCollectionId', 'monsterTypeForServer',
    'mvpDropId', 'nameLocalized', 'navPos', 'navSceneId', 'navScenesId',
    'overChaseChangeHatred', 'patrolRange', 'patrolSpeed', 'patrolType',
    'physicDamageIncrease', 'physicDamagedIncrease', 'physicDefenseLevel',
    'physicDps', 'physicPenetrationIncrease', 'physicPenetrationLevel',
    'property', 'race', 'radius', 'readAttrFrom', 'rebound', 'resId',
    'runSound', 'showMiniProfile', 'skills', 'soundVolume', 'speed',
    'staticId', 'tips', 'type', 'magicVampire', 'vampire', 'zeny',
)

monster_entries = []

# `monster_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the notebook.
for monster_id, parsed_dict in monsters.items():
  # A record that is not a dict contributes only its key, as before, but
  # without a bare `except` hiding every other kind of error.
  fields = parsed_dict if isinstance(parsed_dict, dict) else {}

  # Start from the table key; a record's own 'id' field, when present,
  # overrides it below.
  monster_entry = {"id": monster_id}
  for field in MONSTER_FIELDS:
    if field in fields:
      monster_entry[field] = fields[field]
  monster_entries.append(monster_entry)

monster_info_df = pd.DataFrame(monster_entries)

### Merge with information from `en_langs`

#### Monster name

In [None]:
# Monster names from both language tables; the value column is renamed so
# the two do not clash after the join.
tw_monster_name_df = pd.DataFrame(tw_parsed_data["monster_name"]).rename(columns={"value": "tw_name"})
en_monster_name_df = pd.DataFrame(en_parsed_data["monster_name"]).rename(columns={"value": "name"})
tw_monster_df = tw_monster_name_df
en_monster_df = en_monster_name_df

# Every TW entry is kept; where no EN name exists the TW name is used instead.
monster_all_df = tw_monster_df.merge(en_monster_df, how="left", on="id")
monster_all_df["name"] = monster_all_df["name"].fillna(monster_all_df["tw_name"])

In [None]:
# The TW name was only needed as a fallback.
monster_all_df = monster_all_df.drop(columns="tw_name")

In [None]:
# Rename to the key/value names used when joining onto monster_info_df, and
# keep an independent copy as the name lookup table.
monster_all_df = monster_all_df.rename(columns={"id": "name_id", "name": "en_name"})
monster_en_name_df = monster_all_df.copy()

In [None]:
# Derive the name-table key from the raw name reference by removing quotes
# and the "MonsterName" prefix, e.g. MonsterName10002 -> 10002.
monster_info_df["name_id"] = monster_info_df["name"].apply(
    lambda raw_name: str(raw_name).replace('"', '').replace('MonsterName', "")
)

# Attach the display name to every monster.
monster_info_df = monster_info_df.merge(monster_en_name_df, how="left", on="name_id")

#### Monster description

In [None]:
# Monster descriptions from both language tables; the value column is
# renamed so the two do not clash after the join.
tw_monster_desc_df = pd.DataFrame(tw_parsed_data["monster_desc"]).rename(columns={"value": "tw_desc"})
en_monster_desc_df = pd.DataFrame(en_parsed_data["monster_desc"]).rename(columns={"value": "desc"})
tw_monster_df = tw_monster_desc_df
en_monster_df = en_monster_desc_df

# Every TW entry is kept; a missing EN description falls back to the TW one.
monster_all_df = tw_monster_df.merge(en_monster_df, how="left", on="id")
monster_all_df["desc"] = monster_all_df["desc"].fillna(monster_all_df["tw_desc"])

# Drop the fallback column and switch to the names used for the later join.
monster_all_df = (
    monster_all_df
    .drop(columns="tw_desc")
    .rename(columns={"id": "desc_id", "desc": "en_desc"})
)
monster_en_desc_df = monster_all_df.copy()

In [None]:
def _clean_monster_desc(raw_desc):
  """
  Clean one description value

  Removes the `<color=#FFFFFF00>jayw</color>` marker, then
  every remaining `<...>` tag, then literal `\\n` sequences
  and any backslashes that are left
  """
  cleaned = str(raw_desc).replace("<color=#FFFFFF00>jayw</color>", "")
  cleaned = re.sub(r'\<[\/\w\=\#]*\>', '', cleaned)
  return cleaned.replace("\\n", "").replace("\\", "")

monster_en_desc_df["en_desc"] = monster_en_desc_df["en_desc"].apply(_clean_monster_desc)

# Derive the description-table key from the raw reference by removing quotes
# and the "MonsterCollection" prefix, then attach the cleaned description.
monster_info_df["desc_id"] = monster_info_df["Desc"].apply(
    lambda raw_ref: str(raw_ref).replace('"', '').replace('MonsterCollection', "")
)
monster_info_df = monster_info_df.merge(monster_en_desc_df, how="left", on="desc_id")

### Filter to include field monster only

In [None]:
# A monster is kept when it has a drop list, a navigation position and a
# display name.
# NOTE(review): the original condition tested `en_name` twice; the duplicate
# is removed here (same result). If the second test was meant to be a
# different column (for example `en_desc`), add it to this mask.
has_required_fields = (
    monster_info_df["DropListKV"].notnull()
    & monster_info_df["navPos"].notnull()
    & monster_info_df["en_name"].notnull()
)
monster_df = monster_info_df[has_required_fields]

# Upper bounds on level and id; rows where either value is missing fail the
# comparison and are dropped too.
monster_df = monster_df[(monster_df["level"] <= 120) & (monster_df["id"] <= 20000)]

# One row per display name, keeping the first occurrence.
monster_df = monster_df.drop_duplicates("en_name", keep="first")


In [None]:
# Echo the filtered monster table for a visual check.
monster_df

Unnamed: 0,id,name,Desc,DropListKV,HasDieEffect,IfCanShowInDpsTest,PetId,Weather,alertRange,aniSpeed,baseExp,castSkillRate,criticalLevel,criticalResistanceLevel,dieSound,dieSoundTime,dodgeLevel,fixedPhysicDamage,hitLevel,ifActive,isShowInMap,jobExp,magicDefenseLevel,magicDps,magicPenetrationLevel,maxHp,monsterCollectionId,nameLocalized,navPos,navSceneId,navScenesId,patrolRange,physicDefenseLevel,physicDps,physicPenetrationLevel,property,race,resId,runSound,skills,soundVolume,speed,staticId,zeny,IfIgnoreInvisibility,Scale,bodily,fixedMagicDamage,level,patrolSpeed,chaseRange,dialogueBubbleList,isUnmove,patrolType,MagicMap,type,EffectHang,EffectId,radius,canBattleTeleport,bornSound,criticalRate,criticalResistanceRate,dodgeRate,finalMagicDefenseIncrease,finalPhysicDefenseIncrease,hitIncrease,magicDamageIncrease,magicDamagedIncrease,magicPenetrationIncrease,physicDamageIncrease,physicDamagedIncrease,physicPenetrationIncrease,bronEffectId,ifChangeTarget,tips,ifSelectPlayerFirst,readAttrFrom,criticalValue,DpsTestId,MvpRankDrop,MvpRareDrop,isBeHitBack,monsterTypeForServer,mvpDropId,bloodNums,showMiniProfile,IsSpecialSkillNotTarget,bornSkillId,ShowRing,forceType,SkillEffect,isHideName,isIgnoreForceAttack,attackSpeedIncrease,RingScale,ExtraEffect,dieEffectPath,PatrolPos,isKeyMonster,isLevelEffect,cantSelect,isHideBlood,EffectScale,idleSound,isResetStateLeaveBattle,hasWhiteEffect,overChaseChangeHatred,RareDropType,ifControlledByPunishment,rebound,fixedMagicDamageReduce,fixedPhysicDamageReduce,dropAnnouncementId,criticalResistanceValue,calDamageToCreator,followType,lootForAll,name_id,en_name,desc_id,en_desc
1,10002,MonsterName10002,MonsterCollection10002,"{'0': 1, '1000201': 1, '1000202': 1, '1000203'...",1.0,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",,1.5,17.0,35.0,1.0,1.0,10002die,0.389,1.0,,1.0,0.0,1.0,17.0,7.0,11.0,3.0,65.0,10002.0,绿棉虫,"[{'posx': 131.1, 'posy': 12.500852, 'posz': 3}]",1020.0,[1020],3.0,7.0,7.0,4.0,4.0,8.0,10002.0,,"[{'castWeight': 0, 'skillId': 10002101}, {'cas...",0.65,1.5,10002,9.0,1.0,0.95,2.0,1.0,2.0,1.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10002,Fabre,10002,"Fabres, often found near crops, has only one ..."
2,10003,MonsterName10003,MonsterCollection10003,"{'1000301': 1, '1000302': 1, '1000303': 1, '10...",1.0,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",0.0,,18.0,,2.0,2.0,10003die,0.466,2.0,1.0,2.0,0.0,1.0,18.0,8.0,,5.0,72.0,10003.0,虫蛹,"[{'posx': 145.7, 'posy': 4.6, 'posz': -11.6}]",1020.0,[1020],0.0,8.0,,3.0,4.0,8.0,10003.0,,"[{'castWeight': 0, 'skillId': 10003111}]",0.65,0.0,10003,9.0,,0.95,2.0,,3.0,0.0,0.0,"[{'dialogueBubbleId': 100023, 'dialogueBubbleR...",1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10003,Pupa,10003,"When Fabres reach a certain stage of growth, ..."
3,10004,MonsterName10004,MonsterCollection10004,"{'1000401': 1, '1000403': 1, '1000404': 1, '10...",,1.0,82003.0,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",7.0,,19.0,,2.0,2.0,31006die,2.113,2.0,1.0,2.0,0.0,1.0,19.0,6.0,7.0,5.0,80.0,10004.0,疯兔,"[{'posx': -54.4, 'posy': 14.883457, 'posz': 14...",1020.0,[1020],3.0,10.0,11.0,3.0,,512.0,10004.0,31006run,"[{'castWeight': 0, 'skillId': 10004101}]",0.65,2.0,10004,9.0,,0.95,2.0,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10004,Lunatic,10004,The bunny monster which dwells near Prontera ...
4,10005,MonsterName10005,MonsterCollection10005,"{'1000501': 1, '1000503': 1, '1000504': 1, '10...",1.0,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",0.0,,28.0,,3.0,3.0,,,3.0,,3.0,0.0,1.0,28.0,18.0,,6.0,242.0,10005.0,盗虫卵,"[{'posx': 61.05, 'posy': -1.4555541, 'posz': 1...",1050.0,[1050],0.0,12.0,,9.0,7.0,8.0,10005.0,,"[{'castWeight': 0, 'skillId': 10005111}]",,0.0,10005,13.0,1.0,0.9,2.0,3.0,13.0,0.0,0.0,,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10005,Thief Bug Egg,10005,"In the kingdom's sewers, brown spherical obje..."
5,10006,MonsterName10006,MonsterCollection10006,"{'1000601': 1, '1000602': 1, '1000603': 1, '10...",,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",,,41.0,35.0,3.0,3.0,31004die,0.844,3.0,,3.0,0.0,1.0,14.0,13.0,20.0,5.0,204.0,10006.0,苍蝇,"[{'posx': 37.8, 'posy': 24.268124, 'posz': 50.3}]",1310.0,[1310],3.0,13.0,13.0,8.0,3.0,8.0,10006.0,31004run,"[{'castWeight': 0, 'skillId': 10006101}, {'cas...",0.4,3.5,10006,13.0,1.0,0.9,2.0,2.0,12.0,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10006,Chonchon,10006,A flying monster with big watery eyes. Accord...
6,10007,MonsterName10007,MonsterCollection10007,"{'1000701': 1, '1000702': 1, '1000703': 1, '10...",,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",,,50.0,36.0,2.0,2.0,10007die,0.755,2.0,2.0,2.0,0.0,1.0,50.0,10.0,21.0,6.0,273.0,10007.0,树精,"[{'posx': -18.9, 'posy': 14.163678, 'posz': 51...",1430.0,[1430],3.0,10.0,31.0,4.0,4.0,256.0,10007.0,,"[{'castWeight': 0, 'skillId': 10007101}, {'cas...",0.65,,10007,27.0,,0.9,,,10.0,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10007,Willow,10007,A sentient plant monster freed from the restr...
7,10008,MonsterName10008,MonsterCollection10008,"{'1000801': 1, '1000802': 1, '1000803': 1, '10...",,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",,5.0,48.0,35.0,14.0,14.0,10008die,0.687,14.0,24.0,14.0,0.0,1.0,144.0,86.0,389.0,43.0,7686.0,10008.0,罗达蛙,"[{'posx': 3.5714493, 'posy': 1.3904978, 'posz'...",1130.0,[1130],3.0,57.0,584.0,29.0,2.0,16.0,10008.0,10008run,"[{'castWeight': 0, 'skillId': 10008101}, {'cas...",0.65,5.0,10008,48.0,,,,,33.0,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10008,Roda Frog,10008,An amazing saltwater frog most often found dw...
8,10009,MonsterName10009,MonsterCollection10009,"{'1000901': 1, '1000904': 1, '1000905': 1, '10...",,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",,,23.0,35.0,4.0,4.0,,,4.0,4.0,4.0,0.0,1.0,38.0,19.0,25.0,11.0,328.0,10009.0,盗虫,"[{'posx': 52.910763, 'posy': -1.2429972, 'posz...",1050.0,[1050],3.0,19.0,37.0,8.0,,8.0,10009.0,,"[{'castWeight': 0, 'skillId': 10009101}, {'cas...",,2.0,10009,13.0,1.0,,2.0,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10009,Thief Bug,10009,"Once an insignificant bug, but mutated once c..."
9,10010,MonsterName10010,MonsterCollection10010,"{'1001001': 1, '1001002': 1, '1001003': 1, '10...",,1.0,82007.0,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",2.0,3.3,78.0,35.0,2.0,2.0,10010die,0.473,2.0,2.0,2.0,0.0,1.0,26.0,9.0,28.0,7.0,338.0,10010.0,蝗虫,"[{'posx': 37, 'posy': 11.06618, 'posz': 45.1}]",1030.0,[1030],3.0,14.0,42.0,5.0,4.0,8.0,10010.0,10010run,"[{'castWeight': 0, 'skillId': 10010101}, {'cas...",0.65,3.3,10010,27.0,1.0,,,,11.0,3.3,,"[{'dialogueBubbleId': 100020, 'dialogueBubbleR...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10010,Grasshopper,10010,A grasshopper-shaped monster with a violin in...
10,10011,MonsterName10011,MonsterCollection10011,"{'1001101': 1, '1001102': 1, '1001103': 1, '10...",1.0,1.0,,"[{'AffixMonsterRate': 0, 'Prefix': [{'AffixId'...",,2.0,21.0,35.0,2.0,2.0,10011die,0.912,2.0,1.0,2.0,0.0,1.0,21.0,7.0,6.0,5.0,96.0,10011.0,小鸡,"[{'posx': 85.6, 'posy': 26.4282, 'posz': 41.2}]",1310.0,[1310],3.0,10.0,9.0,3.0,1.0,512.0,10011.0,10011run,"[{'castWeight': 0, 'skillId': 10011101}, {'cas...",0.65,2.0,10011,9.0,,,2.0,,6.0,,,"[{'dialogueBubbleId': 100002, 'dialogueBubbleR...",,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10011,Picky,10011,A cute chick monster that never seems to grow...


### Save data for Monster Drop

In [None]:
# Snapshot id, name and raw drop list as a list of dicts before monster_df is
# renamed and trimmed in the cells below.
monster_drop_records = monster_df[["id", "en_name", "DropListKV"]].to_dict(orient="records")

### Monster Skills

In [None]:
# Keep id and raw skill list before monster_df is renamed and trimmed in the
# cells below.
monster_skills_raw_df = monster_df[["id", "skills"]]

### Filter columns and column name correction

In [None]:
# Map the raw dump field names to the database column names. Fields whose
# name stays the same (id, property, race, type, level) need no entry.
monster_df = monster_df.rename(columns={
    "attackSpeedIncrease": "final_aspd",
    "baseExp": "b_exp",
    "criticalLevel": "crit",
    "criticalRate": "final_crit",
    "criticalResistanceLevel": "crit_res",
    "criticalResistanceRate": "final_crit_res",
    "dodgeLevel": "dodge",
    "dodgeRate": "final_dodge",
    "hitLevel": "hit",
    "hitIncrease": "final_hit",
    "jobExp": "j_exp",
    "magicDefenseLevel": "m_def",
    "finalMagicDefenseIncrease": "final_m_def",
    "magicDamageIncrease": "final_m_dmg_bonus",
    "magicDamagedIncrease": "final_m_dmg_res",
    "magicDps": "m_dps",
    "magicPenetrationLevel": "m_pen",
    "magicPenetrationIncrease": "final_m_pen",
    "magicRebound": "m_reflect",
    "magicVampire": "m_lifesteal",
    "fixedMagicDamage": "m_dmg_bonus",
    "fixedMagicDamageReduce": "m_dmg_res",
    "maxHp": "max_hp",
    "navSceneId": "location",
    "bodily": "size",
    "physicDefenseLevel": "p_def",
    "physicDps": "p_dps",
    "physicDamageIncrease": "final_p_dmg_bonus",
    "fixedPhysicDamage": "p_dmg_bonus",
    "physicDamagedIncrease": "final_p_dmg_res",
    "physicPenetrationIncrease": "final_p_pen",
    "finalPhysicDefenseIncrease": "final_p_def",
    "physicPenetrationLevel": "p_pen",
    "fixedPhysicDamageReduce": "p_dmg_res",
    "vampire": "p_lifesteal",
    "rebound": 'p_reflect',
    "resId": "res_id",
    "en_desc": "description"
})

# Keep the exported columns only. The explicit .copy() makes the selection an
# independent frame, so the fillna assignments in the next cell do not
# operate on a slice (SettingWithCopyWarning).
monster_df = monster_df[[
    'id', 'en_name', 'b_exp', 'crit', 'final_crit', 'crit_res',
    'final_crit_res', 'dodge', 'final_dodge', 'final_aspd', 'hit',
    'final_hit', 'j_exp', 'm_def', 'final_m_def', 'final_m_dmg_bonus',
    'final_m_dmg_res', 'm_dps', 'm_pen', 'final_m_pen', 'm_dmg_bonus',
    'm_dmg_res', 'max_hp', 'location', 'size', 'p_def', 'final_p_def',
    'p_dps', 'final_p_dmg_bonus', 'final_p_dmg_res', 'final_p_pen', 'p_pen',
    'p_reflect', 'p_dmg_res', 'p_dmg_bonus', 'property', 'race', 'type',
    'level', 'res_id', 'description', "zeny",
]].copy()

### Data correction

In [None]:
# Default used for each column when the dump has no value for it.
fill_defaults = {
    **dict.fromkeys(
        ['crit', 'crit_res', 'dodge', 'hit', 'j_exp', 'm_def', 'm_dps',
         'm_pen', 'max_hp', 'location', 'p_def', 'p_dps', 'p_pen',
         'property', 'type'],
        0,
    ),
    "size": 1,
    "race": 32,
    "level": 1,
}

for col, default in fill_defaults.items():
  monster_df[col] = monster_df[col].fillna(default)

# These columns hold ids that are resolved to text in the following cells.
monster_df = monster_df.rename(columns={
    "size": "size_id",
    "property": "attr_id",
    "race": "race_id",
    "type": "type_id",
    "location": "loc_id",
})

### Merge with information from `en_langs`

#### Monster size

In [None]:
# Hard-coded size lookup: id -> label.
monster_size_df = pd.DataFrame({
    "size_id": [0, 1, 2, 3],
    "size": ["Large", "Medium", "Small", "Giant"],
})

# Attach the size label to every monster.
monster_df["size_id"] = monster_df["size_id"].astype(float)
monster_df = monster_df.merge(monster_size_df, how="left", on="size_id")

#### Monster attributes

In [None]:
monster_attr_df = pd.DataFrame(en_parsed_data["attr"]).rename(columns={"id": "attr_id", "value": "attr"})

# The lookup ids are lowered by one before the join.
# NOTE(review): presumably the language table is 1-based while the monster
# property codes are 0-based - confirm.
monster_attr_df["attr_id"] = monster_attr_df["attr_id"].astype(float) - 1

monster_df["attr_id"] = monster_df["attr_id"].astype(float)
monster_df = monster_df.merge(monster_attr_df, how="left", on="attr_id")

#### Monster race

In [None]:
# Race lookup from the EN language table, keyed by race_id.
monster_race_df = pd.DataFrame(en_parsed_data["race"]).rename(columns={"id": "race_id", "value": "race"})

# Both keys are cast to float before joining.
monster_race_df["race_id"] = monster_race_df["race_id"].astype(float)
monster_df["race_id"] = monster_df["race_id"].astype(float)
monster_df = monster_df.merge(monster_race_df, how="left", on="race_id")

#### Monster location

In [None]:
# Scene-name lookup from the EN language table, keyed by loc_id.
monster_loc_df = pd.DataFrame(en_parsed_data["scene_name"]).rename(columns={"id": "loc_id", "value": "loc"})

# Both keys are cast to float before joining.
monster_loc_df["loc_id"] = monster_loc_df["loc_id"].astype(float)
monster_df["loc_id"] = monster_df["loc_id"].astype(float)
monster_df = monster_df.merge(monster_loc_df, how="left", on="loc_id")

### Data correction (cont)

In [None]:
# Text columns are stored in lower case.
cols = ["en_name", "size", "attr", "race"]
for col in cols:
  monster_df[col] = monster_df[col].str.lower()

# Monsters whose race id found no match in the race table get this label.
monster_df["race"] = monster_df["race"].fillna("demi-human")

# The id columns have been resolved to text (type_id is simply not exported);
# loc_id is kept next to loc.
monster_df = (
    monster_df
    .drop(columns=["size_id", "attr_id", "race_id", "type_id"])
    .rename(columns={"en_name": "name"})
)

In [None]:
# Drop monsters whose resolved location is the '???' placeholder text.
monster_df = monster_df[monster_df["loc"] != '???']

In [None]:
# Independent copy that is exported below.
monster_final_df = monster_df.copy()

In [None]:
# Echo the final monster table for a visual check.
monster_final_df

Unnamed: 0,id,name,b_exp,crit,final_crit,crit_res,final_crit_res,dodge,final_dodge,final_aspd,hit,final_hit,j_exp,m_def,final_m_def,final_m_dmg_bonus,final_m_dmg_res,m_dps,m_pen,final_m_pen,m_dmg_bonus,m_dmg_res,max_hp,loc_id,p_def,final_p_def,p_dps,final_p_dmg_bonus,final_p_dmg_res,final_p_pen,p_pen,p_reflect,p_dmg_res,p_dmg_bonus,level,res_id,description,zeny,size,attr,race,loc
0,10002,fabre,17.0,1.0,,1.0,,1.0,,,1.0,,17.0,7.0,,,,11.0,3.0,,1.0,,65.0,1020.0,7.0,,7.0,,,,4.0,,,,2.0,10002.0,"Fabres, often found near crops, has only one ...",9.0,small,earth,insect,Prontera South Gate
1,10003,pupa,18.0,2.0,,2.0,,2.0,,,2.0,,18.0,8.0,,,,0.0,5.0,,,,72.0,1020.0,8.0,,0.0,,,,3.0,,,1.0,3.0,10003.0,"When Fabres reach a certain stage of growth, ...",9.0,small,earth,insect,Prontera South Gate
2,10004,lunatic,19.0,2.0,,2.0,,2.0,,,2.0,,19.0,6.0,,,,7.0,5.0,,,,80.0,1020.0,10.0,,11.0,,,,3.0,,,1.0,4.0,10004.0,The bunny monster which dwells near Prontera ...,9.0,small,neutral,brute,Prontera South Gate
3,10005,thief bug egg,28.0,3.0,,3.0,,3.0,,,3.0,,28.0,18.0,,,,0.0,6.0,,3.0,,242.0,1050.0,12.0,,0.0,,,,9.0,,,,13.0,10005.0,"In the kingdom's sewers, brown spherical obje...",13.0,small,shadow,insect,Capital Sewer 1F
4,10006,chonchon,41.0,3.0,,3.0,,3.0,,,3.0,,14.0,13.0,,,,20.0,5.0,,2.0,,204.0,1310.0,13.0,,13.0,,,,8.0,,,,12.0,10006.0,A flying monster with big watery eyes. Accord...,13.0,small,wind,insect,Morroc
5,10007,willow,50.0,2.0,,2.0,,2.0,,,2.0,,50.0,10.0,,,,21.0,6.0,,,,273.0,1430.0,10.0,,31.0,,,,4.0,,,2.0,10.0,10007.0,A sentient plant monster freed from the restr...,27.0,medium,earth,plant,Payon South
6,10008,roda frog,48.0,14.0,,14.0,,14.0,,,14.0,,144.0,86.0,,,,389.0,43.0,,,,7686.0,1130.0,57.0,,584.0,,,,29.0,,,24.0,33.0,10008.0,An amazing saltwater frog most often found dw...,48.0,medium,water,fish,Shipwreck
7,10009,thief bug,23.0,4.0,,4.0,,4.0,,,4.0,,38.0,19.0,,,,25.0,11.0,,,,328.0,1050.0,19.0,,37.0,,,,8.0,,,4.0,15.0,10009.0,"Once an insignificant bug, but mutated once c...",13.0,small,neutral,insect,Capital Sewer 1F
8,10010,grasshopper,78.0,2.0,,2.0,,2.0,,,2.0,,26.0,9.0,,,,28.0,7.0,,,,338.0,1030.0,14.0,,42.0,,,,5.0,,,2.0,11.0,10010.0,A grasshopper-shaped monster with a violin in...,27.0,medium,earth,insect,Prontera West Gate
9,10011,picky,21.0,2.0,,2.0,,2.0,,,2.0,,21.0,7.0,,,,6.0,5.0,,,,96.0,1310.0,10.0,,9.0,,,,3.0,,,1.0,6.0,10011.0,A cute chick monster that never seems to grow...,9.0,small,fire,brute,Morroc


#### Save data for database

In [None]:
# Write the monster table to the parsed-output folder.
monster_final_df.to_csv(f"{MAIN_PARSED_DIR}/monster_tw_{APK_DATE}.csv", index=False)