# Initial Prep

## Library Import

In [None]:
from ast import literal_eval
import math
import os
import re
import warnings

import pandas as pd
import numpy as np

# Raise pandas' row/column display limits to 500 and install a filter that
# ignores all warnings.
for _display_option in ("display.max_rows", "display.max_columns"):
  pd.set_option(_display_option, 500)
warnings.filterwarnings(action="ignore")

## Connect to Drive

In [None]:
# from google.colab import drive

# drive.mount('/content/drive', force_remount=True)

### Copy Files from Drive

In [None]:
# !mkdir update_changes/
# !mkdir update_changes/20220816
# !mkdir update_changes/20220816/cleaned
# !cp -r /content/drive/MyDrive/update_changes/20220816/cleaned/* ./update_changes/20220816/cleaned/

In [None]:
# !rm -r update_changes

## Folder Prep

In [None]:
# Date tag of the dump being processed; it names the per-date folder and is
# appended to the exported CSV file names.
APK_DATE = "20220816"
MAIN_DIR = "./update_changes"
# Folder the parsers read their input files from.
MAIN_CLEANED_DIR = f"{MAIN_DIR}/{APK_DATE}/cleaned"
# Folder the generated CSV files are written to.
MAIN_PARSED_DIR = f"{MAIN_DIR}/{APK_DATE}/parsed"

# makedirs also creates missing parent folders (os.mkdir raises
# FileNotFoundError in that case) and exist_ok=True makes re-runs harmless.
os.makedirs(MAIN_PARSED_DIR, exist_ok=True)

## Function Initialization

In [None]:
def get_id_value(text: str, context: str, with_underscore: bool = False) -> tuple:
  """
  Extract the id and the value from one line of the language dump.

  The id is what connects the entry to the other tables (it acts as
  the primary key of the information provided).

  Parameters
  ----------
  text
    One line of the data dump, mostly looking like
    `["AttrPool_11010023"]="Refine to +9, ATK +5%",`
  context
    The name of the key before the id; for the line above the
    context is `AttrPool`. It is matched literally.
  with_underscore
    If True, an underscore is expected between the context and the
    id and the id is any run of word characters. If False, the id is
    whatever run of digits, spaces and underscores directly follows
    the context (a leading underscore, if any, stays part of the id).

  Returns
  -------
  id, value
    tuple containing id and value of the attr

  Raises
  ------
  IndexError
    If `text` does not match the expected pattern
  """

  # re.escape keeps a context that contains regex metacharacters literal.
  prefix = re.escape(context)

  # Raw strings: `\d`, `\w` and friends are regex escapes, not string escapes.
  if with_underscore:
    regex = rf'"{prefix}_(\w*)"]="(.*)"'
  else:
    regex = rf'"{prefix}([\d\ \_]*)"]="(.*)"'

  return re.findall(regex, text)[0]

def create_and_add_entry(context: str, key: str, with_underscore: bool) -> None:
  """
  Create an entry for a table and append it to the existing dictionary.

  The line to parse is read from the module-level `text` variable and
  the entry is appended to the module-level `parsed_data` dictionary.

  Parameters
  ----------
  context
    The name of the key before the id, e.g. `AttrPool` for a line
    starting with `["AttrPool_11010023"]`
  key
    The name of the key in the `parsed_data` dictionary
  with_underscore
    Whether the attr contains underscore in its name
    or not

  Returns
  -------
  None
  """

  try:
    entry_id, value = get_id_value(text, context, with_underscore=with_underscore)
    parsed_data[key].append({"id": entry_id, "value": value})
  except (IndexError, KeyError):
    # IndexError: the line does not match the pattern for `context`.
    # KeyError: `key` is not declared in `parsed_data`.
    # Such lines are skipped so one odd line does not abort the whole parse;
    # any other exception is a real error and is allowed to propagate.
    pass

In [None]:
def parse_and_convert(texts):
  """
  Convert the lines of a `[key]=value` table dump into a dictionary.

  The lines are joined with spaces, rewritten into Python literal
  syntax and evaluated with `ast.literal_eval`.

  Parameters
  ----------
  texts
    Iterable of strings (the lines of the dump)

  Returns
  -------
  parsed_dict
    Dictionary built from the dump

  Raises
  ------
  ValueError, SyntaxError
    Raised by `literal_eval` when the rewritten text is not a valid
    Python literal
  """
  joined = " ".join(texts)

  # `["key"]=` / `[123]=`  ->  `"key":` / `123:`
  joined = re.sub(r'\[([\w\"]+)\]=', r'\g<1>:', joined)
  # `{ 1,2,3 }`  ->  `[1,2,3]`
  joined = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', joined)
  # `:{1, 2, 3}`  ->  `:[1, 2, 3]`
  joined = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', joined)

  # Empty tables and tables of tables become lists.
  joined = joined.replace("{}", "[]")
  joined = joined.replace("{ {", "[ {")
  joined = joined.replace("} }", "} ]")

  # The dump has no outer braces; add them so it evaluates to one dict.
  return literal_eval("{" + joined + "}")

# Parser

## En_langs

### Declare RE for cleaning Chinese chars

In [None]:
# Compiled character class made of the CJK ranges listed inside the brackets;
# per the section heading it is meant for cleaning Chinese characters from text.
RE = re.compile(u'[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]', re.UNICODE)

### Parse data

In [None]:
# One list of {"id": ..., "value": ...} entries per kind of text found in
# `en_langs`. Some keys are declared but not filled by the table below.
parsed_data = {
    "area_name": [],
    "area_name_new": [],
    "attr_pool": [],
    "attr": [],
    "battle_pass_quest_desc": [],
    "battle_pass_quest_name": [],
    "size": [],
    "buff_desc": [],
    "buff_name": [],
    "card_attr_desc": [],
    "card_coordinates": [],
    "equip_desc": [],
    "equip_name": [],
    "equip_type": [],
    "goods_desc": [],
    "item_desc": [],
    "item_name": [],
    "item_type": [],
    "instance_description": [],
    "job_name": [],
    "mvp_desc": [],
    "mvp_name": [],
    "map_npc_name": [],
    "monster_desc": [],
    "monster_name": [],
    "monster_type": [],
    "ox_exam_question": [],
    "property": [],
    "race": [],
    "scene_name": [],
    "shadow_weapon": [],
    "shadow_weapon_name": [],
    "shadow_weapon_task": [],
    "shadow_weapon_task_name": [],
    "shadow_weapon_prop_des": [],
    "skill_name": [],
    "skill_desc": [],
    "suit_name": [],
    "title": [],
    "weather": [],
    "weather_desc": [],
    "mount_name": [],
    "mount_desc": [],
    "mount_job": [],
}

# (context, key in `parsed_data`, with_underscore) for every family of lines
# read from `en_langs`. A line is handled by the FIRST row whose context
# follows the opening `["`, so a context that is a prefix of another one must
# come after it (AttrPool before Attr, ShadowWeaponTaskName before
# ShadowWeaponTask, WeatherDes before Weather).
EN_LANGS_CONTEXTS = [
    ("areaname", "area_name", False),
    ("Areaname", "area_name_new", False),
    ("AttrPool", "attr_pool", True),
    ("Attr", "attr", True),
    ("BattlePassQuestDesc", "battle_pass_quest_desc", True),
    # Used to be appended to "battle_pass_quest_desc" by mistake.
    ("BattlePassQuestName", "battle_pass_quest_name", True),
    # Used to target the undeclared key "body", so every row was dropped.
    # NOTE(review): "size" is assumed to be the intended key - confirm.
    ("Body", "size", True),
    ("BuffDes", "buff_desc", True),
    ("BuffName", "buff_name", True),
    ("CardAttributeDescription", "card_attr_desc", True),
    ("CardCoordinates", "card_coordinates", True),
    ("EquipDesc", "equip_desc", True),
    ("EquipName", "equip_name", True),
    ("equipmentType", "equip_type", True),
    ("GoodsDes", "goods_desc", False),
    ("ItemDes", "item_desc", True),
    ("ItemName", "item_name", True),
    ("ItemType", "item_type", False),
    ("InstanceDescription", "instance_description", False),
    ("JobName", "job_name", True),
    ("MVPDes", "mvp_desc", True),
    ("MVPName", "mvp_name", True),
    ("MapNpcName", "map_npc_name", False),
    ("MonsterCollection", "monster_desc", False),
    ("MonsterName", "monster_name", False),
    ("MonsterType", "monster_type", True),
    ("OXExam", "ox_exam_question", True),
    ("Property", "property", False),
    ("Race", "race", True),
    ("SceneName", "scene_name", False),
    ("ShadowWeaponName", "shadow_weapon_name", True),
    ("ShadowWeaponTaskName", "shadow_weapon_task_name", True),
    ("ShadowWeaponPropDes", "shadow_weapon_prop_des", True),
    ("ShadowWeaponTask", "shadow_weapon_task", True),
    ("SkillDesc", "skill_desc", True),
    ("SkillName", "skill_name", True),
    ("SuitName", "suit_name", False),
    ("Title", "title", True),
    ("WeatherDes", "weather_desc", True),
    ("Weather", "weather", True),
]

with open(f"{MAIN_CLEANED_DIR}/en_langs.bytes", "r", encoding="utf8") as en_langs_file:
  # The loop variable must be the module-level name `text`:
  # create_and_add_entry reads the current line from it.
  for text in en_langs_file:
    for entry_context, entry_key, entry_with_underscore in EN_LANGS_CONTEXTS:
      if text.startswith(f'["{entry_context}'):
        create_and_add_entry(entry_context, entry_key, entry_with_underscore)
        break

## Skill

### Get information from `en_langs`

In [None]:
# Skill names and skill descriptions parsed from en_langs, one frame each,
# with the `value` column renamed to what it holds.
skill_name_df = pd.DataFrame(parsed_data["skill_name"])
skill_name_df = skill_name_df.rename(columns={"value": "name"})

skill_desc_df = pd.DataFrame(parsed_data["skill_desc"])
skill_desc_df = skill_desc_df.rename(columns={"value": "description"})

In [None]:
# Left join on `id`: every named skill is kept, with or without a description.
skill_df = skill_name_df.merge(skill_desc_df, how="left", on="id")

In [None]:
# Independent (deep) copy of the merged frame, used for the export.
skill_final_df = skill_df.copy(deep=True)

### Save data for database

In [None]:
# Write the simple skill table (name + description) without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_{APK_DATE}.csv"
skill_final_df.to_csv(_csv_path, index=False)

## Skill Advanced

### Parse data

In [None]:
LIMIT = 999999  # maximum number of lines read from the dump

texts = []
with open(f"{MAIN_CLEANED_DIR}/data_skill_Skill.bytes", "r", encoding="utf8") as skill_file:
  for line_no, line in enumerate(skill_file):
    if line_no == LIMIT:
      break
    texts.append(line.strip())

# The file is closed from here on; everything below is pure string rewriting.
texts = " ".join(texts)

# `["key"]=` / `[123]=`  ->  `"key":` / `123:`
texts = re.sub(r'\[([\w\"]+)\]=', r'\g<1>:', texts)
# `{ 1,2,3 }`  ->  `[1,2,3]`
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
# `:{1, 2, 3}`  ->  `:[1, 2, 3]`
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

texts = texts.replace("{}", "[]")
texts = texts.replace("{ {", "[ {")

# Closing-brace runs are first swapped for placeholders, longest pattern
# first, so the short "} }" rule cannot eat part of a longer run. The
# statement order below is significant.
texts = texts.replace("} } }, {", "AAAAA")
texts = texts.replace("} } } }", "BBBBB")
texts = texts.replace("} } }", "XXXXX")
texts = texts.replace("} }, {", "ZZZZZ")
texts = texts.replace("} }", "YYYYY")
# Each placeholder is then expanded to its final form ("ZZZZZ" is restored
# unchanged: that run is only protected, not rewritten).
texts = texts.replace("XXXXX", "} } ]")
texts = texts.replace("ZZZZZ", "} }, {")
texts = texts.replace("YYYYY", "} ]")
texts = texts.replace("AAAAA", "} ] }, {")
texts = texts.replace("BBBBB", "} ] } ]")

# The dump has no outer braces; add them so it evaluates to one dict.
texts = "{" + texts + "}"
skills = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column name -> key of the raw skill record. Built once instead of
# on every loop iteration.
default_key_dict = {
    "is_mount_combat": "IsMountCombat",
    "range": "range",
    "is_damage_skill": "IsDamageSkill",
    "require_mount_combat": "RequireMountCombat",
    "skill_group_id": "SkillGroupID",
    "max_level": "MaxLevel",
    "pre_skill": "PreSkill",
    "extra_range_skill_id": "ExtraRangeSkillId",
    "buff_list": "BuffList",
    "type": "Type",
    "cooldown": "CoolDown",
    "related_buff": "RelatedBuff",
    "pet_skill_type": "PetSkillType",
    "must_equip": "mustEquip",
    "job": "Job",
    "max_hp_cost": "MaxHpCost",
    "skill_id": "SkillId",
    "is_pet_skill_can_use_when_master_die": "IsPetSkillCanUseWhenMasterDie",
    "auto_battle_type": "AutoBattleType",
    "skill_weapon": "SkillWeapon",
    "cost_zeny": "CostZeny",
    "name": "Name",
    "desc_args": "Desc_args",
    "combo": "combo",
    "skill_sketch": "SkillSketch",
    "res_id": "ResID",
    "require_mount_id": "RequireMountID",
    "pet_skill_element": "PetSkillElement",
    "desc": "Desc",
    "fixed_cooldown": "FixedCoolDown",
    "suit_skills_or_not": "SuitSkillsOrNot",
    "pre_item": "PreItem",
    "cost_item": "CostItem",
    "cost": "Cost",
    "is_show_skill_tree": "isShowSkillTree",
}

skill_entries = []

for skill_key, raw_skill in skills.items():
  # A record that is not a dict contributes only its id.
  raw_fields = raw_skill if isinstance(raw_skill, dict) else {}

  skill_entry = {"id": skill_key}
  # Keys missing from the record are simply left out; pandas turns the
  # resulting gaps into NaN when the frame is built.
  skill_entry.update(
      (col, raw_fields[def_key])
      for col, def_key in default_key_dict.items()
      if def_key in raw_fields
  )
  skill_entries.append(skill_entry)

skill_df = pd.DataFrame(skill_entries)

### Merge with information from `en_langs`

In [None]:
# Replace every missing value in the frame with the string "-9999".
# NOTE(review): after this call the existing columns hold no nulls, so the
# `.isnull()` / `.fillna(...)` steps applied to those columns in the cells
# below look like they have nothing left to act on - confirm this is intended.
skill_df = skill_df.fillna("-9999")

#### Skill name


In [None]:
# Lookup frame: skill_name_id -> name, from the en_langs entries.
skill_name_df = pd.DataFrame(parsed_data["skill_name"])
skill_name_df = skill_name_df.rename(columns={"id": "skill_name_id", "value": "name"})

# The raw `name` column is turned into a join key by removing "SkillName_",
# then dropped so the merged-in `name` column can take its place.
skill_df["skill_name_id"] = skill_df["name"].str.replace("SkillName_", "")
skill_df = skill_df.drop(columns="name")
skill_df = skill_df.merge(skill_name_df, how="left", on="skill_name_id")


#### Skill description

In [None]:
# Give every null `desc` cell a one-element list ([None]), then reduce each
# cell to its first element via `.apply(list).str[0]`.
isnull = skill_df["desc"].isnull()
skill_df.loc[isnull, 'desc'] = pd.Series([[None]] * isnull.sum()).values
skill_df["desc"] = skill_df["desc"].apply(list).str[0]

# Remove the "SkillDesc_" prefix to obtain the join key, then drop the raw
# column so the merged-in `desc` column can take its place.
skill_df["skill_desc_id"] = skill_df["desc"].str.replace("SkillDesc_", "")
skill_df = skill_df.drop("desc", axis=1)

# Left join with the descriptions parsed from en_langs; rows without a match
# get NaN in `desc`.
skill_desc_df = pd.DataFrame(parsed_data["skill_desc"]).rename(columns={"id": "skill_desc_id", "value": "desc"})
skill_df = pd.merge(skill_df, skill_desc_df, how="left", left_on=["skill_desc_id"], right_on=["skill_desc_id"])

### Fix missing values

In [None]:
# Defaults for missing flags: is_damage_skill -> 1, type -> 1.
skill_df["is_damage_skill"] = skill_df["is_damage_skill"].fillna(1)

skill_df["type"] = skill_df["type"].fillna(1)

# `cooldown`: null cells get a one-element list ([None]), then every cell is
# reduced to its first element.
isnull = skill_df["cooldown"].isnull()
skill_df.loc[isnull, 'cooldown'] = pd.Series([[None]] * isnull.sum()).values
skill_df["cooldown"] = skill_df["cooldown"].apply(list).str[0]

# `cost`: same treatment as `cooldown`.
isnull = skill_df["cost"].isnull()
skill_df.loc[isnull, 'cost'] = pd.Series([[None]] * isnull.sum()).values
skill_df["cost"] = skill_df["cost"].apply(list).str[0]

# Defaults for missing flags: is_mount_combat -> 0, is_show_skill_tree -> 0.
skill_df["is_mount_combat"] = skill_df["is_mount_combat"].fillna(0)

skill_df["is_show_skill_tree"] = skill_df["is_show_skill_tree"].fillna(0)

### Select columns for database

In [None]:
# Columns exported to the database, in output order.
SKILL_FINAL_COLUMNS = [
    "id",
    "name",
    "desc",
    "skill_group_id",
    "is_damage_skill",
    "max_level",
    "type",
    "cooldown",
    "job",
    "res_id",
    "cost",
    "is_mount_combat",
    "is_show_skill_tree",
    "skill_weapon",
    "fixed_cooldown",
    "combo",
    "range",
    "require_mount_combat",
    "pet_skill_type",
    "is_pet_skill_can_use_when_master_die",
    "pet_skill_element",
    "suit_skills_or_not",
]

# .copy() makes the selection an independent frame, so column assignments on
# skill_final_df in the following cells are not chained assignments on a
# slice of skill_df.
skill_final_df = skill_df[SKILL_FINAL_COLUMNS].copy()

In [None]:
# Column names used in the exported file.
_skill_column_renames = {
    "desc": "description",
    "skill_group_id": "group_id",
}
skill_final_df = skill_final_df.rename(columns=_skill_column_renames)

In [None]:
def _empty_list_to_nan(value):
  """Return NaN for an empty list and the value itself otherwise."""
  if value == []:
    return np.nan
  return value


skill_final_df["skill_weapon"] = skill_final_df["skill_weapon"].apply(_empty_list_to_nan)

In [None]:
def _first_item_or_nan(value):
  """Return NaN for an empty list and the first item of the value otherwise."""
  if value == []:
    return np.nan
  return value[0]


skill_final_df["fixed_cooldown"] = skill_final_df["fixed_cooldown"].apply(_first_item_or_nan)

### Save for database

In [None]:
# The exported table is the final skill frame without `skill_weapon`.
skill_complex_final_df = skill_final_df.drop(columns=["skill_weapon"])

In [None]:
# Write the detailed skill table without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_complex_{APK_DATE}.csv"
skill_complex_final_df.to_csv(_csv_path, index=False)

## Item


### Get item information from `en_langs`

In [None]:
# One frame per kind of item text parsed from en_langs (name, description,
# type), each with the columns `id` and `value`.
item_name_df, item_desc_df, item_type_df = (
    pd.DataFrame(parsed_data[lang_key])
    for lang_key in ("item_name", "item_desc", "item_type")
)

### Parse data

In [None]:
LIMIT = 9999999  # maximum number of lines read from the dump

texts = []
with open(f"{MAIN_CLEANED_DIR}/data_item_Item.bytes", "r", encoding="utf8") as item_file:
  for line_no, line in enumerate(item_file):
    if line_no == LIMIT:
      break
    texts.append(line.strip())

# The file is closed from here on; everything below is pure string rewriting.
texts = " ".join(texts)

# `["key"] =` / `[123] =`  ->  `"key":` / `123:`
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
# Brace-delimited runs of digits/commas/spaces become lists.
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Brace runs are first swapped for placeholders so the generic "}, }," rule
# (CCCCC) cannot touch the more specific runs. The statement order below is
# significant.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Each placeholder is then expanded to its final form (DDDDD, EEEEE and
# FFFFF restore the original braces: those runs are only protected).
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# The dump has no outer braces; add them so it evaluates to one dict.
texts = "{" + texts + "}"

items = literal_eval(texts)
# print(len(items))

### Create entries for DataFrame creation

In [None]:
# Output column name -> key of the raw item record. Built once instead of on
# every loop iteration; entries that were listed twice (with the same raw
# key) are kept at their first position, which preserves the column order.
default_key_dict = {
    "cd": "cd",
    "item_desc": "itemDesc",
    "item_type": "itemType",
    "max_stack": "maxStack",
    "page": "page",
    "res_id": "resId",
    "stackable": "stackable",
    "weight": "weight",
    "static_id": "staticId",
    "card_attrs": "CardAttrs",
    "card_quality": "cardQuality",
    "card_slots": "cardSlots",
    "deposite_attrs": "DepositeAttrs",
    "is_mvp_card": "IsMvpCard",
    "item_subtype": "itemSubType",
    "min_level": "minLevel",
    "monster_id": "monster_id",
    "name": "name",
    "unlock_adventure_exp": "UnlockAdventureExp",
    "expired_date": "ExpiredDate",
    "is_bind": "isBind",
    "item_expired_type": "itemExpiredType",
    "sub_page": "subPage",
    "is_hide": "isHide",
    "use": "use",
    "show_in_ui": "showInUi",
    "acquire": "Acquire",
    "card_coordinate_point": "CardCoordinatePoint",
    "is_in_collection": "IsInCollection",
    "monster": "Monster",
    "sell_price": "SellPrice",
    "area_id": "AreaId",
    "cd_group_id": "CdGroupId",
    "cd_type": "CdType",
    "element": "Element",
    "exp": "Exp",
    "fish_rod_type": "FishRodType",
    "fish_tool_type": "FishToolType",
    "gift_send_limit": "GiftSendLimit",
    "if_can_quick_use": "IfCanQuickUse",
    "if_combined_for_life": "IfCombinedForLife",
    "if_get_off_mount": "IfGetOffMount",
    "if_stop_navigation": "IfStopNavigation",
    "interface_id": "InterfaceId",
    "is_gift_item": "IsGiftItem",
    "item_quality": "ItemQuality",
    "item_static_id": "ItemStaticId",
    "max_use": "MaxUse",
    "mine_tool_type": "MineToolType",
    "npc_id": "NpcId",
    "npc_navigation": "NpcNavigation",
    "oon_box_loot_bind_status": "OONBoxLootBindStatus",
    "oon_box_loot_id": "OONBoxLootId",
    "oon_box_loot_number": "OONBoxLootNumber",
    "oon_box_loot_type": "OONBoxLootType",
    "pet_pill_bullet_id": "PetPillBulletId",
    "pet_pill_fixed_damage_rate": "PetPillFixedDamageRate",
    "pet_pill_max_damage": "PetPillMaxDamage",
    "related_activity_type": "RelatedActivityType",
    "related_pet_skill": "RelatedPetSkill",
    "scene_id": "SceneId",
    "sell_navigation": "SellNavigation",
    "stall_currency_type": "StallCurrencyType",
    "stall_item_level": "StallItemLevel",
    "stall_price_lower_limit": "StallPriceLowerLimit",
    "stall_price_type": "StallPriceType",
    "stall_price_upper_limit": "StallPriceUpperLimit",
    "stall_type": "StallType",
    "stall_zeny_child_label": "StallZenyChildLabel",
    "is_all_job": "isAllJob",
    "item_subtype_task": "itemSubTypeTask",
    "job_limit": "jobLimit",
    "pet_cage_type": "petcagetype",
    "related_gm_activity_type": "relatedGMActivityType",
}

item_entries = []

for item_key, raw_item in items.items():
  # A record that is not a dict contributes only its id.
  raw_fields = raw_item if isinstance(raw_item, dict) else {}

  item_entry = {"id": item_key}
  # Keys missing from the record are simply left out; pandas turns the
  # resulting gaps into NaN when the frame is built.
  item_entry.update(
      (col, raw_fields[def_key])
      for col, def_key in default_key_dict.items()
      if def_key in raw_fields
  )
  item_entries.append(item_entry)

item_df = pd.DataFrame(item_entries)

### Merge with information from `en_langs`

#### Item name


In [None]:
item_name_df = item_name_df.rename(columns={"value": "item_name"})

# Cast the ids on both sides to int so the join keys share one type.
for _frame in (item_df, item_name_df):
  _frame["id"] = _frame["id"].apply(int)

# Left join: items without an English name keep NaN in `item_name`.
item_df = item_df.merge(item_name_df, how="left", on="id")

#### Item description

In [None]:
# Rebuild the full "ItemDes_<id>" key so it can be compared with the
# `item_desc` column of item_df.
item_desc_df["id"] = item_desc_df["id"].apply(lambda raw_id: "ItemDes_{}".format(raw_id))
item_desc_df = item_desc_df.rename(
    columns={"id": "item_desc", "value": "item_desc_en"}
)
item_df = item_df.merge(item_desc_df, how="left", on="item_desc")

#### Item type

In [None]:
def _type_subtype_key(row):
  """Build the "<item_type>_<item_subtype>" join key of one item row."""
  return f'{row["item_type"]}_{row["item_subtype"]}'


# Missing type / subtype count as 0; both are stored as integers.
for _type_col in ("item_type", "item_subtype"):
  item_df[_type_col] = item_df[_type_col].fillna(0).apply(int)

item_df["item_type_subtype"] = item_df.apply(_type_subtype_key, axis=1)

item_type_df = item_type_df.rename(
    columns={"id": "item_type_subtype", "value": "item_type_en"}
)
item_df = item_df.merge(item_type_df, how="left", on="item_type_subtype")

### Save raw data

In [None]:
# Write the full (unfiltered) item frame without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/item_raw_{APK_DATE}.csv"
item_df.to_csv(_csv_path, index=False)

### Save data for database

In [None]:
# Rows that have a card coordinate point get the fixed res_id 99999.
# NOTE(review): the meaning of 99999 is not visible here - presumably a
# placeholder resource id; confirm.
item_df.loc[item_df["card_coordinate_point"].notna(), "res_id"] = 99999

# Columns exported to the database, in output order.
ITEM_FINAL_COLUMNS = [
    "id",
    "item_name",
    "item_desc_en",
    "res_id",
    "item_type_en",
    "cd",
    "max_stack",
    "stackable",
    "weight",
    "sell_price",
    "stall_price_lower_limit",
    "stall_price_upper_limit",
    "item_quality",
]

# .copy() makes the selection an independent frame, so the assignment below
# is not a chained assignment on a slice of item_df.
item_final_df = item_df[ITEM_FINAL_COLUMNS].copy()
# A missing res_id falls back to the item's own id.
item_final_df["res_id"] = item_final_df["res_id"].fillna(item_final_df["id"])
# Items without an English name are not exported.
item_final_df = item_final_df[item_final_df["item_name"].notna()]

item_final_df = item_final_df.rename(columns={
    "item_name": "name",
    "item_desc_en": "description",
    "item_type_en": "type",
    "item_quality": "quality",
})

item_final_df["type"] = item_final_df["type"].fillna("Uncategorized")
item_final_df["name"] = item_final_df["name"].str.lower()
# Ids below 20000 are flagged 0, all other ids 1.
item_final_df["is_visible"] = item_final_df["id"].apply(lambda item_id: 0 if item_id < 20000 else 1)

item_final_df.to_csv(f"{MAIN_PARSED_DIR}/item_{APK_DATE}.csv", index=False)

## Skill Preitem

### Create entries for dataframe creation

In [None]:
# (id, pre_item) of every skill whose `pre_item` is not null, as a list of dicts.
_has_pre_item = skill_df["pre_item"].notna()
skill_pre_item_records = skill_df.loc[_has_pre_item, ["id", "pre_item"]].to_dict(orient="records")

In [None]:
# One row per (skill, required item): the nested `pre_item` list of each
# skill is flattened.
skill_pre_item_fins = [
    {
        "skill_id": record["id"],
        "item_id": pre_item["ItemId"],
        "item_num": pre_item["Number"],
    }
    for record in skill_pre_item_records
    if record["pre_item"] != {}
    for pre_item in record["pre_item"]
]

In [None]:
# Naming the columns explicitly keeps them present when the list is empty;
# a frame built from an empty list has no columns and the `skill_id` /
# `item_id` lookups in the next cell would raise KeyError.
skill_pre_item_df = pd.DataFrame(skill_pre_item_fins, columns=["skill_id", "item_id", "item_num"])

In [None]:
# Keep only the rows whose skill is part of the exported skill table ...
_skill_ids = skill_pre_item_df["skill_id"].astype(float)
intersections = set(_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_pre_item_df = skill_pre_item_df.loc[_skill_ids.isin(intersections)]

# ... and whose item is part of the exported item table.
_item_ids = skill_pre_item_df["item_id"].astype(float)
intersections = set(_item_ids) & set(item_final_df["id"].astype(float))
skill_pre_item_df = skill_pre_item_df.loc[_item_ids.isin(intersections)]

In [None]:
# The current row index becomes the `id` column of the exported table.
_with_index_column = skill_pre_item_df.reset_index()
skill_pre_item_final_df = _with_index_column.rename(columns={"index": "id"})

### Save for database

In [None]:
# Write the skill -> required item table without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_pre_item_{APK_DATE}.csv"
skill_pre_item_final_df.to_csv(_csv_path, index=False)

## Skill Buff

### Create entries for dataframe creation

In [None]:
# (id, buff_list) of every skill whose `buff_list` is not null, as a list of dicts.
_has_buff_list = skill_df["buff_list"].notna()
skill_buff_list_records = skill_df.loc[_has_buff_list, ["id", "buff_list"]].to_dict(orient="records")

In [None]:
# One row per (skill, buff): the `buff_list` of each skill is flattened.
skill_buff_list_fins = [
    {"skill_id": record["id"], "buff": buff_list}
    for record in skill_buff_list_records
    if record["buff_list"] != {}
    for buff_list in list(record["buff_list"])
]

In [None]:
# Naming the columns explicitly keeps them present when the list is empty;
# a frame built from an empty list has no columns and the `skill_id` lookup
# in the next cell would raise KeyError.
skill_buff_df = pd.DataFrame(skill_buff_list_fins, columns=["skill_id", "buff"])

In [None]:
# Keep only the rows whose skill is part of the exported skill table.
_skill_ids = skill_buff_df["skill_id"].astype(float)
intersections = set(_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_buff_df = skill_buff_df.loc[_skill_ids.isin(intersections)]

In [None]:
# The current row index becomes the `id` column of the exported table.
_with_index_column = skill_buff_df.reset_index()
skill_buff_final_df = _with_index_column.rename(columns={"index": "id"})

### Save for database

In [None]:
# Write the skill -> buff table without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_buff_{APK_DATE}.csv"
skill_buff_final_df.to_csv(_csv_path, index=False)

## Skill Cost Item

### Create entries for dataframe creation

In [None]:
# (id, cost_item) of every skill whose `cost_item` is not null, as a list of dicts.
_has_cost_item = skill_df["cost_item"].notna()
skill_cost_item_records = skill_df.loc[_has_cost_item, ["id", "cost_item"]].to_dict(orient="records")

In [None]:
# One row per (skill, consumed item): the nested `cost_item` list of each
# skill is flattened.
skill_cost_item_fins = [
    {
        "skill_id": record["id"],
        "item_id": cost_item["ItemId"],
        "item_num": cost_item["Number"],
    }
    for record in skill_cost_item_records
    if record["cost_item"] != {}
    for cost_item in record["cost_item"]
]

In [None]:
# Naming the columns explicitly keeps them present when the list is empty;
# a frame built from an empty list has no columns and the `skill_id` /
# `item_id` lookups in the filtering cell would raise KeyError.
skill_cost_item_df = pd.DataFrame(skill_cost_item_fins, columns=["skill_id", "item_id", "item_num"])

In [None]:
# Bare expression: shows the frame as the cell output (nothing is assigned).
skill_cost_item_df

Unnamed: 0,skill_id,item_id,item_num
0,199994,10202072,1
1,199995,10202070,1
2,199995,10202071,1
3,199996,10202157,1
4,199997,10202158,1
5,199997,10202159,1
6,199997,10202160,1


In [None]:
# Keep only the rows whose skill is part of the exported skill table ...
_skill_ids = skill_cost_item_df["skill_id"].astype(float)
intersections = set(_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_cost_item_df = skill_cost_item_df.loc[_skill_ids.isin(intersections)]

# ... and whose item is part of the exported item table.
_item_ids = skill_cost_item_df["item_id"].astype(float)
intersections = set(_item_ids) & set(item_final_df["id"].astype(float))
skill_cost_item_df = skill_cost_item_df.loc[_item_ids.isin(intersections)]

In [None]:
# The current row index becomes the `id` column of the exported table.
_with_index_column = skill_cost_item_df.reset_index()
skill_cost_item_final_df = _with_index_column.rename(columns={"index": "id"})

### Save for database

In [None]:
# Write the skill -> consumed item table without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_cost_item_{APK_DATE}.csv"
skill_cost_item_final_df.to_csv(_csv_path, index=False)

## Skill Factor

### Parse data

In [None]:
LIMIT = 9029399292  # maximum number of lines read from the dump

texts = []
with open(f"{MAIN_CLEANED_DIR}/data_SkillFactor.bytes", "r", encoding="utf8") as skill_factor_file:
  for line_no, line in enumerate(skill_factor_file):
    if line_no == LIMIT:
      break
    texts.append(line.strip())

texts = " ".join(texts)
# `Name =`  ->  `"Name" :` (bare identifiers become quoted dict keys)
texts = re.sub(r'([A-Za-z]+) =', r'"\g<1>" :', texts)
# Numeric `[123] = ` prefixes are dropped, leaving only the values.
texts = re.sub(r'\[[\d]+\] = ', '', texts)

skill_factors = literal_eval(texts)

### Create entries for DataFrame

In [None]:
# Output column name -> key of the raw skill factor record. Built once
# instead of on every loop iteration. `id` comes first, so it is also the
# first column of the frame (a separate, redundant `id` assignment is not
# needed).
default_key_dict = {
    "id": "Id",
    "factor_name": "FactorName",
    "factor_order": "FactorOrder",
    "final_factor": "FinalFactor",
    "skill_id": "SkillId",
    "skill_level": "SkillLevel",
}

skill_factor_entries = []

for skill_factor in skill_factors:
  # A record that is not a dict yields an empty entry.
  raw_fields = skill_factor if isinstance(skill_factor, dict) else {}

  # Keys missing from the record are simply left out; pandas turns the
  # resulting gaps into NaN when the frame is built.
  skill_factor_entry = {
      col: raw_fields[def_key]
      for col, def_key in default_key_dict.items()
      if def_key in raw_fields
  }
  skill_factor_entries.append(skill_factor_entry)

skill_factor_df = pd.DataFrame(skill_factor_entries)

### Fix missing values

In [None]:
# Default used for each column when the value is missing.
_skill_factor_defaults = {
    "factor_order": 1,
    "id": 1,
    "skill_level": 1,
    "skill_id": 0,
    "final_factor": 20,
}

for _factor_col, _factor_default in _skill_factor_defaults.items():
  skill_factor_df[_factor_col] = skill_factor_df[_factor_col].fillna(_factor_default)

### Save for database

In [None]:
# Keep only the factors whose skill is part of the exported skill table.
_skill_ids = skill_factor_df["skill_id"].astype(int)
intersections = set(_skill_ids) & set(skill_complex_final_df["id"].astype(int))
skill_factor_final_df = skill_factor_df.loc[_skill_ids.isin(intersections)]

_csv_path = f"{MAIN_PARSED_DIR}/skill_factor_{APK_DATE}.csv"
skill_factor_final_df.to_csv(_csv_path, index=False)

## Skill Description Arguments

### Create entries for dataframe creation

In [None]:
# (id, desc_args) of every skill whose `desc_args` length is not 0, as a list of dicts.
_has_desc_args = skill_df["desc_args"].str.len() != 0
skill_desc_args_records = skill_df.loc[_has_desc_args, ["id", "desc_args"]].to_dict(orient="records")

In [None]:
skill_desc_args_fins = []

# One row per (skill, description argument).
for record in skill_desc_args_records:
  if record["desc_args"] != []:
    for desc_args in record["desc_args"]:
      skill_desc_args_fin = {
          "skill_id": record["id"],
          "factor": desc_args["Factor"],
          "factor_bit": desc_args["FactorBit"],
          "level_type": desc_args["LevelType"],
      }

      # `type` is optional: it is only copied when the argument has it
      # (explicit test instead of a bare `except` around the lookup).
      if "type" in desc_args:
        skill_desc_args_fin["type"] = desc_args["type"]

      skill_desc_args_fins.append(skill_desc_args_fin)

skill_desc_args_df = pd.DataFrame(skill_desc_args_fins)

In [None]:
# Keep only the rows whose skill is part of the exported skill table.
# The filtered frame is stored back into skill_desc_args_df: it used to be
# assigned to skill_factor_final_df, which overwrote the skill factor table
# and left the description arguments unfiltered.
intersections = set(skill_desc_args_df["skill_id"].astype(int)).intersection(set(skill_complex_final_df["id"].astype(int)))
skill_desc_args_df = skill_desc_args_df[skill_desc_args_df["skill_id"].astype(int).isin(intersections)]

In [None]:
# The current row index becomes the `id` column of the exported table.
_with_index_column = skill_desc_args_df.reset_index()
skill_desc_args_final_df = _with_index_column.rename(columns={"index": "id"})

### Save for database

In [None]:
# Write the skill description arguments table without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_desc_args_{APK_DATE}.csv"
skill_desc_args_final_df.to_csv(_csv_path, index=False)

## Skill Preskill

### Create entries for dataframe creation

In [None]:
# (id, pre_skill) of every skill whose `pre_skill` is not null.
_has_pre_skill = skill_df["pre_skill"].notna()
skill_pre_skill_records = skill_df.loc[_has_pre_skill, ["id", "pre_skill"]].to_dict(orient="records")

# One row per (skill, prerequisite skill): the nested `pre_skill` list of
# each skill is flattened.
skill_pre_skill_fins = [
    {
        "skill_id": record["id"],
        "pre_skill": pre_skill["SkillId"],
        "pre_skill_level": pre_skill["SkillLevel"],
    }
    for record in skill_pre_skill_records
    if record["pre_skill"] != []
    for pre_skill in record["pre_skill"]
]

skill_pre_skill_df = pd.DataFrame(skill_pre_skill_fins)

# The row index becomes the `id` column.
_with_index_column = skill_pre_skill_df.reset_index()
skill_pre_skill_df = _with_index_column.rename(columns={"index": "id"})

In [None]:
# Keep only the rows whose prerequisite skill is part of the exported skill table ...
_pre_skill_ids = skill_pre_skill_df["pre_skill"].astype(int)
intersections = set(_pre_skill_ids) & set(skill_complex_final_df["id"].astype(int))
skill_pre_skill_df = skill_pre_skill_df.loc[_pre_skill_ids.isin(intersections)]

# ... and whose own skill is part of it as well.
_skill_ids = skill_pre_skill_df["skill_id"].astype(int)
intersections = set(_skill_ids) & set(skill_complex_final_df["id"].astype(int))
skill_pre_skill_final_df = skill_pre_skill_df.loc[_skill_ids.isin(intersections)]

### Save for database

In [None]:
# Write the skill -> prerequisite skill table without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_pre_skill_{APK_DATE}.csv"
skill_pre_skill_final_df.to_csv(_csv_path, index=False)

## Skill Required Mount

### Create entries for dataframe creation

In [None]:
# (id, require_mount_id) of every skill whose `require_mount_id` is not null.
_has_require_mount_id = skill_df["require_mount_id"].notna()
skill_require_mount_id_records = skill_df.loc[_has_require_mount_id, ["id", "require_mount_id"]].to_dict(orient="records")

# One row per (skill, required mount id).
skill_require_mount_id_fins = [
    {"skill_id": record["id"], "require_mount_id": require_mount_id}
    for record in skill_require_mount_id_records
    if record["require_mount_id"] != []
    for require_mount_id in list(record["require_mount_id"])
]

skill_require_mount_id_df = pd.DataFrame(skill_require_mount_id_fins)

# The row index becomes the `id` column.
_with_index_column = skill_require_mount_id_df.reset_index()
skill_require_mount_id_df = _with_index_column.rename(columns={"index": "id"})

In [None]:
# Keep only the rows whose skill is part of the exported skill table.
_skill_ids = skill_require_mount_id_df["skill_id"].astype(float)
intersections = set(_skill_ids) & set(skill_complex_final_df["id"].astype(float))
skill_require_mount_id_final_df = skill_require_mount_id_df.loc[_skill_ids.isin(intersections)]

# intersections = set(skill_require_mount_id_df["require_mount_id"].astype(float)).intersection(set(item_final_df["id"].astype(float)))
# skill_require_mount_id_final_df = skill_require_mount_id_df[skill_require_mount_id_df["require_mount_id"].astype(float).isin(intersections)]

### Save for database

In [None]:
# Write the skill -> required mount table without the index column.
_csv_path = f"{MAIN_PARSED_DIR}/skill_require_mount_id_{APK_DATE}.csv"
skill_require_mount_id_final_df.to_csv(_csv_path, index=False)

## Skill Max HP Cost

### Create entries for dataframe creation

In [None]:
# (id, max_hp_cost) of every skill whose `max_hp_cost` is not null.
_has_max_hp_cost = skill_df["max_hp_cost"].notna()
skill_max_hp_cost_records = skill_df.loc[_has_max_hp_cost, ["id", "max_hp_cost"]].to_dict(orient="records")

# One row per (skill, max HP cost value).
skill_max_hp_cost_fins = [
    {"skill_id": record["id"], "max_hp_cost": max_hp_cost}
    for record in skill_max_hp_cost_records
    if record["max_hp_cost"] != []
    for max_hp_cost in list(record["max_hp_cost"])
]

skill_max_hp_cost_df = pd.DataFrame(skill_max_hp_cost_fins)

# The row index becomes the `id` column.
_with_index_column = skill_max_hp_cost_df.reset_index()
skill_max_hp_cost_df = _with_index_column.rename(columns={"index": "id"})

In [None]:
# Keep only max-HP-cost rows whose skill id also exists in the final skill table.
cost_skill_ids = skill_max_hp_cost_df["skill_id"].astype(float)
intersections = set(cost_skill_ids).intersection(set(skill_complex_final_df["id"].astype(float)))
skill_max_hp_cost_final_df = skill_max_hp_cost_df[
    skill_max_hp_cost_df["skill_id"].astype(float).isin(intersections)
]

### Save for database

In [None]:
# Write the filtered skill max-HP-cost table to the parsed folder.
skill_max_hp_cost_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_max_hp_cost_{APK_DATE}.csv", index=False)

## Skill Zeny Cost

### Create entries for dataframe creation

In [None]:
# Explode each skill's `cost_zeny` list into one row per listed value.
skill_cost_zeny_records = (
    skill_df.loc[skill_df["cost_zeny"].notnull(), ["id", "cost_zeny"]]
    .to_dict(orient="records")
)

# Skills whose list is empty contribute no rows.
skill_cost_zeny_fins = [
    {"skill_id": rec["id"], "cost_zeny": zeny}
    for rec in skill_cost_zeny_records
    if rec["cost_zeny"] != []
    for zeny in list(rec["cost_zeny"])
]

# The running row number becomes the surrogate `id` column.
skill_cost_zeny_df = pd.DataFrame(skill_cost_zeny_fins)
skill_cost_zeny_df = skill_cost_zeny_df.reset_index().rename(columns={"index": "id"})

In [None]:
# Keep only zeny-cost rows whose skill id also exists in the final skill table.
final_skill_ids = set(skill_complex_final_df["id"].astype(float))
intersections = set(skill_cost_zeny_df["skill_id"].astype(float)).intersection(final_skill_ids)
in_final_skills = skill_cost_zeny_df["skill_id"].astype(float).isin(intersections)
skill_zeny_cost_final_df = skill_cost_zeny_df[in_final_skills]

### Save for database

In [None]:
# Write the filtered skill zeny-cost table to the parsed folder.
skill_zeny_cost_final_df.to_csv(f"{MAIN_PARSED_DIR}/skill_zeny_cost_{APK_DATE}.csv", index=False)

## Equip Suit

### Parse data

In [None]:
# Load the EquipmentSuit dump and rewrite its table syntax into a Python
# literal that `literal_eval` can turn into a dict.
texts = []

LIMIT = 328932992  # maximum number of lines read from the dump
with open(f"{MAIN_CLEANED_DIR}/data_equip_EquipmentSuit.bytes", "r", encoding="utf8") as dump_file:
  for i, text in enumerate(dump_file):
    if i == LIMIT:
      break
    texts.append(text.strip())

texts = " ".join(texts)

# `[key] =` becomes `key:`; brace groups holding only digits, commas and
# spaces become lists. The replacement templates are raw strings because
# `\g` is not a valid string escape.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap every nested-brace sequence for a placeholder first so that the
# rewrites below cannot overlap; longer sequences are handled before the
# shorter ones they contain.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Restore the placeholders: AAAAA opens a list, BBBBB and CCCCC close one,
# DDDDD / EEEEE / FFFFF go back to their original text.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

texts = "{" + texts + "}"

equipment_suits = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key in the parsed suit record.
default_key_dict = {
    "id": "ID",
    "argument_id": "argumentID",
    "argument_order": "argumentOrder",
    "argument_value": "argumentValue",
    "equip_id": "equip_id",
    "name": "name",
    "skill_id": "skillId",
    "suit_id": "suitId",
    "suit_num": "suitNum",
}

equipment_suit_entries = []

for suit_key, parsed_dict in equipment_suits.items():
  # Start from the outer key; a record-level "ID", when present, replaces it below.
  equipment_suit_entry = {"id": suit_key}
  for col, def_key in default_key_dict.items():
    try:
      equipment_suit_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent (or the record is not a mapping): leave the column
      # out so the DataFrame constructor fills it with NaN.
      continue
  equipment_suit_entries.append(equipment_suit_entry)

equip_suit_raw_df = pd.DataFrame(equipment_suit_entries)

In [None]:
# Normalise `argument_value` to plain lists. Records parsed without the field
# hold NaN (a float), which `list()` cannot iterate, so missing values are
# passed through untouched for the null handling that follows.
equip_suit_raw_df["argument_value"] = equip_suit_raw_df["argument_value"].apply(
    lambda value: value if value is None or isinstance(value, float) else list(value)
)

### Break entries down

In [None]:
# Only suit records that list equipment are broken down.
equip_suit_raw_df = equip_suit_raw_df[~equip_suit_raw_df["equip_id"].isnull()]

# Missing argument lists become `[None]` so the `[0]` lookups below are always valid.
for list_col in ("argument_value", "argument_order"):
  isnull = equip_suit_raw_df[list_col].isnull()
  equip_suit_raw_df.loc[isnull, list_col] = pd.Series([[None]] * isnull.sum()).values

equip_suit_rec_df = equip_suit_raw_df[["id", "name", "skill_id", "equip_id", "argument_order", "argument_value", "suit_num"]]
equip_suit_recs = equip_suit_rec_df.to_dict(orient="records")

# One output row per equipment id; only the first argument value/order is kept.
equip_suit_rec_news = [
    {
        "id": suit_rec["id"],
        "equip_id": member_equip_id,
        "argument_value": suit_rec["argument_value"][0],
        "argument_order": suit_rec["argument_order"][0],
        "name": suit_rec["name"],
        "skill_id": suit_rec["skill_id"],
        "suit_num": suit_rec["suit_num"],
    }
    for suit_rec in equip_suit_recs
    for member_equip_id in suit_rec["equip_id"]
]

equip_suit_df = pd.DataFrame(equip_suit_rec_news)

### Data manipulation

In [None]:
# Collapse the per-equipment rows to one row per distinct (argument_value, name, skill_id, suit_num), ordered by suit name.
equip_suit_manip_df = equip_suit_df[["argument_value", "name", "skill_id", "suit_num"]].drop_duplicates().sort_values("name")

In [None]:
# Suits without a skill id get the string "81010000".
equip_suit_manip_df["skill_id"] = equip_suit_manip_df["skill_id"].fillna("81010000")

# Drop unnamed suits first so the prefix test below only sees strings.
has_name = equip_suit_manip_df["name"].notnull()
equip_suit_manip_df = equip_suit_manip_df[has_name]

is_fashion_effect = equip_suit_manip_df["name"].str.startswith("Fashion_ShowEffect")
equip_suit_manip_df = equip_suit_manip_df[~is_fashion_effect]

In [None]:
# Suit codes that map to one value regardless of skill or suit_num.
_SUIT_FIXED_VALUES = {4: 0.1, 6000: 0.25, 8114: 0.1}

# Suit codes whose value depends on `suit_num` (3, 6 or 8).
_SUIT_TIER_VALUES = {
    1020: {3: 0.05, 6: 0.075, 8: 0.1},
    2020: {3: 0.1, 6: 0.15, 8: 0.2},
    2060: {3: 0.05, 6: 0.075, 8: 0.1},
    2070: {3: 0.05, 6: 0.075, 8: 0.1},
    3020: {3: 0.03, 6: 0.04, 8: 0.05},
    4020: {3: 0.1, 6: 0.15, 8: 0.2},
    5020: {3: 0.03, 6: 0.04, 8: 0.05},
}


def get_blue_argument_value(x):
  """
  Look up the initial argument value for one suit row.

  Parameters
  ----------
  x
    Mapping-like row with the keys `name` (expected form
    `SuitName<code>`), `skill_id` and `suit_num`

  Returns
  -------
  value
    The value for the suit code, or `np.nan` when the name does not
    carry a numeric suit code or the code / skill / suit_num combination
    is not listed
  """

  try:
    equipment_suit_code = int(re.match(r"SuitName(\w+)", x["name"]).group(1))
  except (AttributeError, TypeError, ValueError):
    # No match, non-string name, or non-numeric suffix. Previously this
    # path left the code unbound and crashed on the first comparison.
    return np.nan

  # Codes 101-499 scale with the distance from 99; the formula depends on the skill.
  if 100 < equipment_suit_code < 500:
    skill_id = int(x["skill_id"])
    step = equipment_suit_code - 99
    if skill_id == 81010000:
      return step * 0.005
    if skill_id == 81010001:
      return 0.20 + (step * 0.05)
    if skill_id == 81010002:
      return step * 0.05
    return np.nan

  if equipment_suit_code in _SUIT_FIXED_VALUES:
    return _SUIT_FIXED_VALUES[equipment_suit_code]

  if equipment_suit_code in _SUIT_TIER_VALUES:
    return _SUIT_TIER_VALUES[equipment_suit_code].get(int(x["suit_num"]), np.nan)

  return np.nan

In [None]:
# Rows without a suit_num are treated as suit_num 3.
equip_suit_manip_df["suit_num"] = equip_suit_manip_df["suit_num"].fillna(3)

# Compute the initial value per row; every row starts as argument 1, and rows
# for which the lookup returned NaN are dropped.
equip_suit_manip_df["init_argument_value"] = equip_suit_manip_df.apply(get_blue_argument_value, axis=1)
equip_suit_manip_df["argument_order"] = 1
equip_suit_manip_df = equip_suit_manip_df[~equip_suit_manip_df["init_argument_value"].isnull()]

#### Manipulate `SuitName101`

In [None]:
# SuitName101 rows are synthesised from the SuitName102 rows: copy them,
# rename, and overwrite the initial value per `suit_num`.
# `.copy()` makes the query result an independent frame before it is edited.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName102'").copy()
equip_suit_manip_copy_df["name"] = 'SuitName101'

for suit_num, init_value in ((3, 0.005), (6, 0.25), (8, 0.05)):
  # Mask on the copy's own rows rather than relying on index alignment
  # with the parent frame.
  rows = equip_suit_manip_copy_df["suit_num"] == suit_num
  equip_suit_manip_copy_df.loc[rows, "init_argument_value"] = init_value

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the equivalent.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

#### Manipulate `SuitName2020`

In [None]:
# The existing SuitName2020 rows become argument 2; a copy of them is added
# as argument 1 with its own initial values and no argument_value.
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName2020", "argument_order"] = 2

# `.copy()` makes the query result an independent frame before it is edited.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName2020'").copy()
equip_suit_manip_copy_df["argument_order"] = 1

for suit_num, init_value in ((3, 0.3), (6, 0.4), (8, 0.5)):
  # Mask on the copy's own rows rather than relying on index alignment
  # with the parent frame.
  rows = equip_suit_manip_copy_df["suit_num"] == suit_num
  equip_suit_manip_copy_df.loc[rows, "init_argument_value"] = init_value
  equip_suit_manip_copy_df.loc[rows, "argument_value"] = np.nan

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the equivalent.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

#### Manipulate `SuitName2060`

In [None]:
# The existing SuitName2060 rows become argument 3; two edited copies of them
# are added as arguments 1 and 2.
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName2060", "argument_order"] = 3

# `.copy()` makes the query result an independent frame before it is edited.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName2060'").copy()

# (argument_order, {suit_num: init_argument_value}) for each added argument.
for argument_order, init_values in ((1, {3: 2, 6: 3, 8: 4}), (2, {3: 0.3, 6: 0.4, 8: 0.5})):
  equip_suit_manip_copy_df["argument_order"] = argument_order
  for suit_num, init_value in init_values.items():
    # Mask on the copy's own rows rather than relying on index alignment
    # with the parent frame.
    rows = equip_suit_manip_copy_df["suit_num"] == suit_num
    equip_suit_manip_copy_df.loc[rows, "init_argument_value"] = init_value
    equip_suit_manip_copy_df.loc[rows, "argument_value"] = np.nan

  # `pd.concat` copies its inputs, so the frame can be edited again for the
  # next argument. (`DataFrame.append` was removed in pandas 2.0.)
  equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

#### Manipulate `SuitName2070`

In [None]:
# The existing SuitName2070 rows become argument 2; two edited copies of them
# are added as arguments 1 and 3.
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName2070", "argument_order"] = 2

# `.copy()` makes the query result an independent frame before it is edited.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName2070'").copy()

# (argument_order, {suit_num: init_argument_value}) for each added argument.
for argument_order, init_values in ((1, {3: 0.2, 6: 0.15, 8: 0.1}), (3, {3: 3, 6: 4, 8: 5})):
  equip_suit_manip_copy_df["argument_order"] = argument_order
  for suit_num, init_value in init_values.items():
    # Mask on the copy's own rows rather than relying on index alignment
    # with the parent frame.
    rows = equip_suit_manip_copy_df["suit_num"] == suit_num
    equip_suit_manip_copy_df.loc[rows, "init_argument_value"] = init_value
    equip_suit_manip_copy_df.loc[rows, "argument_value"] = np.nan

  # `pd.concat` copies its inputs, so the frame can be edited again for the
  # next argument. (`DataFrame.append` was removed in pandas 2.0.)
  equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

#### Manipulate `SuitName4020`

In [None]:
# Add a second argument for every SuitName4020 row, with fixed values.
# `assign` returns a new frame, so the query result itself is never mutated.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName4020'").assign(
    argument_order=2,
    init_argument_value=1,
    argument_value=0.5,
)

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the equivalent.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

#### Manipulate `SuitName5020`

In [None]:
# The existing SuitName5020 rows become argument 2; a copy of them is added
# as argument 1 with its own initial values and no argument_value.
equip_suit_manip_df.loc[equip_suit_manip_df["name"] == "SuitName5020", "argument_order"] = 2

# `.copy()` makes the query result an independent frame before it is edited.
equip_suit_manip_copy_df = equip_suit_manip_df.query("name == 'SuitName5020'").copy()
equip_suit_manip_copy_df["argument_order"] = 1

for suit_num, init_value in ((3, 3), (6, 4), (8, 5)):
  # Mask on the copy's own rows rather than relying on index alignment
  # with the parent frame.
  rows = equip_suit_manip_copy_df["suit_num"] == suit_num
  equip_suit_manip_copy_df.loc[rows, "init_argument_value"] = init_value
  equip_suit_manip_copy_df.loc[rows, "argument_value"] = np.nan

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the equivalent.
equip_suit_manip_df = pd.concat([equip_suit_manip_df, equip_suit_manip_copy_df], ignore_index=True)

### Merge with information from `en_langs`

#### Suit name

In [None]:
# Turn "SuitName<code>" into the integer code and expose it as `suit_id`.
suit_codes = equip_suit_manip_df["name"].apply(lambda raw_name: int(raw_name.replace("SuitName", "")))
equip_suit_manip_df["name"] = suit_codes
equip_suit_manip_df = equip_suit_manip_df.rename(columns={"name": "suit_id"})

equip_suit_fin_df = equip_suit_manip_df.drop_duplicates().reset_index(drop=True)

# Display names from the language data, keyed by integer suit id.
suit_name_df = pd.DataFrame(parsed_data["suit_name"]).rename(columns={"id": "suit_id", "value": "name"})
suit_name_df["suit_id"] = suit_name_df["suit_id"].apply(int)

# Left join: suits without a display name keep a NaN `name`.
equip_suit_fin_df = equip_suit_fin_df.merge(suit_name_df, how="left", on="suit_id")

### Save data for database

#### Equipment suit

In [None]:
# One row per distinct (suit id, name) pair; `suit_id` is exposed as `id`. This rebinds `equip_suit_df`.
equip_suit_df = equip_suit_fin_df[["suit_id", "name"]].drop_duplicates().reset_index(drop=True).rename(columns={"suit_id": "id"})

In [None]:
# Independent copy used as the final suit table (saved below and used for later id filtering).
equip_suit_final_df = equip_suit_df.copy()

In [None]:
# Write the suit table to the parsed folder.
equip_suit_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_suit_{APK_DATE}.csv", index=False)

#### Equipment Suit Skill

In [None]:
 equip_suit_skill_df = equip_suit_fin_df[["suit_id", "skill_id", "suit_num"]].drop_duplicates(["suit_id", "skill_id", "suit_num"]).reset_index().rename(columns={"index": "id"})

In [None]:
# Keep only suit skills whose skill id also exists in the final skill table.
suit_skill_ids = equip_suit_skill_df["skill_id"].astype(float)
intersections = set(suit_skill_ids).intersection(set(skill_complex_final_df["id"].astype(float)))
equip_suit_skill_final_df = equip_suit_skill_df[
    equip_suit_skill_df["skill_id"].astype(float).isin(intersections)
]

In [None]:
# Write the filtered suit/skill table to the parsed folder.
equip_suit_skill_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_suit_skill_{APK_DATE}.csv", index=False)

#### Equipment Suit Skill Argument

In [None]:
# Attach each argument row to its suit-skill row; the suit-skill `id` becomes `suit_skill_id`.
# Left join: argument rows whose suit skill was filtered out keep a NaN `suit_skill_id`.
suit_skill_keys = ["suit_id", "skill_id", "suit_num"]
equip_suit_skill_arg_df = pd.merge(
    equip_suit_fin_df,
    equip_suit_skill_final_df,
    how="left",
    on=suit_skill_keys,
).rename(columns={"id": "suit_skill_id"})

In [None]:
# Keep the argument columns only and use the running row number as `id`.
equip_suit_skill_arg_df = equip_suit_skill_arg_df[["suit_skill_id", "argument_order", "init_argument_value", "argument_value"]].reset_index().rename(columns={"index": "id"})

In [None]:
# At most one row per (suit skill, argument position); the first occurrence wins.
arg_key_columns = ["suit_skill_id", "argument_order"]
equip_suit_skill_arg_final_df = equip_suit_skill_arg_df.drop_duplicates(subset=arg_key_columns, keep="first")

In [None]:
# Write the suit-skill argument table to the parsed folder.
equip_suit_skill_arg_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_suit_skill_arg_{APK_DATE}.csv", index=False)

## Equip

### Parse data

In [None]:
# Load the Equip dump and rewrite its table syntax into a Python literal that
# `literal_eval` can turn into a dict.
with open(f"{MAIN_CLEANED_DIR}/data_equip_Equip.bytes", "r", encoding="utf-8") as dump_file:
  texts = [text.strip() for text in dump_file]

texts = " ".join(texts)

# `[key] =` becomes `key:`; brace groups holding only digits, commas and
# spaces become lists. The replacement templates are raw strings because
# `\g` is not a valid string escape.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap every nested-brace sequence for a placeholder first so that the
# rewrites below cannot overlap; longer sequences are handled before the
# shorter ones they contain.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Restore the placeholders: AAAAA opens a list, BBBBB and CCCCC close one,
# DDDDD / EEEEE / FFFFF go back to their original text.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

texts = "{" + texts + "}"

equips = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key in the parsed equip record.
default_key_dict = {
    "sell_price": 'SellPrice',
    "base_prop": 'baseProperty',
    "name": "name",
    "desc": 'desc',
    "type": 'equipmentType',
    "improved_level": 'improvedLevel',
    "init_holes": 'initHoles',
    "is_all_job": 'isAllJob',
    "is_bind": 'isBind',
    "is_fashion": 'isFashion',
    "job_limit": 'jobLimit',
    "max_holes": 'maxHoles',
    "min_level_limit": 'minLvLimit',
    "prop_level": 'propLevel',
    "quality": 'quality',
    "res_id": 'resId',
    "trade": 'trade',
    "refine_id": "RefineID",
    "static_id": 'staticId',
    "wardrobe_value": 'wardrobeValue',
    "decomposition_output_id": "DecompositionOutputId",
}

equip_entries = []

for equip_key, parsed_dict in equips.items():
  # The outer key of the dump is the equipment id.
  equip_entry = {"id": equip_key}
  for col, def_key in default_key_dict.items():
    try:
      equip_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent (or the record is not a mapping): leave the column
      # out so the DataFrame constructor fills it with NaN.
      continue
  equip_entries.append(equip_entry)

equip_df = pd.DataFrame(equip_entries)

### Get equipment information from `en_langs`

In [None]:
# Lookup frames built from the parsed language data (`parsed_data` is defined earlier in the notebook).
equip_name_df = pd.DataFrame(parsed_data["equip_name"])
equip_desc_df = pd.DataFrame(parsed_data["equip_desc"])
equip_type_df = pd.DataFrame(parsed_data["equip_type"])
# The "property" entries are used as equipment attribute descriptions.
equipment_attr_desc_df = pd.DataFrame(parsed_data["property"])

### Select columns for database

In [None]:
# Scalar equipment columns only; `base_prop`, `job_limit` and `trade` are left out here.
equip_all_df = equip_df[["id", "name", "desc", "type", "init_holes", "is_bind", "max_holes", "res_id", "static_id", "improved_level", "min_level_limit", "prop_level", "quality", "refine_id", "sell_price", "is_all_job", "is_fashion", "wardrobe_value", "decomposition_output_id"]]

### Merge with information from `en_langs`

#### Equip name

In [None]:
# Rebuild the "EquipName_<id>" key used by the equip records, then left-join the display name.
equip_name_df["id"] = equip_name_df["id"].apply("EquipName_{}".format)
equip_name_df = equip_name_df.rename(columns={"id": "name", "value": "equipment_name"})
equip_all_df = equip_all_df.merge(equip_name_df, how="left", on=["name"])

#### Equip description

In [None]:
# Rebuild the "EquipDesc_<id>" key used by the equip records, then left-join the description text.
equip_desc_df["id"] = equip_desc_df["id"].apply("EquipDesc_{}".format)
equip_desc_df = equip_desc_df.rename(columns={"id": "desc", "value": "equipment_desc"})
equip_all_df = equip_all_df.merge(equip_desc_df, how="left", on=["desc"])

#### Equip type

In [None]:
# Both sides of the join are cast to float so the type codes compare equal.
equip_all_df["type"] = equip_all_df["type"].astype(float)

equip_type_df = equip_type_df.rename(columns={"id": "type", "value": "equipment_type"})
equip_type_df["type"] = equip_type_df["type"].astype(float)

# Left join: equipment with an unknown type code keeps a NaN `equipment_type`.
equip_all_df = equip_all_df.merge(equip_type_df, how="left", on=["type"])

### Filter column for database

In [None]:
# Keep the translated name/description/type plus the numeric columns; the raw key columns and `is_bind`/`is_fashion` are dropped.
equip_final_df = equip_all_df[["id", "equipment_name", "equipment_desc", "equipment_type", "static_id", "res_id", "init_holes", "max_holes", "min_level_limit", "prop_level", "quality", "refine_id", "sell_price", "is_all_job", "wardrobe_value", "improved_level", "decomposition_output_id"]]

### Data correction

In [None]:
# Equipment without a matching type label is labelled "Accessory - Decoration".
equip_final_df["equipment_type"] = equip_final_df["equipment_type"].fillna("Accessory - Decoration")
# Drop equipment that has no translated name.
equip_final_df = equip_final_df[~equip_final_df["equipment_name"].isnull()]
# NOTE(review): as written this replaces "I" with "I", which is a no-op; presumably
# one side was meant to be a look-alike character - confirm against the raw data.
equip_final_df["equipment_name"] = equip_final_df["equipment_name"].str.replace("I", "I")
# Remove roman-numeral tier markers to get a name shared by all tiers of an item.
# Longer numerals are removed first so " I" cannot eat part of " II"/" III".
# NOTE(review): the replacement matches anywhere in the name, not only at the end.
equip_final_df["equipment_group"] = equip_final_df["equipment_name"].str.replace(" III", "").str.replace(" II", "").str.replace(" IV", "").str.replace(" VI", "").str.replace(" V", "").str.replace(" I", "")
# Missing quality defaults to 1 and missing improved_level to 0.
equip_final_df["quality"] = equip_final_df["quality"].fillna(1)
equip_final_df["improved_level"] = equip_final_df["improved_level"].fillna(0)
# Rows with improved_level == 0 supply the static_id for their whole group.
equipment_group_df = equip_final_df.query("improved_level == 0")[["equipment_group", "static_id"]].rename(columns={"static_id":"correct_static_id"})
equip_final_df = pd.merge(equip_final_df, equipment_group_df, how="left", left_on=["equipment_group"], right_on=["equipment_group"])
# NOTE(review): groups without an improved_level == 0 row end up with a NaN static_id here.
equip_final_df["static_id"] = equip_final_df["correct_static_id"]
# The merge fans out when a group has several improved_level == 0 rows; keep the first match per id.
equip_final_df = equip_final_df.drop_duplicates("id", keep="first")

### Add equipment suit info

In [None]:
def get_final_level(x):
  """Return the row's `prop_level` reduced by ten for each `improved_level`."""
  improvement_offset = x["improved_level"] * 10
  return x["prop_level"] - improvement_offset

# Row-wise: final_level = prop_level - 10 * improved_level.
equip_final_df["final_level"] = equip_final_df.apply(get_final_level, axis=1)

# final_level -> suit id for quality-1 equipment.
_QUALITY_ONE_SUIT_BY_LEVEL = {
    30: 1020,
    40: 2020,
    50: 3020,
    60: 4020,
    70: 5020,
    80: 2060,
    90: 2070,
    100: 2080,
    110: 2090,
    120: 2100,
}


def assign_equipment_suit(x):
  """
  Derive the suit id of one equipment row.

  Parameters
  ----------
  x
    Mapping-like row with the keys `final_level` and `quality`

  Returns
  -------
  suit_id
    Quality 1: the suit listed for the final level. Quality 2:
    `100 + (final_level - 20) / 10`. `np.nan` for any other quality and
    for quality-1 levels that are not listed (previously an implicit
    `None`).
  """
  final_level = x["final_level"]
  quality = x["quality"]

  if quality == 1:
    return _QUALITY_ONE_SUIT_BY_LEVEL.get(final_level, np.nan)
  if quality == 2:
    return 100 + (final_level - 20) / 10
  return np.nan

# Row-wise suit assignment from `final_level` and `quality`.
equip_final_df["suit_id"] = equip_final_df.apply(assign_equipment_suit, axis=1)

In [None]:
# Equipment mapped to suit 2080 is treated as having no suit.
equip_final_df.loc[equip_final_df["suit_id"] == 2080, "suit_id"] = np.nan

In [None]:
equip_final_df.shape

(3934, 21)

In [None]:
# Set aside equipment without a suit; the suit-id filter that follows would otherwise drop it.
equip_final_temp_df = equip_final_df[equip_final_df["suit_id"].isnull()]

In [None]:
# Keep only equipment whose suit id exists in the final suit table.
known_suit_ids = set(equip_suit_final_df["id"].astype(float))
intersections = set(equip_final_df["suit_id"].astype(float)).intersection(known_suit_ids)
has_known_suit = equip_final_df["suit_id"].astype(float).isin(intersections)
equip_final_df = equip_final_df[has_known_suit]

In [None]:
# Re-attach the equipment without a suit that was set aside before the filter.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the equivalent.
equip_final_df = pd.concat([equip_final_df, equip_final_temp_df], ignore_index=True)

### Save data for database

In [None]:
# Database-facing column names.
equip_column_renames = {
    "equipment_name": "name",
    "equipment_desc": "description",
    "equipment_type": "type",
    "decomposition_output_id": "decomposition_id",
}
# Helper columns that only existed for the corrections above.
equip_helper_columns = ["equipment_group", "correct_static_id", "final_level"]

equip_final_df = equip_final_df.rename(columns=equip_column_renames)
equip_final_df = equip_final_df.drop(equip_helper_columns, axis=1)

# Names are stored lower-cased.
equip_final_df["name"] = equip_final_df["name"].str.lower()
equip_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_{APK_DATE}.csv", index=False)

## Equip Attributes

### Get data from Equip

In [None]:
# Flatten each equipment's `base_prop` mapping into one row per attribute.
new_records = []

records = equip_df[["id", "base_prop"]].to_dict(orient="records")

for record in records:
  base_prop = record["base_prop"]

  if isinstance(base_prop, dict):
    new_records.extend(
        {"id": record["id"], "attr": attr, "value": value}
        for attr, value in base_prop.items()
    )
  else:
    # No attribute mapping: keep a single placeholder row for the equipment.
    new_records.append({"id": record["id"], "attr": np.nan, "value": np.nan})

equipment_attributes_df = pd.DataFrame(new_records)

### Merge with information from `en_langs`

#### Attribute Description

In [None]:
# Attribute descriptions keyed by attribute code.
equipment_attr_desc_df = equipment_attr_desc_df.rename(columns={"id": "attr", "value": "attributes"})

# Both sides of the join are cast to float so the codes compare equal.
for attr_frame in (equipment_attr_desc_df, equipment_attributes_df):
  attr_frame["attr"] = attr_frame["attr"].astype(float)

# Left join, then keep only the equipment id, the description and the value.
equipment_attr_fin_df = equipment_attributes_df.merge(equipment_attr_desc_df, how="left", on=["attr"])
equipment_attr_fin_df = equipment_attr_fin_df[["id", "attributes", "value"]]

### Save data for database

In [None]:
# Database-facing column names; the old `id` is the equipment id.
attr_column_renames = {
    "id": "equip_id",
    "attributes": "attribute",
    "value": "attribute_value",
}
equipment_attr_fin_df = equipment_attr_fin_df.rename(columns=attr_column_renames).reset_index(drop=True)

# Fresh running number as the row id.
equipment_attr_fin_df["id"] = pd.Series(range(0, equipment_attr_fin_df.shape[0]))

# Keep only attributes of equipment that is present in the final equip table.
final_equip_ids = set(equip_final_df["id"].astype(int))
intersections = set(equipment_attr_fin_df["equip_id"].astype(int)).intersection(final_equip_ids)
belongs_to_final_equip = equipment_attr_fin_df["equip_id"].astype(int).isin(intersections)
equipment_attributes_final_df = equipment_attr_fin_df[belongs_to_final_equip]

In [None]:
# Write the filtered equipment attribute table to the parsed folder.
equipment_attributes_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_attributes_{APK_DATE}.csv", index=False)

## Material

### Get equip and item data

In [None]:
# Items and equipment share the material table; `is_item` tells them apart.
# `assign` returns a new frame, so no column is set on a slice of the source
# frames (which triggers SettingWithCopyWarning).
material_one_df = item_final_df[["id", "name", "description", "res_id"]].assign(is_item=1)
material_two_df = equip_final_df[["id", "name", "description", "res_id"]].assign(is_item=0)

### Data correction

In [None]:
# Stack items and equipment into one material table.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the equivalent.
material_final_df = pd.concat([material_one_df, material_two_df], ignore_index=True)
material_final_df["name"] = material_final_df["name"].str.lower()

# Every material whose name contains "card" gets res_id 99999.
# `na=False` keeps the mask boolean when a name is missing; a NaN in the
# mask would make `.loc` raise.
is_card = material_final_df["name"].str.contains("card", na=False)
material_final_df.loc[is_card, 'res_id'] = 99999

### Save data for database

In [None]:
# Write the combined material table to the parsed folder.
material_final_df.to_csv(f"{MAIN_PARSED_DIR}/material_{APK_DATE}.csv", index=False)

In [None]:
# Independent copy of the material table under a second name.
material_df = material_final_df.copy()

## Job

### Get data from `en_langs`

In [None]:
# Job names from the language data, stored lower-cased.
job_df = pd.DataFrame(parsed_data["job_name"])
job_df = job_df.rename(columns={"value": "name"})
job_df["name"] = job_df["name"].str.lower()

In [None]:
# Independent copy used as the final job table (saved below and used for later id filtering).
job_final_df = job_df.copy()

### Save data for database

In [None]:
# Write the job table to the parsed folder.
job_final_df.to_csv(f"{MAIN_PARSED_DIR}/job_{APK_DATE}.csv", index=False)

## Equip Job

### Get data from Equip

In [None]:
# Only equipment that carries a job restriction; the restriction is normalised to a plain list.
has_job_limit = ~equip_df["job_limit"].isnull()
equip_job_raw_df = equip_df[["id", "job_limit"]][has_job_limit]
equip_job_raw_df["job_limit"] = equip_job_raw_df["job_limit"].apply(list)

### Create records

In [None]:
# One row per (equipment, allowed job) pair.
records = equip_job_raw_df[["id", "job_limit"]].to_dict(orient="records")

new_records = [
    {"id": rec["id"], "job": job_code}
    for rec in records
    for job_code in rec["job_limit"]
]

equip_job_df = pd.DataFrame(new_records)

### Save data for database

In [None]:
# Running row number becomes `id`; the old `id`/`job` columns become foreign keys.
equip_job_df = equip_job_df.reset_index().rename(
    columns={"index": "id", "id": "equip_id", "job": "job_id"}
)

# Drop rows that point at a job missing from the final job table ...
known_job_ids = set(job_final_df["id"].astype(float))
intersections = set(equip_job_df["job_id"].astype(float)).intersection(known_job_ids)
equip_job_df = equip_job_df[equip_job_df["job_id"].astype(float).isin(intersections)]

# ... and rows that point at equipment missing from the final equip table.
known_equip_ids = set(equip_final_df["id"].astype(float))
intersections = set(equip_job_df["equip_id"].astype(float)).intersection(known_equip_ids)
equip_job_final_df = equip_job_df[equip_job_df["equip_id"].astype(float).isin(intersections)]

In [None]:
# Write the filtered equipment/job table to the parsed folder.
equip_job_final_df.to_csv(f"{MAIN_PARSED_DIR}/equip_job_{APK_DATE}.csv", index=False)

## Drop

### Drop V2

### Parse data

In [None]:
# Load the DropV2 dump and rewrite its table syntax into a Python literal that
# `literal_eval` can turn into a dict.
with open(f"{MAIN_CLEANED_DIR}/data_dropV2_DropV2.bytes", "r", encoding="utf-8") as dump_file:
  texts = [text.strip() for text in dump_file]

texts = " ".join(texts)

# `[key]=` becomes `key:`; brace groups holding only digits, commas and
# spaces become lists. The replacement templates are raw strings because
# `\g` is not a valid string escape.
texts = re.sub(r'\[([\w\"]+)\]=', r'\g<1>:', texts)
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Empty tables become empty lists and "{ {" opens a list of records.
texts = texts.replace("{}", "[]")
texts = texts.replace("{ {", "[ {")

# Swap the closing-brace runs for placeholders, longest first, so that a
# shorter pattern cannot match inside a longer one.
texts = texts.replace("} } }, {", "AAAAA")
texts = texts.replace("} } } }", "BBBBB")
texts = texts.replace("} } }", "XXXXX")
texts = texts.replace("} }, {", "ZZZZZ")
texts = texts.replace("} }", "YYYYY")

# Restore the placeholders; ZZZZZ returns unchanged, the others get a `]`
# in place of one closing brace.
texts = texts.replace("XXXXX", "} } ]")
texts = texts.replace("ZZZZZ", "} }, {")
texts = texts.replace("YYYYY", "} ]")
texts = texts.replace("AAAAA", "} ] }, {")
texts = texts.replace("BBBBB", "} ] } ]")

texts = "{" + texts + "}"

drops = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key in the parsed drop record.
default_key_dict = {
    "id": "id",
    "is_card_drop": "IsCardDrop",
    "fixed_drop_amount": 'fixedDropAmount',
    "fixed_drop_static_id": "fixedDropStaticId",
    "fixed_drop_type": "fixedDropType",
    "random_drop_collections_id": "randomDropCollectionsId",
    "random_drop_collections_sp_plus": "randomDropCollectionsSpPlus",
    "random_drop_collections_sp_reduce": "randomDropCollectionsSpReduce",
    "random_drop_collections_weight": "randomDropCollectionsWeight",
    "random_drop_probability": "randomDropProbability",
    "random_drop_probability_denominator": "randomDropProbabilityDenominator",
    "random_times": "randomTimes",
}

drop_entries = []

for drop_key, parsed_dict in drops.items():
  # Start from the outer key; a record-level "id", when present, replaces it below.
  drop_entry = {"id": drop_key}
  for col, def_key in default_key_dict.items():
    try:
      drop_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent (or the record is not a mapping): leave the column
      # out so the DataFrame constructor fills it with NaN.
      continue
  drop_entries.append(drop_entry)

drop_info_df = pd.DataFrame(drop_entries)

### Parse Fixed Drop

In [None]:
# Keep drops that list at least one fixed reward. Records parsed without the
# field hold NaN, which has no `len()`, so they count as empty instead of
# raising.
has_fixed_drop = drop_info_df["fixed_drop_amount"].apply(
    lambda amounts: hasattr(amounts, "__len__") and len(amounts) > 0
).astype(bool)
fixed_drop_info_df = drop_info_df[has_fixed_drop]

In [None]:
# One row per fixed reward of each drop.
# NOTE(review): `item_id` is read from `fixed_drop_type`, not from
# `fixed_drop_static_id` - confirm this is intended.
fixed_drop_info_fins = [
    {
        "drop_id": rec["id"],
        "item_id": rec["fixed_drop_type"][pos],
        "item_num": amount,
    }
    for rec in fixed_drop_info_df.to_dict(orient="records")
    for pos, amount in enumerate(rec["fixed_drop_amount"])
]

fixed_drop_info_fin_df = pd.DataFrame(fixed_drop_info_fins)

### Save Fixed Drop data

In [None]:
# Write the fixed-drop table to the parsed folder.
fixed_drop_info_fin_df.to_csv(f"{MAIN_PARSED_DIR}/fixed_drop_{APK_DATE}.csv", index=False)

In [None]:
fixed_drop_info_fin_df

Unnamed: 0,drop_id,item_id,item_num
0,100001,5,1
1,100001,5,10
2,1000011011,6,5000
3,1000011011,7,5000
4,1000011011,1,200
...,...,...,...
8395,99999,4,1
8396,99999,4,1
8397,99999,4,1
8398,99999,4,1


### Parse Random Drop

In [None]:
# Keep drops that reference at least one drop collection. Records parsed
# without the field hold NaN, which has no `len()`, so they count as empty
# instead of raising.
has_random_drop = drop_info_df["random_drop_collections_id"].apply(
    lambda collection_ids: hasattr(collection_ids, "__len__") and len(collection_ids) > 0
).astype(bool)
random_drop_info_df = drop_info_df[has_random_drop]

In [None]:
# One row per (drop, collection) pair; the drop-level probability fields are
# repeated on every row of the same drop.
random_drop_info_fins = [
    {
        "drop_id": rec["id"],
        "drop_collections_id": collection_id,
        "drop_collections_weight": rec["random_drop_collections_weight"][pos],
        "probability": rec["random_drop_probability"],
        "probability_denominator": rec["random_drop_probability_denominator"],
        "times": rec["random_times"],
    }
    for rec in random_drop_info_df.to_dict(orient="records")
    for pos, collection_id in enumerate(rec["random_drop_collections_id"])
]

random_drop_info_fin_df = pd.DataFrame(random_drop_info_fins)

### Save Complex Random Drop Main for database

In [None]:
# random_drop_info_main_fin_df = random_drop_info_fin_df[["drop_id", "probability", "probability_denominator", "times"]].drop_duplicates("drop_id")

In [None]:
# Drop-level fields only, de-duplicated over all four columns; `drop_id` is exposed as `id`.
random_drop_main_df = random_drop_info_fin_df[["drop_id", "probability", "probability_denominator", "times"]].drop_duplicates().reset_index(drop=True).rename(columns={"drop_id": "id"})

In [None]:
# Write the random-drop main table to the parsed folder.
random_drop_main_df.to_csv(f"{MAIN_PARSED_DIR}/random_drop_main_{APK_DATE}.csv", index=False)

### Drop Collections

### Parse data

In [None]:
# Load the cleaned DropCollection dump and collapse it onto a single line so
# the rewrites below can match across the original line breaks.
with open(f"{MAIN_CLEANED_DIR}/data_dropV2_DropCollection.bytes", "r", encoding="utf-8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax into a Python literal.
# Replacement templates are raw strings: "\g" is not a valid Python string
# escape, so the non-raw spelling triggers an invalid-escape warning.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 } ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}  ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# print(texts)

# Brace runs that open / close a table of tables become list brackets.
# Every pattern is first swapped for a placeholder token so that the short
# "}, }," pattern cannot rewrite part of a longer (or protected) run.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
# The remaining three placeholders are restored to plain braces (no list
# bracket is introduced for them).
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap the entries in an outer dict literal and evaluate it safely.
texts = "{" + texts + "}"

drop_collections = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed DropCollection entry.
# Built once instead of on every loop iteration.
default_key_dict = {
    "drop_collections_id": "id",
    "amount": "amount",
    "drop_id": 'dropId',
    "static_id": "staticId",
    "type": "type",
    "weight": "weight",
}

drop_entries = []

# Only the parsed values are needed; iterating .values() also avoids
# shadowing the builtin `id` with the loop variable.
for parsed_dict in drop_collections.values():
  drop_entry = {}

  for col, def_key in default_key_dict.items():
    try:
      drop_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Best effort: a key missing from this entry (or an entry that is not
      # a mapping) simply leaves the column empty for this row.
      continue
  drop_entries.append(drop_entry)

drop_collection_df = pd.DataFrame(drop_entries)

In [None]:
# random_drop_collection_info_df = pd.merge(complex_random_drop_com_df, drop_collection_df, how="left", left_on=["drop_collections_id"], right_on=["drop_collections_id"])

In [None]:
random_drop_collection_info_df = drop_collection_df[~drop_collection_df["static_id"].isnull()]

In [None]:
# One output row per item in a collection: `static_id`, `weight` and `amount`
# are read at the same position within each collection record.
random_drop_collection_info_fins = [
  {
    "drop_collections_id": collection["drop_collections_id"],
    "item_id": item_id,
    "weight": collection["weight"][pos],
    "amount": collection["amount"][pos],
  }
  for collection in random_drop_collection_info_df.to_dict(orient="records")
  for pos, item_id in enumerate(collection["static_id"])
]

random_drop_collection_info_fin_df = pd.DataFrame(random_drop_collection_info_fins)

In [None]:
# De-duplicate the (collection, item, weight, amount) rows. `id` is the row's
# index from before de-duplication, so the ids may contain gaps.
random_drop_collection_df = (
  random_drop_collection_info_fin_df[["drop_collections_id", "item_id", "weight", "amount"]]
  .drop_duplicates()
  .reset_index()
  .rename(columns={"index": "id", "item_id": "material_id"})
)

# Keep only rows whose material id is also present in the material table.
collection_material_ids = random_drop_collection_df["material_id"].astype(float)
intersections = set(collection_material_ids) & set(material_final_df["id"].astype(float))
random_drop_collection_df = random_drop_collection_df[collection_material_ids.isin(intersections)]

# intersections = set(random_drop_collection_df["drop_id"].astype(float)).intersection(set(random_drop_com_df["id"].astype(float)))
# random_drop_collection_df = random_drop_collection_df[random_drop_collection_df["drop_id"].astype(float).isin(intersections)]

random_drop_collection_df.to_csv(f"{MAIN_PARSED_DIR}/random_drop_collection_{APK_DATE}.csv", index=False)

In [None]:
random_drop_collection_df

Unnamed: 0,id,drop_collections_id,material_id,weight,amount
0,0,400001,10217127,1,1
1,1,406301,10217127,1,1
2,2,406401,10217127,1,1
3,3,406402,10219517,1,1
4,4,406501,10217127,1,1
...,...,...,...,...,...
10788,10791,110601170801,10201030,480,1
10789,10792,110601170801,10201033,2390,100
10790,10793,110601170801,10201026,2390,40
10791,10794,110601170801,10201035,700,12


### Save Complex Random Drop Main Collections for database

In [None]:
random_drop_main_collection_df = random_drop_info_fin_df[["drop_id", "drop_collections_id", "drop_collections_weight"]].reset_index().rename(columns={"drop_collections_weight": "weight", "index": "id"})

In [None]:
# Keep only links whose drop id exists in the random-drop main table ...
main_drop_ids = random_drop_main_collection_df["drop_id"].astype(float)
intersections = set(main_drop_ids) & set(random_drop_main_df["id"].astype(float))
random_drop_main_collection_df = random_drop_main_collection_df[main_drop_ids.isin(intersections)]

# ... and whose collection id exists in the saved collection table.
linked_collection_ids = random_drop_main_collection_df["drop_collections_id"].astype(float)
intersections = set(linked_collection_ids) & set(random_drop_collection_df["drop_collections_id"].astype(float))
random_drop_main_collection_df = random_drop_main_collection_df[linked_collection_ids.isin(intersections)]

In [None]:
random_drop_main_collection_df.to_csv(f"{MAIN_PARSED_DIR}/random_drop_main_collection_{APK_DATE}.csv", index=False)

## Monster

### Parse data

In [None]:
# Load the cleaned Monster dump and collapse it onto a single line so the
# rewrites below can match across the original line breaks. The file is only
# needed for reading, so all rewriting happens after it has been closed.
with open(f"{MAIN_CLEANED_DIR}/data_monster_Monster.bytes", "r", encoding="utf-8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax into a Python literal.
# Replacement templates are raw strings: "\g" is not a valid Python string
# escape, so the non-raw spelling triggers an invalid-escape warning.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 } ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}  ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# print(texts)

# Brace runs that open / close a table of tables become list brackets.
# Every pattern is first swapped for a placeholder token so that the short
# "}, }," pattern cannot rewrite part of a longer (or protected) run.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
# The remaining three placeholders are restored to plain braces (no list
# bracket is introduced for them).
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap the entries in an outer dict literal and evaluate it safely.
texts = "{" + texts + "}"

monsters = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Keys copied verbatim from every parsed monster entry into a column of the
# same name. The tuple is built once (not per monster) and keeps the original
# column order; 'name' is listed a single time.
monster_fields = (
  'name', 'Desc', 'DpsTestId', 'DropListKV', 'EffectHang', 'EffectId',
  'EffectScale', 'ExtraEffect', 'HasDieEffect', 'IfCanShowInDpsTest',
  'IfIgnoreInvisibility', 'IsSpecialSkillNotTarget', 'MagicMap',
  'MvpRankDrop', 'MvpRareDrop', 'PatrolPos', 'PetId', 'RareDropType',
  'RingScale', 'Scale', 'ShowRing', 'SkillEffect', 'Weather',
  'alertRange', 'aniSpeed', 'attackSpeedIncrease', 'baseExp', 'bloodNums',
  'bodily', 'bornSkillId', 'bornSound', 'bronEffectId',
  'calDamageToCreator', 'canBattleTeleport', 'cantSelect', 'castSkillRate',
  'chaseRange', 'criticalLevel', 'criticalRate', 'criticalResistanceLevel',
  'criticalResistanceRate', 'criticalResistanceValue', 'criticalValue',
  'dialogueBubbleList', 'dieEffectPath', 'dieSound', 'dieSoundTime',
  'dodgeLevel', 'dodgeRate', 'dropAnnouncementId',
  'finalMagicDefenseIncrease', 'finalPhysicDefenseIncrease',
  'fixedMagicDamage', 'fixedMagicDamageReduce', 'fixedPhysicDamage',
  'fixedPhysicDamageReduce', 'followType', 'forceType', 'hasWhiteEffect',
  'hitIncrease', 'hitLevel', 'id', 'idleSound', 'ifActive',
  'ifChangeTarget', 'ifControlledByPunishment', 'ifSelectPlayerFirst',
  'isBeHitBack', 'isHideBlood', 'isHideName', 'isIgnoreForceAttack',
  'isKeyMonster', 'isLevelEffect', 'isResetStateLeaveBattle',
  'isShowInMap', 'isUnmove', 'jobExp', 'level', 'lootForAll',
  'magicDamageIncrease', 'magicDamagedIncrease', 'magicDefenseLevel',
  'magicDps', 'magicPenetrationIncrease', 'magicPenetrationLevel',
  'magicRebound', 'maxHp', 'monsterCollectionId', 'monsterTypeForServer',
  'mvpDropId', 'nameLocalized', 'navPos', 'navSceneId', 'navScenesId',
  'overChaseChangeHatred', 'patrolRange', 'patrolSpeed', 'patrolType',
  'physicDamageIncrease', 'physicDamagedIncrease', 'physicDefenseLevel',
  'physicDps', 'physicPenetrationIncrease', 'physicPenetrationLevel',
  'property', 'race', 'radius', 'readAttrFrom', 'rebound', 'resId',
  'runSound', 'showMiniProfile', 'skills', 'soundVolume', 'speed',
  'staticId', 'tips', 'type', 'magicVampire', 'vampire', 'zeny',
)

monster_entries = []

# `monster_key` (rather than `id`) keeps the builtin `id` usable afterwards.
for monster_key, parsed_dict in monsters.items():
  # Start from the outer dict key; it is replaced by the entry's own 'id'
  # value below whenever the entry carries one.
  monster_entry = {"id": monster_key}

  for field in monster_fields:
    try:
      monster_entry[field] = parsed_dict[field]
    except (KeyError, TypeError):
      # Best effort: a key missing from this entry (or an entry that is not
      # a mapping) simply leaves the column empty for this row.
      continue
  monster_entries.append(monster_entry)

monster_info_df = pd.DataFrame(monster_entries)

### Merge with information from `en_langs`

#### Monster name

In [None]:
# English monster names, keyed by `name_id`.
monster_en_name_df = (
  pd.DataFrame(parsed_data["monster_name"])
  .rename(columns={"id": "name_id", "value": "en_name"})
)

# Derive the join key from the raw `name` value by removing the double quotes
# and the "MonsterName" prefix, then attach the English name (left join, so
# monsters without a translation are kept with a null `en_name`).
monster_info_df["name_id"] = monster_info_df["name"].apply(
  lambda raw: str(raw).replace('"', '').replace('MonsterName', "")
)
monster_info_df = monster_info_df.merge(monster_en_name_df, how="left", on="name_id")

#### Monster description

In [None]:
def _clean_monster_desc(raw):
  """Strip the marker tag, any remaining markup tags, and backslash escapes."""
  text = str(raw).replace("<color=#FFFFFF00>jayw</color>", "")
  text = re.sub(r'\<[\/\w\=\#]*\>', '', text)
  # Remove literal "\n" sequences first, then any backslash left over.
  return text.replace("\\n", "").replace("\\", "")


# English monster descriptions, cleaned and keyed by `desc_id`.
monster_en_desc_df = pd.DataFrame(parsed_data["monster_desc"])
monster_en_desc_df["value"] = monster_en_desc_df["value"].apply(_clean_monster_desc)
monster_en_desc_df = monster_en_desc_df.rename(columns={"id": "desc_id", "value": "en_desc"})

# Derive the join key from the raw `Desc` value by removing the double quotes
# and the "MonsterCollection" prefix, then attach the description.
monster_info_df["desc_id"] = monster_info_df["Desc"].apply(
  lambda raw: str(raw).replace('"', '').replace('MonsterCollection', "")
)
monster_info_df = monster_info_df.merge(monster_en_desc_df, how="left", on="desc_id")

### Filter to include field monster only

In [None]:
monster_info_df.loc[monster_info_df["id"] == 10001, "level"] = 1

In [None]:
# A row is kept when it has a drop list, a nav position and an English name.
# NOTE(review): the previous expression tested `en_name` twice; the second
# test may have been meant for a different column - confirm.
has_required_fields = (
  monster_info_df["DropListKV"].notna()
  & monster_info_df["navPos"].notna()
  & monster_info_df["en_name"].notna()
)
monster_df = monster_info_df.loc[has_required_fields]

# Level and id caps are applied one after the other.
monster_df = monster_df.loc[monster_df["level"] <= 120]
monster_df = monster_df.loc[monster_df["id"] <= 20000]

# When several rows share an English name, only the first one is kept.
monster_df = monster_df.drop_duplicates("en_name", keep="first")



### Save data for Monster Drop

In [None]:
monster_drop_records = monster_df[["id", "en_name", "DropListKV"]].to_dict(orient="records")

### Monster Skills

In [None]:
monster_skills_raw_df = monster_df[["id", "skills"]]

### Filter columns and column name correction

In [None]:
# Raw dump column -> database column. Columns whose name does not change
# (id, property, race, type, level) need no entry.
monster_column_names = {
  # experience / misc
  "baseExp": "b_exp",
  "jobExp": "j_exp",
  "maxHp": "max_hp",
  "navSceneId": "location",
  "bodily": "size",
  "resId": "res_id",
  "en_desc": "description",
  # accuracy / crit / dodge / attack speed
  "attackSpeedIncrease": "final_aspd",
  "criticalLevel": "crit",
  "criticalRate": "final_crit",
  "criticalResistanceLevel": "crit_res",
  "criticalResistanceRate": "final_crit_res",
  "dodgeLevel": "dodge",
  "dodgeRate": "final_dodge",
  "hitLevel": "hit",
  "hitIncrease": "final_hit",
  # magic
  "magicDefenseLevel": "m_def",
  "finalMagicDefenseIncrease": "final_m_def",
  "magicDamageIncrease": "final_m_dmg_bonus",
  "magicDamagedIncrease": "final_m_dmg_res",
  "magicDps": "m_dps",
  "magicPenetrationLevel": "m_pen",
  "magicPenetrationIncrease": "final_m_pen",
  "magicRebound": "m_reflect",
  "magicVampire": "m_lifesteal",
  "fixedMagicDamage": "m_dmg_bonus",
  "fixedMagicDamageReduce": "m_dmg_res",
  # physical
  "physicDefenseLevel": "p_def",
  "finalPhysicDefenseIncrease": "final_p_def",
  "physicDps": "p_dps",
  "physicDamageIncrease": "final_p_dmg_bonus",
  "physicDamagedIncrease": "final_p_dmg_res",
  "physicPenetrationLevel": "p_pen",
  "physicPenetrationIncrease": "final_p_pen",
  "fixedPhysicDamage": "p_dmg_bonus",
  "fixedPhysicDamageReduce": "p_dmg_res",
  "vampire": "p_lifesteal",
  "rebound": 'p_reflect',
}

# Columns kept for the database, in output order. m_reflect, m_lifesteal and
# p_lifesteal are renamed above but are not part of this selection.
monster_output_columns = [
  'id', 'en_name', 'b_exp',
  'crit', 'final_crit', 'crit_res', 'final_crit_res',
  'dodge', 'final_dodge', 'final_aspd', 'hit', 'final_hit', 'j_exp',
  'm_def', 'final_m_def', 'final_m_dmg_bonus', 'final_m_dmg_res',
  'm_dps', 'm_pen', 'final_m_pen', 'm_dmg_bonus', 'm_dmg_res',
  'max_hp', 'location', 'size',
  'p_def', 'final_p_def', 'p_dps', 'final_p_dmg_bonus', 'final_p_dmg_res',
  'final_p_pen', 'p_pen', 'p_reflect', 'p_dmg_res', 'p_dmg_bonus',
  'property', 'race', 'type', 'level', 'res_id', 'description', "zeny",
]

monster_df = monster_df.rename(columns=monster_column_names)
monster_df = monster_df[monster_output_columns]

### Data correction

In [None]:
# Value used for a column wherever the dump had no value for that monster.
monster_default_values = {
  **dict.fromkeys(
    ['crit', 'crit_res', 'dodge', 'hit', 'j_exp', 'm_def', 'm_dps', 'm_pen',
     'max_hp', 'location', 'p_def', 'p_dps', 'p_pen'],
    0,
  ),
  "size": 1,
  "race": 32,
  "property": 0,
  "type": 0,
  "level": 1,
}

for col, default_value in monster_default_values.items():
  monster_df[col] = monster_df[col].fillna(default_value)

# The categorical columns hold ids at this point; name them accordingly so the
# readable labels can later be merged in under the plain names.
monster_df = monster_df.rename(columns={
  "size": "size_id",
  "property": "attr_id",
  "race": "race_id",
  "type": "type_id",
  "location": "loc_id",
})

### Merge with information from `en_langs`

#### Save data for database

#### Monster size

In [None]:
monster_size_df = pd.DataFrame({"size_id": [0, 1, 2, 3], "size": ["Large", "Medium", "Small", "Giant"]})
monster_df["size_id"] = monster_df["size_id"].astype(float)
monster_df = pd.merge(monster_df, monster_size_df, how="left", left_on="size_id", right_on="size_id")

#### Monster attributes

In [None]:
# Attribute labels from the language table, keyed by `attr_id`.
monster_attr_df = (
  pd.DataFrame(parsed_data["attr"])
  .rename(columns={"id": "attr_id", "value": "attr"})
)
# Cast to float and shift the language-table ids down by one, so that a
# monster `attr_id` of k joins the label stored under k + 1.
monster_attr_df["attr_id"] = monster_attr_df["attr_id"].astype(float) - 1

# Both join keys are float so the merge compares like with like.
monster_df["attr_id"] = monster_df["attr_id"].astype(float)
monster_df = monster_df.merge(monster_attr_df, how="left", on="attr_id")

#### Monster race

In [None]:
monster_race_df = pd.DataFrame(parsed_data["race"])
monster_race_df = monster_race_df.rename(columns={"id": "race_id", "value": "race"})
monster_race_df["race_id"] = monster_race_df["race_id"].astype(float)
monster_df["race_id"] = monster_df["race_id"].astype(float)
monster_df = pd.merge(monster_df, monster_race_df, how="left", left_on="race_id", right_on="race_id")

#### Monster location

In [None]:
monster_loc_df = pd.DataFrame(parsed_data["scene_name"])
monster_loc_df = monster_loc_df.rename(columns={"id": "loc_id", "value": "loc"})
monster_loc_df["loc_id"] = monster_loc_df["loc_id"].astype(float)
monster_df["loc_id"] = monster_df["loc_id"].astype(float)
monster_df = pd.merge(monster_df, monster_loc_df, how="left", left_on="loc_id", right_on="loc_id")

### Data correction (cont)

In [None]:
# The id columns are no longer needed once the readable labels are merged in.
monster_df = monster_df.drop(columns=["size_id", "attr_id", "race_id", "type_id"])

# Lower-case every textual label column.
cols = ["en_name", "size", "attr", "race"]
for col in cols:
  monster_df[col] = monster_df[col].str.lower()

# Monsters whose race id had no label fall back to "demi-human".
monster_df["race"] = monster_df["race"].fillna("demi-human")
monster_df = monster_df.rename(columns={"en_name": "name"})

In [None]:
monster_df = monster_df.query("loc != '???'")

In [None]:
monster_final_df = monster_df.copy()

#### Save data for database

In [None]:
monster_final_df.to_csv(f"{MAIN_PARSED_DIR}/monster_{APK_DATE}.csv", index=False)

## Monster Drop

### Get Monster Drop information from Monster

In [None]:
# Get monster drop info
drop_records = []

for monster_drop_record in monster_drop_records:
  try:
    for drop in monster_drop_record["DropListKV"].items():
      drop_record = {}

      drop_id, value = drop
      drop_record["drop_id"] = drop_id
      drop_record["value"] = value
      drop_record["monster_name"] = monster_drop_record["en_name"]
      drop_record["monster_id"] = monster_drop_record["id"]

      drop_records.append(drop_record)
  except:
    drop_record["drop_id"] = drop_id
    drop_record["value"] = value
    drop_record["monster_name"] = monster_drop_record["en_name"]
    drop_record["monster_id"] = monster_drop_record["id"]

    drop_records.append(drop_record)

monster_drop_df = pd.DataFrame(drop_records)

### Generate Monster Random Drop

In [None]:
# Unique (monster, drop) links; `id` is the pre-de-duplication row index.
monster_random_drop_df = (
  monster_drop_df[["monster_id", "drop_id"]]
  .drop_duplicates()
  .reset_index()
  .rename(columns={"index": "id"})
)
# Links with drop id 0 are discarded.
monster_random_drop_df = monster_random_drop_df.loc[monster_random_drop_df["drop_id"] != 0]

# Keep only links whose drop id exists in the random-drop main table ...
linked_drop_ids = monster_random_drop_df["drop_id"].astype(int)
intersections = set(linked_drop_ids) & set(random_drop_main_df["id"].astype(int))
monster_random_drop_df = monster_random_drop_df[linked_drop_ids.isin(intersections)]

# ... and whose monster id exists in the monster table.
linked_monster_ids = monster_random_drop_df["monster_id"].astype(int)
intersections = set(linked_monster_ids) & set(monster_df["id"].astype(int))
monster_random_drop_df = monster_random_drop_df[linked_monster_ids.isin(intersections)]

monster_random_drop_df.to_csv(f"{MAIN_PARSED_DIR}/monster_random_drop_{APK_DATE}.csv", index=False)

## Monster Skills

### Get information from Monster

In [None]:
monster_skills_records = monster_skills_raw_df.to_dict(orient="records")

In [None]:
# One row per skill listed on a monster: the owning monster's id plus the
# skill's `skillId` and `castWeight`.
skill_records = [
  {
    "monster_id": monster["id"],
    "skill_id": skill["skillId"],
    "cast_weight": skill["castWeight"],
  }
  for monster in monster_skills_records
  for skill in monster["skills"]
]

monster_skill_df = pd.DataFrame(skill_records)

In [None]:
monster_skill_df = monster_skill_df.reset_index().rename(columns={"index": "id"})

### Check for missing information

In [None]:
# Keep only rows that reference BOTH a known skill and a known monster.
# Previously the second filter started again from `monster_skill_df`, which
# silently discarded the skill-id filter; the two conditions are now combined.
known_skill_ids = set(skill_complex_final_df["id"].astype(int))
known_monster_ids = set(monster_final_df["id"].astype(int))

has_known_skill = monster_skill_df["skill_id"].astype(int).isin(known_skill_ids)
has_known_monster = monster_skill_df["monster_id"].astype(int).isin(known_monster_ids)

monster_skill_final_df = monster_skill_df[has_known_skill & has_known_monster]

#### Save data for database

In [None]:
monster_skill_final_df.to_csv(f"{MAIN_PARSED_DIR}/monster_skill_final_{APK_DATE}.csv", index=False)

## Boss

### Get Boss information from Monster

In [None]:
monster_info_df = monster_info_df[~monster_info_df["id"].isnull()]

In [None]:
monster_info_df["id"] = monster_info_df["id"].astype(int)

mvp_mini_info_df = monster_info_df[(~monster_info_df["MvpRareDrop"].isnull()) & (monster_info_df["id"] < 32000)]

In [None]:
mvp_mini_info_df = mvp_mini_info_df[mvp_mini_info_df["level"] <= 120]

### Filter columns to contain columns of interest

In [None]:
# Raw dump column -> database column. Columns whose name does not change
# (id, property, race, type, level) need no entry.
boss_column_names = {
  # experience / misc
  "baseExp": "b_exp",
  "jobExp": "j_exp",
  "maxHp": "max_hp",
  "navSceneId": "location",
  "bodily": "size",
  "resId": "res_id",
  "en_desc": "description",
  # accuracy / crit / dodge / attack speed
  "attackSpeedIncrease": "final_aspd",
  "criticalLevel": "crit",
  "criticalRate": "final_crit",
  "criticalResistanceLevel": "crit_res",
  "criticalResistanceRate": "final_crit_res",
  "dodgeLevel": "dodge",
  "dodgeRate": "final_dodge",
  "hitLevel": "hit",
  "hitIncrease": "final_hit",
  # magic
  "magicDefenseLevel": "m_def",
  "finalMagicDefenseIncrease": "final_m_def",
  "magicDamageIncrease": "final_m_dmg_bonus",
  "magicDamagedIncrease": "final_m_dmg_res",
  "magicDps": "m_dps",
  "magicPenetrationLevel": "m_pen",
  "magicPenetrationIncrease": "final_m_pen",
  "magicRebound": "m_reflect",
  "magicVampire": "m_lifesteal",
  "fixedMagicDamage": "m_dmg_bonus",
  "fixedMagicDamageReduce": "m_dmg_res",
  # physical
  "physicDefenseLevel": "p_def",
  "finalPhysicDefenseIncrease": "final_p_def",
  "physicDps": "p_dps",
  "physicDamageIncrease": "final_p_dmg_bonus",
  "physicDamagedIncrease": "final_p_dmg_res",
  "physicPenetrationLevel": "p_pen",
  "physicPenetrationIncrease": "final_p_pen",
  "fixedPhysicDamage": "p_dmg_bonus",
  "fixedPhysicDamageReduce": "p_dmg_res",
  "vampire": "p_lifesteal",
  "rebound": 'p_reflect',
}

# Columns kept for the database, in output order. m_reflect, m_lifesteal and
# p_lifesteal are renamed above but are not part of this selection.
boss_output_columns = [
  'id', 'en_name', 'b_exp',
  'crit', 'final_crit', 'crit_res', 'final_crit_res',
  'dodge', 'final_dodge', 'final_aspd', 'hit', 'final_hit', 'j_exp',
  'm_def', 'final_m_def', 'final_m_dmg_bonus', 'final_m_dmg_res',
  'm_dps', 'm_pen', 'final_m_pen', 'm_dmg_bonus', 'm_dmg_res',
  'max_hp', 'location', 'size',
  'p_def', 'final_p_def', 'p_dps', 'final_p_dmg_bonus', 'final_p_dmg_res',
  'final_p_pen', 'p_pen', 'p_reflect', 'p_dmg_res', 'p_dmg_bonus',
  'property', 'race', 'type', 'level', 'res_id', 'description', "zeny",
]

mvp_mini_df = mvp_mini_info_df.rename(columns=boss_column_names)
mvp_mini_df = mvp_mini_df[boss_output_columns]

### Data Correction

In [None]:
# Numeric stat columns that default to 0 when the dump has no value.
# The list is spelled out here instead of relying on whatever `cols` was left
# holding by an earlier cell: at this point that was a list of label columns,
# so `race` and `size` were zero-filled before the defaults below could apply.
# NOTE(review): this is the list the Monster section zero-fills - confirm it
# is also the intended set for bosses.
numeric_cols = ['crit', 'crit_res', 'dodge', 'hit', 'j_exp', 'm_def', 'm_dps', 'm_pen', 'max_hp', 'location', 'p_def', 'p_dps', 'p_pen']

for col in numeric_cols:
  if col in mvp_mini_df.columns:
    mvp_mini_df[col] = mvp_mini_df[col].fillna(0)

mvp_mini_df["race"] = mvp_mini_df["race"].fillna(32)
cols = ['type', 'size']

for col in cols:
  if col in mvp_mini_df.columns:
    mvp_mini_df[col] = mvp_mini_df[col].fillna(1)

mvp_mini_df["level"] = mvp_mini_df["level"].fillna(110)

# The categorical columns hold ids at this point; name them accordingly so the
# readable labels can be merged in under the plain names.
mvp_mini_df = mvp_mini_df.rename(columns={"size": "size_id", "property": "attr_id", "race": "race_id", "type": "type_id", "location": "loc_id"})

### Merge with information from `en_langs`

#### Boss size

In [None]:
mvp_mini_df = pd.merge(mvp_mini_df, monster_size_df, how="left", left_on="size_id", right_on="size_id")

#### Boss attribute

In [None]:
mvp_mini_df = pd.merge(mvp_mini_df, monster_attr_df, how="left", left_on="attr_id", right_on="attr_id")

#### Boss race

In [None]:
mvp_mini_df = pd.merge(mvp_mini_df, monster_race_df, how="left", left_on="race_id", right_on="race_id")

### Final correction

In [None]:
mvp_mini_df = mvp_mini_df.rename(columns=({"en_name": "name"}))

In [None]:
mvp_mini_df= mvp_mini_df.drop(["size_id", "attr_id", "race_id"], axis=1)

cols = ["name", "size", "attr", "race"]

for col in cols:
  mvp_mini_df[col] = mvp_mini_df[col].str.lower()

mvp_mini_df["race"] = mvp_mini_df["race"].fillna("demi-human")

In [None]:
mvp_mini_df["attr"] = mvp_mini_df["attr"].fillna("neutral")

### Save data for database

In [None]:
boss_df = mvp_mini_df.copy()

In [None]:
boss_final_df = boss_df[~boss_df["name"].isnull()]

In [None]:
boss_final_df = boss_final_df.drop(["type_id", "loc_id"], axis=1)

In [None]:
boss_final_df.to_csv(f"{MAIN_PARSED_DIR}/boss_{APK_DATE}.csv", index=False)

## Boss Drop

### Get Boss Drop information from Boss

In [None]:
boss_drop_records = mvp_mini_info_df[["id", "MvpRareDrop"]].to_dict(orient="records")

# One row per (boss, drop id) pair found in the boss's MvpRareDrop list.
drop_records = []

for boss_drop_record in boss_drop_records:
  rare_drops = boss_drop_record["MvpRareDrop"]

  # Anything that is not a list of drop entries cannot be expanded. The
  # previous fallback re-used `drop_record` / `drop` from an earlier
  # iteration (or an earlier cell), overwriting and re-appending a stored
  # record, so malformed values are skipped instead.
  if not isinstance(rare_drops, (list, tuple)):
    continue

  for drop in rare_drops:
    if not isinstance(drop, dict) or "DropId" not in drop:
      continue

    drop_records.append({
      "drop_id": drop["DropId"],
      "boss_id": boss_drop_record["id"],
    })

# Explicit columns keep the frame's schema stable even when no drop was found.
boss_drop_df = pd.DataFrame(drop_records, columns=["drop_id", "boss_id"])

### Merge with Random Drop

In [None]:
# Expose the row index as the `id` column.
boss_random_drop_df = boss_drop_df.reset_index().rename(columns={"index": "id"})

# Keep only links whose drop id exists in the random-drop main table ...
linked_drop_ids = boss_random_drop_df["drop_id"].astype(int)
intersections = set(linked_drop_ids) & set(random_drop_main_df["id"].astype(int))
boss_random_drop_df = boss_random_drop_df[linked_drop_ids.isin(intersections)]

# ... and whose boss id exists in the saved boss table.
linked_boss_ids = boss_random_drop_df["boss_id"].astype(int)
intersections = set(linked_boss_ids) & set(boss_final_df["id"].astype(int))
boss_random_drop_df = boss_random_drop_df[linked_boss_ids.isin(intersections)]

### Save Boss Drop for database

In [None]:
boss_random_drop_df.to_csv(f"{MAIN_PARSED_DIR}/boss_random_drop_{APK_DATE}.csv", index=False)

## Creature

### Get data from Monster and Boss

In [None]:
# Shared creature columns from the monster and boss tables, tagged with an
# `is_boss` flag. `assign` builds a new frame, so the flag is never written
# onto a slice of the source table (chained assignment).
creature_columns = ["id", "name", "description", "res_id"]

creature_one_df = monster_final_df[creature_columns].assign(is_boss=0)

creature_two_df = boss_final_df[creature_columns].assign(is_boss=1)

In [None]:
# Stack monsters on top of bosses with a fresh 0..n-1 index.
# `pd.concat` replaces `DataFrame.append`, which was removed in pandas 2.0.
creature_final_df = pd.concat([creature_one_df, creature_two_df], ignore_index=True)
creature_final_df["name"] = creature_final_df["name"].str.lower()

In [None]:
creature_final_df = creature_final_df.copy()
creature_df = creature_final_df.copy()

### Save data for database

In [None]:
creature_final_df.to_csv(f"{MAIN_PARSED_DIR}/creature_{APK_DATE}.csv", index=False)

## Instance Drop

### Get data for Instance Drop

In [None]:
drop_info_df["id"] = drop_info_df["id"].astype(int)
instance_drop_info_df = drop_info_df.query("id < 208999 and id >= 201000")

### Get Instance Fixed Drop information

#### Create entries for DataFrame creation

In [None]:
instance_fixed_drop_info_fin_df = fixed_drop_info_fin_df.query("drop_id < 208999 and drop_id >= 201000")

#### Data manipulation

In [None]:
instance_fixed_drop_info_fin_df = (
  instance_fixed_drop_info_fin_df
  .assign(
    # The instance id is the drop id without its last two digits.
    instance_id=lambda df: df["drop_id"] // 100,
    # Item ids are shifted by -1 + 1000; a shifted id of exactly 1000 is
    # then replaced by 1001.
    item_id=lambda df: (df["item_id"] - 1 + 1000).apply(lambda x: 1001 if x == 1000 else x),
  )
  # The pre-existing row index becomes the `id` column.
  .reset_index()
  .rename(columns={"index": "id", "item_id": "material_id", "item_num": "material_num"})
)

#### Save Instance Fixed Drop for database

In [None]:
instance_fixed_drop_info_fin_df.to_csv(f"{MAIN_PARSED_DIR}/instance_fixed_drop_{APK_DATE}.csv", index=False)

### Get Instance Random Drop information

In [None]:
instance_random_drop_info_fin_df = random_drop_info_fin_df.query("drop_id < 208999 and drop_id >= 201000")

#### Create entries for DataFrame creation

#### Data manipulation

In [None]:
# NOTE: rows are added with `pd.concat`; `DataFrame.append` was removed in
# pandas 2.0. Each concat rebinds the frame, and later queries deliberately
# run against the grown frame, so the statement order below matters.

# The instance id is the drop id without its last two digits.
instance_random_drop_info_fin_df["instance_id"] = instance_random_drop_info_fin_df["drop_id"] // 100
eligible_instances = instance_random_drop_info_fin_df["instance_id"].unique()[np.where(instance_random_drop_info_fin_df["instance_id"].unique() >= 2020)]
eligible_instances = eligible_instances[np.where((eligible_instances % 10 != 3) & (eligible_instances % 10 != 4))]

# Step 1: every eligible instance (id >= 2020, last digit not 3 or 4) receives
# a copy of the rows of drop 202300, re-keyed to (instance id + 100) * 100.
for eligible_instance in eligible_instances:
  temp_df = instance_random_drop_info_fin_df.query("drop_id == 202300").copy()

  temp_df["drop_id"] = (eligible_instance + 100) * 100
  temp_df["instance_id"] = eligible_instance
  instance_random_drop_info_fin_df = pd.concat([instance_random_drop_info_fin_df, temp_df], ignore_index=True)

# Step 2: redistribute the drops of the instances 2013, 2023, ..., 2083.
eligible_instances = [2000 + (i * 10) + 3 for i in range(1, 9)]

for eligible_instance in eligible_instances:
  drop_ids = [eligible_instance * 100 + 1, eligible_instance * 100 + 2]
  temp_df = instance_random_drop_info_fin_df.query(f"instance_id == {eligible_instance} and drop_id == @drop_ids").copy()

  # Drops ..01 / ..02 are copied to the instances 2 and 3 below this one.
  # `temp_df` is re-used across both passes, so the drop id offset
  # accumulates: +99 on the first pass and a further +100 on the second.
  # NOTE(review): confirm the cumulative offset is intended.
  for i in range(2):
    temp_df["instance_id"] = eligible_instance - (i + 2)
    temp_df["drop_id"] = temp_df["drop_id"] + i + 99 
    instance_random_drop_info_fin_df = pd.concat([instance_random_drop_info_fin_df, temp_df], ignore_index=True)

  # Two further drops are copied (drop id unchanged) to the instance directly
  # below this one: ..06 / ..09 for instance 2023, ..03 / ..04 for the others.
  if eligible_instance != 2023:
    drop_ids = [eligible_instance * 100 + 3, eligible_instance * 100 + 4]
  else:
    drop_ids = [eligible_instance * 100 + 6, eligible_instance * 100 + 9]

  temp_df = instance_random_drop_info_fin_df.query(f"instance_id == {eligible_instance} and drop_id == @drop_ids").copy()

  temp_df["instance_id"] = eligible_instance - 1

  instance_random_drop_info_fin_df = pd.concat([instance_random_drop_info_fin_df, temp_df], ignore_index=True)

# Finally drop every row belonging to an instance whose id ends in 3 or 4.
instance_random_drop_info_fin_df = instance_random_drop_info_fin_df[instance_random_drop_info_fin_df["instance_id"] % 10 != 3]
instance_random_drop_info_fin_df = instance_random_drop_info_fin_df[instance_random_drop_info_fin_df["instance_id"] % 10 != 4]

In [None]:
available_instances = instance_random_drop_info_fin_df["instance_id"].unique().tolist()

# The rows of drop 202305 serve as the template for the extra drop; the lookup
# does not depend on the loop variable, so it is done once.
extra_drop_template_df = random_drop_info_fin_df.query("drop_id == 202305")

# Every available instance gets its own copy of the template, re-keyed to
# drop id (instance id + 58 + 290000).
for available_instance in available_instances:
  extra_drop_temp_df = extra_drop_template_df.copy()
  extra_drop_temp_df["drop_id"] = available_instance + 58 + 290000
  extra_drop_temp_df["instance_id"] = available_instance

  # `pd.concat` replaces `DataFrame.append`, which was removed in pandas 2.0.
  instance_random_drop_info_fin_df = pd.concat([instance_random_drop_info_fin_df, extra_drop_temp_df], ignore_index=True)

#### Save Complex Instance Random Drop for database

In [None]:
# Unique (drop, instance) links; `id` is the pre-de-duplication row index.
instance_complex_random_drop_df = (
  instance_random_drop_info_fin_df[["drop_id", "instance_id"]]
  .drop_duplicates()
  .reset_index()
  .rename(columns={"index": "id"})
)

# Keep only links whose drop id exists in the random-drop main table.
linked_drop_ids = instance_complex_random_drop_df["drop_id"].astype(int)
intersections = set(linked_drop_ids) & set(random_drop_main_df["id"].astype(int))
instance_complex_random_drop_df = instance_complex_random_drop_df[linked_drop_ids.isin(intersections)]

instance_complex_random_drop_df.to_csv(f"{MAIN_PARSED_DIR}/instance_random_drop_{APK_DATE}.csv", index=False)

## Instance

### Get data from Instance Random Drop

In [None]:
instance_df = instance_random_drop_info_fin_df[["instance_id"]].dropna().drop_duplicates().reset_index(drop=True)

### Merge with information from `en_langs`

#### Scene Name

In [None]:
# Scene names from the language table, with integer ids for the join.
scene_name_df = (
  pd.DataFrame(parsed_data["scene_name"])
  .rename(columns={"value": "name"})
  .assign(id=lambda df: df["id"].apply(int))
)

# Attach the scene name to every instance (left join: instances without a
# matching scene keep a null name).
instance_df = (
  instance_df
  .rename(columns={"instance_id": "id"})
  .merge(scene_name_df, how="left", on="id")
)

In [None]:
# Names set by hand for specific instance ids, replacing whatever the scene
# table provided for them.
manual_instance_names = {
  2030: "Ancient Pyramid",
  2031: "Ancient Pyramid (Hard)",
  2040: "Lost Temple",
  2041: "Lost Temple (Hard)",
}

for manual_id, manual_name in manual_instance_names.items():
  instance_df.loc[instance_df["id"] == manual_id, "name"] = manual_name

### Data manipulation

In [None]:
def generate_instance_name(row):
  """
  Build the display name of an instance from the name of its base instance

  The last digit of the id selects the difficulty suffix (0 -> Normal,
  1 -> Hard, 2 -> Nightmare); the base instance is the id with that digit
  subtracted, and its name is looked up in the global `instance_df`.

  Parameters
  ----------
  row
    Row (or any mapping) exposing the instance id under the key `id`

  Returns
  -------
  name
    `<base name> (<difficulty>)`, or None when the id ends in any other digit
  """
  suffix_by_last_digit = {0: "Normal", 1: "Hard", 2: "Nightmare"}

  last_digit = row["id"] % 10
  if last_digit not in suffix_by_last_digit:
    return None

  base_id = row["id"] - last_digit
  base_name = instance_df.query(f"id == {base_id}")["name"].values[0]
  return f"{base_name} ({suffix_by_last_digit[last_digit]})"

# Replace every name with its difficulty-suffixed form, computed row by row
# (axis=1) by generate_instance_name.
instance_df["name"] = instance_df.apply(generate_instance_name, axis=1)

## Instance Group

### Parse data

In [None]:
# Load the InstanceGroup dump and flatten it into one space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_InstanceGroup_InstanceGroup.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}   ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the brace sequences for placeholders before rewriting them. The order
# matters: "}, }," is a substring of the longer sequences handled first.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: A, B and C each turn one brace into a square
# bracket; D, E and F are restored with their braces intact.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

instance_groups = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed InstanceGroup record. Built once
# instead of on every loop iteration.
default_key_dict = {
    "id": "id",
    "active_condition": "activeCondition",
    "boss_id": "bossId",
    "boss_skill_id": "bossSkillid",
    "crystal_res_id": "crystalResId",
    "hero_boss_id": "heroBossId",
    "hero_id": "heroId",
    "instance_description": "instanceDescription",
    "name": "name",
    "normal_id": "normalId",
    "normal_res_id": "normalResId",
    "normal_target": "normalTarget",
    "purgatorial_scene_id": "purgatorialSceneId",
    "purgatorial_unlock_lv": "purgatorialUnlockLv",
}

instance_group_entries = []

# `group_key` instead of `id`, so the builtin id() is not shadowed afterwards.
for group_key, parsed_dict in instance_groups.items():
  # Start from the table key; a record-level "id" field overrides it below.
  instance_group_entry = {"id": group_key}
  for col, def_key in default_key_dict.items():
    try:
      instance_group_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent, or the record is not a mapping: leave the column unset
      # so it shows up as NaN in the DataFrame.
      continue
  instance_group_entries.append(instance_group_entry)

instance_group_df = pd.DataFrame(instance_group_entries)

### Data filtration

In [None]:
# Cast the ids to int, then keep only the groups whose id is at most 10.
instance_group_ids = instance_group_df["id"].astype(int)
instance_group_df["id"] = instance_group_ids
instance_group_df = instance_group_df[instance_group_ids <= 10]

### Merge with information from `en_langs`

#### Instance description

In [None]:
# Description texts from parsed_data, keyed by an integer id.
description_records = parsed_data["instance_description"]
instance_description_df = pd.DataFrame(description_records).rename(columns={"value": "description"})
instance_description_df["id"] = instance_description_df["id"].apply(int)

# Re-key every group by the number embedded in its "InstanceDescription<N>"
# reference. This overwrites the group's previous id.
description_refs = instance_group_df["instance_description"]
instance_group_df["id"] = description_refs.apply(
    lambda ref: int(ref.replace("InstanceDescription", ""))
).apply(int)

# Left join: groups without a matching description are kept.
instance_group_df = instance_group_df.merge(
    instance_description_df, how="left", left_on=["id"], right_on=["id"]
)

### Data correction and manipulation

In [None]:
# Keep only the group columns needed downstream; normal_id becomes the join
# key "code".
instance_group_df = instance_group_df[["normal_id", "description", "boss_id", "boss_skill_id", "crystal_res_id"]].rename(columns={"normal_id": "code"})

# The code of an instance is its id with the last digit cleared, matching the
# `id % 10` difficulty convention used elsewhere in this notebook.
# round(x, -1) was used before, but it rounds ids ending in 5-9 up into the
# next group (e.g. 2035 -> 2040).
instance_df["code"] = instance_df["id"].apply(lambda x: x - x % 10)

In [None]:
# Attach the group-level columns to every instance through the shared "code"
# key; instances without a matching group are kept (left join).
instance_df = instance_df.merge(
    instance_group_df, how="left", left_on=["code"], right_on=["code"]
)

def get_difficulty(x):
  """
  Map an instance id to its difficulty label using the id's last digit

  Parameters
  ----------
  x
    Numeric instance id

  Returns
  -------
  difficulty
    `normal` (last digit 0), `hard` (1), `nightmare` (2), or None for any
    other last digit
  """
  difficulty_by_last_digit = {0: "normal", 1: "hard", 2: "nightmare"}
  return difficulty_by_last_digit.get(x % 10)

# Label each instance from the last digit of its id, then keep only the
# columns that are exported.
instance_df["difficulty"] = instance_df["id"].apply(get_difficulty)
instance_export_columns = ["id", "name", "description", "boss_id", "crystal_res_id", "difficulty"]
instance_df = instance_df[instance_export_columns]
instance_df["name"] = instance_df["name"].str.lower()

# Descriptions 3 and 5 are reused for the Lost Temple / Ancient Pyramid rows.
description_ids = instance_description_df["id"]
lost_temple_desc = instance_description_df.loc[description_ids == 3, "description"].values[0]
ancient_pyramid_desc = instance_description_df.loc[description_ids == 5, "description"].values[0]

# Hard-coded description and boss for the two instance families.
ancient_pyramid_rows = instance_df["id"].isin([2030, 2031, 2032])
lost_temple_rows = instance_df["id"].isin([2040, 2041, 2042])

instance_df.loc[ancient_pyramid_rows, "description"] = ancient_pyramid_desc
instance_df.loc[lost_temple_rows, "description"] = lost_temple_desc

instance_df.loc[ancient_pyramid_rows, "boss_id"] = 30013
instance_df.loc[lost_temple_rows, "boss_id"] = 30005

### Save data for database

In [None]:
# Write the instance table to the parsed-output folder (no index column).
instance_csv_path = f"{MAIN_PARSED_DIR}/instance_{APK_DATE}.csv"
instance_df.to_csv(instance_csv_path, index=False)

## Refine

### Parse data

In [None]:
# Load the Refine dump and flatten it into one space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_equip_Refine.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\]=', r'\g<1>:', texts)    # [key]=   ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Empty tables become empty lists.
texts = texts.replace("{}", "[]")

# Protect the two sequences that contain "} }" before it is rewritten.
texts = texts.replace("} } }", "X X X X")
texts = texts.replace("} }, {", "A A A A")

# "{ {" opens a list and the remaining "} }" closes one; afterwards the
# protected sequences are expanded ("} } }" gains a closing bracket,
# "} }, {" is restored unchanged).
texts = texts.replace("{ {", "[ {")
texts = texts.replace("} }", "} ]")
texts = texts.replace("X X X X", "} } ]")
texts = texts.replace("A A A A", "} }, {")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

refines = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed Refine record. Built once instead of
# on every loop iteration.
default_key_dict = {
    "id": "id",
    "inherit_zeny": "InheritZeny",
    "break_refine": "break_refine",
    "break_num": "break_num",
    "break_rate": "break_rate",
    "downgrade_lv": "downgrade_lv",
    "downgrade_rate": "downgrade_rate",
    "extraupgrade_rate": "extraupgrade_rate",
    "leavebreak_rate": "leavebreak_rate",
    "leavedowngrade_rate": "leavedowngrade_rate",
    "leaveupgrade_rate": "leaveupgrade_rate",
    "need_luck": "need_luck",
    "need_zeny": "need_zeny",
    "normal_material": "normal_material",
    "normal_num": "normal_num",
    "property_id": "propertyId",
    "property_percent": "propertyPercent",
    "quality": "quality",
    "refine_id": "refineId",
    "refine_lv": "refine_lv",
    "return_luck": "return_luck",
    "return_material": "return_material",
    "return_num": "return_num",
    "special_material": "special_material",
    "special_num": "special_num",
    "upgrade_rate": "upgrade_rate",
}

refine_entries = []

# `refine_key` instead of `id`, so the builtin id() is not shadowed afterwards.
for refine_key, parsed_dict in refines.items():
  # Start from the table key; a record-level "id" field overrides it below.
  refine_entry = {"id": refine_key}
  for col, def_key in default_key_dict.items():
    try:
      refine_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent, or the record is not a mapping: leave the column unset
      # so it shows up as NaN in the DataFrame.
      continue
  refine_entries.append(refine_entry)

refine_df = pd.DataFrame(refine_entries)

### Data manipulation

In [None]:
# Refine rows without a quality default to quality 1.
refine_df["quality"] = refine_df["quality"].fillna(1)

# (equip_id, refine_id) pairs for every equipment that has a refine_id.
equip_refine_df = equip_df[["id", "refine_id"]].copy().rename(columns={"id": "equip_id"})
equip_refine_df = equip_refine_df[~equip_refine_df["refine_id"].isnull()]
# Columns of refine_df that the refine exports are built from.
refine_fin_df = refine_df[["id", "inherit_zeny", "need_zeny", "normal_material", "normal_num", "property_id", "property_percent", "refine_lv", "break_rate", "refine_id", "downgrade_rate", "upgrade_rate", "special_material", "special_num", "quality"]]
# refine_fin_df = refine_fin_df[~refine_fin_df["property_id"].isnull()]

# Rows without a property_id get the list [49, 17]. The value is assigned as
# an array of lists so that each selected row receives the whole list.
isnull = refine_fin_df["property_id"].isnull()
refine_fin_df.loc[isnull, 'property_id'] = pd.Series([[49, 17]] * isnull.sum()).values

# normal_num keeps only the first element of each list.
# NOTE(review): a missing value becomes the string "8" (list("8")[0]), not
# the integer 8 - confirm that the mixed types are acceptable downstream.
refine_fin_df['normal_num'] = refine_fin_df['normal_num'].fillna("8").apply(list).str[0]

# Missing numeric fields default to 0.
for col in ["property_percent", "refine_lv", "break_rate", "downgrade_rate", "special_material", "special_num"]:
  refine_fin_df[col] = refine_fin_df[col].fillna(0)

# normal_material keeps only the first element; a missing value goes through
# an empty list and therefore ends up as NaN.
refine_fin_df["normal_material"] = refine_fin_df["normal_material"].fillna("").apply(list).str[0]

# property_percent always becomes a list: the 0 placeholder set above expands
# to one zero per property_id, anything else is converted with list().
refine_fin_df["property_percent"] = refine_fin_df.apply(lambda x: [0] * len(x["property_id"]) if x["property_percent"] == 0 else list(x["property_percent"]), axis=1)
# Rows without a refine_id are dropped.
refine_fin_df = refine_fin_df[~refine_fin_df["refine_id"].isnull()]

### Merge with Item

In [None]:
equip_refine_columns = [
    "id", "refine_id", "inherit_zeny", "need_zeny", "normal_material",
    "normal_num", "break_rate", "upgrade_rate", "downgrade_rate", "refine_lv",
    "special_material", "special_num", "quality",
]
equip_refine_fin_df = refine_fin_df[equip_refine_columns]

item_name_df["id"] = item_name_df["id"].apply(int)

# Resolve both material ids to item names with left joins against the item
# name table, then drop the raw id columns.
normal_item_lookup_df = item_name_df.rename(columns={"id": "normal_material", "value": "normal_item"})
special_item_lookup_df = item_name_df.rename(columns={"id": "special_material", "value": "special_item"})

equip_refine_fin_df = equip_refine_fin_df.merge(
    normal_item_lookup_df, how="left", left_on=["normal_material"], right_on=["normal_material"]
)
equip_refine_fin_df = equip_refine_fin_df.merge(
    special_item_lookup_df, how="left", left_on=["special_material"], right_on=["special_material"]
)
equip_refine_fin_df = (
    equip_refine_fin_df
    .drop(["normal_material", "special_material"], axis=1)
    .rename(columns={"item_name_x": "normal_item", "item_name_y": "special_item"})
)

### Save data for database

In [None]:
# Write the refine table to the parsed-output folder (no index column).
equip_refine_csv_path = f"{MAIN_PARSED_DIR}/equip_refine_{APK_DATE}.csv"
equip_refine_fin_df.to_csv(equip_refine_csv_path, index=False)

## Refine Attributes

### Get data from Refine

In [None]:
# One dict per refine row, limited to the columns the attribute table needs.
refine_attr_source_columns = ["refine_id", "property_id", "property_percent", "refine_lv", "quality"]
refine_fin_entries = refine_fin_df[refine_attr_source_columns].to_dict(orient="records")

### Create entries for DataFrame creation

In [None]:
# Expand every refine row into one row per property it grants.
refine_attr_entries = []

for refine_fin_entry in refine_fin_entries:
  property_percents = refine_fin_entry["property_percent"]

  for i, property_id in enumerate(refine_fin_entry["property_id"]):
    # When there are fewer percents than properties, the first percent is
    # reused (this used to be done through a bare `except`).
    if i < len(property_percents):
      property_percent = property_percents[i]
    else:
      property_percent = property_percents[0]

    refine_attr_entries.append({
        "refine_id": refine_fin_entry["refine_id"],
        "property_id": property_id,
        "property_percent": property_percent,
        "refine_lv": refine_fin_entry["refine_lv"],
        "quality": refine_fin_entry["quality"],
    })

equip_refine_attr_df = pd.DataFrame(refine_attr_entries)

### Data manipulation

In [None]:
# Property texts from parsed_data, used to label the refine attributes.
property_records = parsed_data["property"]
equipment_attr_desc_df = pd.DataFrame(property_records)

In [None]:
# Sequential id, one per attribute row.
equip_refine_attr_df["id"] = pd.Series(range(len(equip_refine_attr_df)))
equipment_attr_desc_df["id"] = equipment_attr_desc_df["id"].apply(int)

# Swap each property_id for its text: left join on the shared property_id
# column, then drop the id.
property_lookup_df = equipment_attr_desc_df.rename(columns={"id": "property_id", "value": "property"})
equip_refine_attr_df = equip_refine_attr_df.merge(property_lookup_df, how="left")
equip_refine_attr_df = equip_refine_attr_df.drop(["property_id"], axis=1)

### Save data for database

In [None]:
# Write the refine attribute table to the parsed-output folder (no index column).
equip_refine_attr_csv_path = f"{MAIN_PARSED_DIR}/equip_refine_attributes_{APK_DATE}.csv"
equip_refine_attr_df.to_csv(equip_refine_attr_csv_path, index=False)

## Item Split

### Parse data

In [None]:
# Load the ItemSplit dump and flatten it into one space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_equip_ItemSplit.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}   ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the brace sequences for placeholders before rewriting them. The order
# matters: "}, }," is a substring of the longer sequences handled first.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: A, B and C each turn one brace into a square
# bracket; D, E and F are restored with their braces intact.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

dismantles = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed ItemSplit record. Built once instead
# of on every loop iteration.
default_key_dict = {
    "id": "id",
    "item_id": "item_id",
    "lower_item": "lower_item",
    "need_num": "need_num",
}

dismantle_entries = []

# `dismantle_key` instead of `id`, so the builtin id() is not shadowed afterwards.
for dismantle_key, parsed_dict in dismantles.items():
  # Start from the table key; a record-level "id" field overrides it below.
  dismantle_entry = {"id": dismantle_key}
  for col, def_key in default_key_dict.items():
    try:
      dismantle_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent, or the record is not a mapping: leave the column unset
      # so it shows up as NaN in the DataFrame.
      continue
  dismantle_entries.append(dismantle_entry)

dismantle_df = pd.DataFrame(dismantle_entries)

### Data manipulation

In [None]:
# Reduce the two list-valued columns to their first element; a cell that is
# not a list falls back to a fixed default (2 for need_num, 1016 for
# lower_item).
first_element_defaults = {"need_num": 2, "lower_item": 1016}

for col, default_value in first_element_defaults.items():
  dismantle_df[col] = dismantle_df[col].apply(
      lambda cell, default_value=default_value: cell[0] if isinstance(cell, list) else default_value
  )

# A record without its own item_id uses the table key instead.
dismantle_df["item_id"] = dismantle_df["item_id"].fillna(dismantle_df["id"])

dismantle_column_names = {
    "need_num": "dismantle_num",
    "item_id": "material_id",
    "lower_item": "dismantle_id",
}
dismantle_df = dismantle_df.rename(columns=dismantle_column_names)

### Save data for database

In [None]:
# Keep only rows whose material_id and dismantle_id both exist as ids in
# material_final_df (ids are compared as floats).
known_material_ids = set(material_final_df["id"].astype(float))

dismantle_material_ids = dismantle_df["material_id"].astype(float)
intersections = set(dismantle_material_ids) & known_material_ids
dismantle_df = dismantle_df[dismantle_material_ids.isin(intersections)]

dismantle_result_ids = dismantle_df["dismantle_id"].astype(float)
intersections = set(dismantle_result_ids) & known_material_ids
dismantle_df = dismantle_df[dismantle_result_ids.isin(intersections)]

In [None]:
# Write the dismantle table to the parsed-output folder (no index column).
material_dismantle_csv_path = f"{MAIN_PARSED_DIR}/material_dismantle_{APK_DATE}.csv"
dismantle_df.to_csv(material_dismantle_csv_path, index=False)

## Item Combine

### Parse data

In [None]:
# Load the ItemCombine dump and flatten it into one space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_equip_ItemCombine.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}   ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the brace sequences for placeholders before rewriting them. The order
# matters: "}, }," is a substring of the longer sequences handled first.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: A, B and C each turn one brace into a square
# bracket; D, E and F are restored with their braces intact.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

item_combinations = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed ItemCombine record. Built once instead
# of on every loop iteration.
default_key_dict = {
    "id": "Id",
    "if_combine": "if_combine",
    "lower_item": "lower_item",
    "need_num": "need_num",
    "condition": "condition",
    "add_luck": "add_luck",
}

item_combination_entries = []

# `combination_key` instead of `id`, so the builtin id() is not shadowed afterwards.
for combination_key, parsed_dict in item_combinations.items():
  # Start from the table key; a record-level "Id" field overrides it below.
  item_combination_entry = {"id": combination_key}
  for col, def_key in default_key_dict.items():
    try:
      item_combination_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent, or the record is not a mapping: leave the column unset
      # so it shows up as NaN in the DataFrame.
      continue
  item_combination_entries.append(item_combination_entry)

item_combination_df = pd.DataFrame(item_combination_entries)

### Data manipulation

In [None]:
# Only recipes that list both their ingredients (lower_item) and the
# quantities (need_num) can be expanded.
has_recipe = item_combination_df["lower_item"].notnull() & item_combination_df["need_num"].notnull()
item_combination_df = item_combination_df[has_recipe]
item_combination_entries = item_combination_df[["id", "lower_item", "need_num"]].to_dict(orient="records")

# One output row per (item, required item) pair; need_num is read at the same
# position as the ingredient in lower_item.
item_combination_final_entries = []

for item_combination_entry in item_combination_entries:
  for i, req_item_id in enumerate(item_combination_entry["lower_item"]):
    item_combination_final_entries.append({
        "item_id": item_combination_entry["id"],
        "req_item_id": req_item_id,
        "item_num": item_combination_entry["need_num"][i],
    })

# Explicit columns keep the frame usable even when no recipe survived the filter.
item_combination_df = pd.DataFrame(
    item_combination_final_entries, columns=["item_id", "req_item_id", "item_num"]
)

In [None]:
# Publish the row index as the "id" column.
item_combination_df = item_combination_df.reset_index().rename(columns={"index": "id"})

# Keep only rows whose item_id and req_item_id both exist as ids in
# item_final_df (ids are compared as floats).
known_item_ids = set(item_final_df["id"].astype(float))

combination_item_ids = item_combination_df["item_id"].astype(float)
intersections = set(combination_item_ids) & known_item_ids
item_combination_df = item_combination_df[combination_item_ids.isin(intersections)]

combination_req_item_ids = item_combination_df["req_item_id"].astype(float)
intersections = set(combination_req_item_ids) & known_item_ids
item_combination_df = item_combination_df[combination_req_item_ids.isin(intersections)]

In [None]:
# Write the item combination table to the parsed-output folder (no index column).
item_combination_csv_path = f"{MAIN_PARSED_DIR}/item_combination_{APK_DATE}.csv"
item_combination_df.to_csv(item_combination_csv_path, index=False)

## Card

### Get data from Item

In [None]:
# Cards are picked by name among the items that have one.
named_item_df = item_df[item_df["item_name"].notnull()]
named_item_names = named_item_df["item_name"]

# Either an English "Card" name (not a "Fragment", and with an English item
# type), or any name containing the Chinese word for card.
is_english_card = (
    named_item_names.str.contains("Card")
    & ~named_item_names.str.contains("Fragment")
    & ~named_item_df["item_type_en"].isnull()
)
is_chinese_card = named_item_names.str.contains("卡片")
card_info_df = named_item_df[is_english_card | is_chinese_card]

# Main card table: only cards that have a coordinate point.
card_main_columns = ["id", "card_coordinate_point", "is_mvp_card", "unlock_adventure_exp", "card_quality", "monster"]
card_main_df = card_info_df[card_main_columns]
card_main_df = card_main_df[card_main_df["card_coordinate_point"].notnull()]

### Save data for database

In [None]:
# A card needs a monster; each monster collection is reduced to the first
# element it yields.
card_main_df = card_main_df[card_main_df["monster"].notnull()]
card_main_df["monster"] = card_main_df["monster"].apply(lambda monsters: next(iter(monsters)))

card_df = card_main_df.rename(columns={"monster": "creature_id"})

# Keep only cards whose creature exists in creature_df ...
card_creature_ids = card_df["creature_id"].astype(float)
intersections = set(card_creature_ids) & set(creature_df["id"].astype(float))
card_df = card_df[card_creature_ids.isin(intersections)]

# ... and whose own id exists in item_final_df.
card_item_ids = card_df["id"].astype(float)
intersections = set(card_item_ids) & set(item_final_df["id"].astype(float))
card_df = card_df[card_item_ids.isin(intersections)]

card_csv_path = f"{MAIN_PARSED_DIR}/card_{APK_DATE}.csv"
card_df.to_csv(card_csv_path, index=False)

## Card Attributes

### Get data from Card

In [None]:
# One dict per card holding its id and its raw attribute list.
card_attr_source_df = card_info_df[["id", "card_attrs"]]
card_attr_entries = card_attr_source_df.to_dict(orient="records")

### Create entries for DataFrame creation

In [None]:
# Expand every card into one row per attribute description key.
card_attr_final_entries = []

for card_attr_entry in card_attr_entries:
  try:
    for card_attr in card_attr_entry["card_attrs"]:
      card_attr_final_entries.append({
          "id": card_attr_entry["id"],
          "attr_desc": card_attr["attrDesc"],
      })
  except (TypeError, KeyError):
    # card_attrs is not iterable (e.g. NaN) or an attribute lacks "attrDesc":
    # skip the rest of this card. Rows already added for it are kept.
    continue

# Explicit columns keep the frame usable even when no attribute was found.
card_attr_df = pd.DataFrame(card_attr_final_entries, columns=["id", "attr_desc"])

### Merge with information from `en_langs`

#### Attribute Description

In [None]:
# Attribute texts from parsed_data; their ids are prefixed so that they match
# the "CardAttributeDescription_<id>" keys stored on the cards.
attr_desc_records = parsed_data["card_attr_desc"]
card_attr_desc_df = pd.DataFrame(attr_desc_records)
card_attr_desc_df["id"] = card_attr_desc_df["id"].apply(lambda desc_id: f"CardAttributeDescription_{desc_id}")
card_attr_desc_df = card_attr_desc_df.rename(columns={"id": "attr_desc"})

# Swap each key for its text: left join on attr_desc, then drop the key.
card_fin_df = card_attr_df.merge(
    card_attr_desc_df, how="left", left_on=["attr_desc"], right_on=["attr_desc"]
)
card_fin_df = card_fin_df.drop("attr_desc", axis=1)
card_fin_df = card_fin_df.rename(columns={"value": "attribute"})

### Save data for database

In [None]:
# The card's id becomes card_id; every attribute row gets its own sequential id.
card_fin_df = card_fin_df.rename(columns={"id": "card_id"}).reset_index(drop=True)
card_fin_df["id"] = pd.Series(range(len(card_fin_df)))

# Drop attributes without text, then keep only attributes of exported cards.
card_fin_df = card_fin_df[card_fin_df["attribute"].notnull()]
attribute_card_ids = card_fin_df["card_id"].astype(float)
intersections = set(attribute_card_ids) & set(card_df["id"].astype(float))
card_fin_df = card_fin_df[attribute_card_ids.isin(intersections)]

In [None]:
# Write the card attribute table to the parsed-output folder (no index column).
card_attributes_csv_path = f"{MAIN_PARSED_DIR}/card_attributes_{APK_DATE}.csv"
card_fin_df.to_csv(card_attributes_csv_path, index=False)

## Card Awakening

### Parse data

In [None]:
# Load the CardCoordinates dump and flatten it into one space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_item_CardCoordinates.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}   ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the brace sequences for placeholders before rewriting them. The order
# matters: "}, }," is a substring of the longer sequences handled first.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: A, B and C each turn one brace into a square
# bracket; D, E and F are restored with their braces intact.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

card_coordinates = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed CardCoordinates record. Built once
# instead of on every loop iteration.
# NOTE(review): the `staticId` entry reads the key "static_id" into a column
# named "staticId", the reverse of the naming used by the other entries -
# confirm against the dump which spelling the records actually use.
default_key_dict = {
    "name": "name",
    "attr_id": "attrId",
    "card": "card",
    "icon": "icon",
    "staticId": "static_id",
    "type": "type",
}

card_coordinate_entries = []

# `coordinate_key` instead of `id`, so the builtin id() is not shadowed afterwards.
for coordinate_key, parsed_dict in card_coordinates.items():
  # The table key is the entry's id.
  card_coordinate_entry = {"id": coordinate_key}
  for col, def_key in default_key_dict.items():
    try:
      card_coordinate_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent, or the record is not a mapping: leave the column unset
      # so it shows up as NaN in the DataFrame.
      continue
  card_coordinate_entries.append(card_coordinate_entry)

card_coordinates_df = pd.DataFrame(card_coordinate_entries)

### Split entries to granularize

In [None]:
card_coordinate_entries = card_coordinates_df[["id", "name", "card", "icon"]].to_dict(orient="records")

# One output row per card listed in a coordinate group; the group's name and
# icon are repeated on each of its cards.
card_coordinate_final_entries = [
    {
        "name": coordinate_group["name"],
        "card": grouped_card,
        "icon": coordinate_group["icon"],
    }
    for coordinate_group in card_coordinate_entries
    for grouped_card in coordinate_group["card"]
]

card_coordinates_df = pd.DataFrame(card_coordinate_final_entries)

### Merge with `en_langs`

In [None]:
# Coordinate group names from parsed_data, keyed by an integer id.
coordinate_name_records = parsed_data["card_coordinates"]
card_coordinate_names_df = pd.DataFrame(coordinate_name_records)
card_coordinate_names_df = card_coordinate_names_df.rename(columns={"value": "name"})
card_coordinate_names_df["id"] = card_coordinate_names_df["id"].apply(int)

# Keep the rows whose name is a "CardCoordinates_<id>" key and turn that key
# into the integer id it carries.
has_coordinate_key = card_coordinates_df["name"].str.contains("CardCoordinates")
card_coordinates_df = card_coordinates_df[has_coordinate_key]
coordinate_ids = card_coordinates_df["name"].str.replace("CardCoordinates_", "").apply(int)
card_coordinates_df["name"] = coordinate_ids
card_coordinates_df = card_coordinates_df.rename(columns={"name": "id"})

# Swap the id for the readable name: left join on the shared id column, then
# drop the id.
card_coordinates_df = card_coordinates_df.merge(card_coordinate_names_df, how="left")
card_coordinates_df = card_coordinates_df.drop("id", axis=1)

### Save data for database

In [None]:
# Renumber the rows and publish that number as the "id" column.
card_coordinates_df = card_coordinates_df.reset_index(drop=True)
card_coordinates_df["id"] = pd.Series(range(len(card_coordinates_df)))

card_awakening_column_names = {"card": "card_id", "name": "category"}
card_coordinates_df = card_coordinates_df.rename(columns=card_awakening_column_names)

# Keep only rows that point at an exported card.
awakening_card_ids = card_coordinates_df["card_id"].astype(float)
intersections = set(awakening_card_ids) & set(card_df["id"].astype(float))
card_coordinates_df = card_coordinates_df[awakening_card_ids.isin(intersections)]

card_awakening_csv_path = f"{MAIN_PARSED_DIR}/card_awakening_{APK_DATE}.csv"
card_coordinates_df.to_csv(card_awakening_csv_path, index=False)

## Equipment Formula

### Parse data

In [None]:
# Load the EquipmentFormula dump and flatten it into one space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_equip_EquipmentFormula.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}   ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the brace sequences for placeholders before rewriting them. The order
# matters: "}, }," is a substring of the longer sequences handled first.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: A, B and C each turn one brace into a square
# bracket; D, E and F are restored with their braces intact.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

equipment_formulas = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed EquipmentFormula record. Built once
# instead of on every loop iteration.
# The column name "curreny_value" (sic) is read under that exact spelling by
# the later formula breakdown, so it is kept as is.
default_key_dict = {
    "id": "id",
    "curreny_value": "currencyValue",
    "dynamic_npc_id": "dynamicNpcId",
    "equip_type": "equipType",
    "formula_type": "formulaType",
    "material_id": "materialId",
    "material_num": "materialNum",
    "equip_id": "productionEquipId",
}

equipment_formula_entries = []

# `formula_key` instead of `id`, so the builtin id() is not shadowed afterwards.
for formula_key, parsed_dict in equipment_formulas.items():
  # Start from the table key; a record-level "id" field overrides it below.
  equipment_formula_entry = {"id": formula_key}
  for col, def_key in default_key_dict.items():
    try:
      equipment_formula_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent, or the record is not a mapping: leave the column unset
      # so it shows up as NaN in the DataFrame.
      continue
  equipment_formula_entries.append(equipment_formula_entry)

equip_formula_df = pd.DataFrame(equipment_formula_entries)

### Filter data

In [None]:
# A formula is usable only when it names the produced equipment and lists
# both its materials and their quantities.
is_complete_formula = (
    equip_formula_df["equip_id"].notnull()
    & equip_formula_df["material_num"].notnull()
    & equip_formula_df["material_id"].notnull()
)
equip_formula_df = equip_formula_df[is_complete_formula]

### Break entries down

In [None]:
# Expand every formula into one row per required material; material_num is
# read at the same position as the material in material_id.
equip_formula_req_df = equip_formula_df[["id", "equip_id", "material_id", "material_num"]]
equip_formula_reqs = equip_formula_req_df.to_dict(orient="records")

equip_formula_req_news = []
for equip_formula_req in equip_formula_reqs:
  for i, material_id in enumerate(equip_formula_req["material_id"]):
    equip_formula_req_news.append({
        "equip_id": equip_formula_req["equip_id"],
        "material_id": material_id,
        "material_num": equip_formula_req["material_num"][i],
    })

equip_formula_final_df = pd.DataFrame(equip_formula_req_news)

# The currency cost is stored as one more material row under the fixed id
# 1001 (presumably the zeny item - confirm against the item table).
# .copy() makes the new column land on an independent frame rather than on a
# slice of equip_formula_df.
equip_formula_zeny_df = equip_formula_df[["curreny_value", "equip_id"]].copy()
equip_formula_zeny_df["material_id"] = 1001
equip_formula_zeny_df = equip_formula_zeny_df.rename(columns={"curreny_value": "material_num"})

# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in 2.0; rows and column alignment are the same.
equip_formula_final_df = pd.concat(
    [equip_formula_final_df, equip_formula_zeny_df], ignore_index=True
)

### Save data for database

In [None]:
# equip_formula_final_df = equip_formula_final_df.rename(columns={
#     "material_id": "mat_id",
#     "material_num": "mat_num"
# })

# Renumber the rows and publish that number as the "id" column.
equip_formula_final_df = equip_formula_final_df.reset_index(drop=True)
equip_formula_final_df["id"] = pd.Series(range(len(equip_formula_final_df)))

In [None]:
# Keep only rows whose equipment exists in equip_final_df ...
formula_equip_ids = equip_formula_final_df["equip_id"].astype(float)
intersections = set(formula_equip_ids) & set(equip_final_df["id"].astype(float))
equip_formula_final_df = equip_formula_final_df[formula_equip_ids.isin(intersections)]

# ... and whose material exists in material_final_df (ids compared as floats).
formula_material_ids = equip_formula_final_df["material_id"].astype(float)
intersections = set(formula_material_ids) & set(material_final_df["id"].astype(float))
equip_formula_final_df = equip_formula_final_df[formula_material_ids.isin(intersections)]

In [None]:
# Write the crafting table to the parsed-output folder (no index column).
equip_craft_csv_path = f"{MAIN_PARSED_DIR}/equip_craft_{APK_DATE}.csv"
equip_formula_final_df.to_csv(equip_craft_csv_path, index=False)

## Equipment Decomposition

### Parse data

In [None]:
# Load the EquipmentDecomposition dump and flatten it into one
# space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_equip_EquipmentDecomposition.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}   ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the brace sequences for placeholders before rewriting them. The order
# matters: "}, }," is a substring of the longer sequences handled first.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: A, B and C each turn one brace into a square
# bracket; D, E and F are restored with their braces intact.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

equipment_decompositions = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column -> key inside a parsed EquipmentDecomposition record. Built
# once instead of on every loop iteration.
default_key_dict = {
    "decomposition_id": "Id",
    "material_id": "materialId",
    "material_num": "materialNum",
}

equipment_decomposition_entries = []

# `decomposition_key` instead of `id`, so the builtin id() is not shadowed afterwards.
for decomposition_key, parsed_dict in equipment_decompositions.items():
  # The table key is the entry's id.
  equipment_decomposition_entry = {"id": decomposition_key}
  for col, def_key in default_key_dict.items():
    try:
      equipment_decomposition_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # Field absent, or the record is not a mapping: leave the column unset
      # so it shows up as NaN in the DataFrame.
      continue
  equipment_decomposition_entries.append(equipment_decomposition_entry)

equip_decomposition_df = pd.DataFrame(equipment_decomposition_entries)

### Break entries down

In [None]:
# equip_decomposition_df["material_num"] = equip_decomposition_df["material_num"].apply(list)

In [None]:
# Keep only decompositions that have both an id and a quantity list, then
# renumber the rows.
equip_decomposition_df = equip_decomposition_df[(~equip_decomposition_df["decomposition_id"].isnull()) & (~equip_decomposition_df["material_num"].isnull())]
equip_decomposition_df = equip_decomposition_df.reset_index(drop=True)

# Two-step default for a missing material_id: the cell first becomes an empty
# list (assigned as an array of lists so each row receives a list), then every
# empty list is replaced by the fallback [10204044].
isnull = equip_decomposition_df["material_id"].isnull()
equip_decomposition_df.loc[isnull, 'material_id'] = pd.Series([[]] * isnull.sum()).values

equip_decomposition_df["material_id"] = equip_decomposition_df["material_id"].apply(lambda x: [10204044] if x == [] else x)
equip_decomposition_req_df = equip_decomposition_df[["decomposition_id", "material_id", "material_num"]]
equip_decomposition_reqs = equip_decomposition_req_df.to_dict(orient="records")

equip_decomposition_req_news = []

# One output row per material; material_num is read at the same position as
# the material in material_id.
for equip_decomposition_req in equip_decomposition_reqs:
  for i, material_id in enumerate(equip_decomposition_req["material_id"]):
    new_entry = {}
    new_entry["decomposition_id"] = equip_decomposition_req["decomposition_id"]
    new_entry["material_id"] = material_id
    new_entry["material_num"] = equip_decomposition_req["material_num"][i]
  
    equip_decomposition_req_news.append(new_entry)

equip_decomposition_final_df = pd.DataFrame(equip_decomposition_req_news)

### Save data for database

In [None]:
# Renumber the rows and publish that number as the "id" column.
equip_decomposition_final_df = equip_decomposition_final_df.reset_index(drop=True)
equip_decomposition_final_df["id"] = pd.Series(range(len(equip_decomposition_final_df)))

# Keep only decompositions referenced by an equipment in equip_final_df ...
decomposition_ids = equip_decomposition_final_df["decomposition_id"].astype(float)
intersections = set(decomposition_ids) & set(equip_final_df["decomposition_id"].astype(float))
equip_decomposition_final_df = equip_decomposition_final_df[decomposition_ids.isin(intersections)]

# ... and whose material exists in material_final_df (ids compared as floats).
decomposition_material_ids = equip_decomposition_final_df["material_id"].astype(float)
intersections = set(decomposition_material_ids) & set(material_final_df["id"].astype(float))
equip_decomposition_final_df = equip_decomposition_final_df[decomposition_material_ids.isin(intersections)]

equip_decomposition_csv_path = f"{MAIN_PARSED_DIR}/equip_decomposition_{APK_DATE}.csv"
equip_decomposition_final_df.to_csv(equip_decomposition_csv_path, index=False)

## Life Skill Area

### Get data from `en_langs`

In [None]:
# Area names from parsed_data; "value" is exposed as "name", "id" is kept.
area_name_records = parsed_data["area_name_new"]
area_name_df = pd.DataFrame(area_name_records).rename(columns={"value": "name"})

In [None]:
# Write the life skill area table to the parsed-output folder (no index column).
life_skill_area_csv_path = f"{MAIN_PARSED_DIR}/life_skill_area_{APK_DATE}.csv"
area_name_df.to_csv(life_skill_area_csv_path, index=False)

## Life Skill Area Drop

### Parse data

In [None]:
# Load the AreaDrop dump and flatten it into one space-separated string.
with open(f"{MAIN_CLEANED_DIR}/data_lifeSkill_AreaDrop.bytes", "r", encoding="utf8") as dump_file:
  texts = " ".join(line.strip() for line in dump_file)

# Rewrite the dump's table syntax so that it parses as a Python literal.
# The replacement templates are raw strings: "\g" is not a valid escape in a
# normal string literal and triggers an invalid-escape warning on recent Pythons.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)   # [key] =  ->  key:
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)      # { 1,2 }  ->  [1,2]
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)    # {1, 2}   ->  [1, 2]
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the brace sequences for placeholders before rewriting them. The order
# matters: "}, }," is a substring of the longer sequences handled first.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: A, B and C each turn one brace into a square
# bracket; D, E and F are restored with their braces intact.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, }, ],")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is a single dict literal.
texts = "{" + texts + "}"

life_skill_area_drops = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column name -> key used in the parsed AreaDrop records. Built once
# instead of on every loop iteration; the repeated "get_mine_cd" entry of the
# previous version was redundant and has been dropped.
default_key_dict = {
    "area_name": 'Areaname',
    "bait_drop_id": 'BaitDropId',
    "bait_id": "BaitId",
    "drop_id": 'Dropid',
    "extra_drop": 'ExtraDrop',
    "fish_energy": 'FishEnergy',
    "fish_exp": 'FishExp',
    "get_fish_cd": 'GetFishCD',
    "level_extra_drop": 'LevelExtraDrop',
    "life_level_limit": 'LifeLevelLimit',
    "pick_cd": 'PickCD',
    "pick_energy": 'PickEnergy',
    "pick_exp": 'PickExp',
    "life_level": 'LifeLevel',
    "mine_level_energy": 'MineLevelEnergy',
    "get_mine_cd": 'GetMineCD',
    "mine_level_limit": 'MineLevelLimit',
    "mine_level_limit_drop_id": 'MineLevelLimitDropId',
    "mine_tool": 'MineTool',
    "rich_mine": 'RichMine',
    "rich_mine_energy": 'RichMineEnergy',
    "rich_mine_num": 'RichMineNum',
}

life_skill_area_drop_entries = []

# The loop variable is not called `id`, so the builtin stays usable afterwards.
for area_drop_id, parsed_dict in life_skill_area_drops.items():
  life_skill_area_drop_entry = {"id": area_drop_id}

  for col, def_key in default_key_dict.items():
    try:
      life_skill_area_drop_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # A record may lack some keys (or not be a mapping at all); such columns
      # are left out and show up as missing values in the DataFrame.
      continue

  life_skill_area_drop_entries.append(life_skill_area_drop_entry)

life_skill_area_drop_df = pd.DataFrame(life_skill_area_drop_entries)

### Merge with `en_langs` Area Name information

In [None]:
# Prefix the area ids with "Areaname" so they match the values stored in the
# drop table's "area_name" column, then attach the area names with a left join.
area_name_df = area_name_df.rename(columns={"id": "area_id", "name": "area_name"})
area_name_df["area_id"] = area_name_df["area_id"].apply("Areaname{}".format)

life_skill_area_drop_df = life_skill_area_drop_df.rename(columns={"area_name": "area_id"})
life_skill_area_drop_df = life_skill_area_drop_df.merge(area_name_df, how="left", on="area_id")

## Fishing

### Get data from Life Skill Area Drop

In [None]:
# Keep only the area-drop rows that have a fish_energy value.
has_fish_energy = life_skill_area_drop_df["fish_energy"].notnull()
fishing_df = life_skill_area_drop_df[has_fish_energy]

In [None]:
# Restrict the frame to the columns used by the fishing tables.
fishing_columns = [
    "id",
    "area_id",
    "area_name",
    "bait_drop_id",
    "bait_id",
    "drop_id",
    "extra_drop",
    "fish_energy",
    "fish_exp",
    "get_fish_cd",
    "level_extra_drop",
    "life_level_limit",
]
fishing_df = fishing_df[fishing_columns]

#### Create entries for DataFrame creation

In [None]:
# Keep only rows whose area id carries the "Areaname" prefix.
is_prefixed_area = fishing_df["area_id"].str.startswith("Areaname")
fishing_df = fishing_df[is_prefixed_area]

In [None]:
# Strip the "Areaname" prefix to get the integer area id, then drop rows that
# have no bait ids.
area_id_digits = fishing_df["area_id"].str.replace("Areaname", "")
fishing_df["area_id"] = area_id_digits.apply(int)
fishing_df = fishing_df[fishing_df["bait_id"].notnull()]

In [None]:
# Produce one row for every (bait, extra drop) combination of a fishing record.
fishing_infos = []

for pond in fishing_df.to_dict(orient="records"):
  for bait_pos, bait in enumerate(pond["bait_id"]):
    for extra_pos, _ in enumerate(pond["extra_drop"]):
      fishing_infos.append({
          "id": pond["id"],
          "area_id": pond["area_id"],
          "area_name": pond["area_name"],
          # bait_drop_id is read at the same position as the bait id.
          "bait_drop_id": pond["bait_drop_id"][bait_pos],
          "bait_id": bait,
          "drop_id": pond["drop_id"],
          "fish_energy": pond["fish_energy"],
          "fish_exp": pond["fish_exp"],
          "get_fish_cd": pond["get_fish_cd"],
          # Only the first "Drop" value of the first level entry is kept.
          "level_extra_drop": pond["level_extra_drop"][0]["Drop"][0],
          "extra_drop": pond["extra_drop"][extra_pos],
          "life_level_limit": pond["life_level_limit"],
      })

fishing_info_df = pd.DataFrame(fishing_infos)

In [None]:
# Keep rows with a positive bait drop id and a known life level limit.
has_bait_drop = fishing_info_df["bait_drop_id"] > 0
fishing_info_df = fishing_info_df[has_bait_drop]

has_level_limit = fishing_info_df["life_level_limit"].notnull()
fishing_info_df = fishing_info_df[has_level_limit]

### Fishing Pond Information

In [None]:
# One row per distinct pond; rows whose drop references are unknown to the
# random drop table are removed before exporting.
pond_columns = ["id", "area_id", "fish_energy", "fish_exp", "get_fish_cd", "level_extra_drop", "extra_drop", "life_level_limit"]
fishing_pond_df = (
    fishing_info_df[pond_columns]
    .drop_duplicates()
    .rename(columns={"area_id": "life_skill_area_id"})
)

random_drop_ids = set(random_drop_main_df["id"].astype(float))

for drop_column in ("level_extra_drop", "extra_drop"):
  drop_values = fishing_pond_df[drop_column].astype(float)
  intersections = set(drop_values) & random_drop_ids
  fishing_pond_df = fishing_pond_df[drop_values.isin(intersections)]

fishing_pond_df = fishing_pond_df.rename(columns={"level_extra_drop": "level_extra_drop_id", "extra_drop": "extra_drop_id"})

fishing_pond_csv = f"{MAIN_PARSED_DIR}/fishing_pond_{APK_DATE}.csv"
fishing_pond_df.to_csv(fishing_pond_csv, index=False)

### Fishing Pond Bait Drop Information

In [None]:
# Bait/drop rows per pond; the previous row index becomes the "id" column and
# rows with unknown drop or bait references are removed before exporting.
fishing_pond_random_drop_df = (
    fishing_info_df[["id", "bait_drop_id", "bait_id", "drop_id"]]
    .rename(columns={"id": "fishing_pond_id"})
    .reset_index()
    .rename(columns={"index": "id"})
)

random_drop_ids = set(random_drop_main_df["id"].astype(float))

for drop_column in ("bait_drop_id", "drop_id"):
  drop_values = fishing_pond_random_drop_df[drop_column].astype(float)
  intersections = set(drop_values) & random_drop_ids
  fishing_pond_random_drop_df = fishing_pond_random_drop_df[drop_values.isin(intersections)]

bait_values = fishing_pond_random_drop_df["bait_id"].astype(float)
intersections = set(bait_values) & set(material_final_df["id"].astype(float))
fishing_pond_random_drop_df = fishing_pond_random_drop_df[bait_values.isin(intersections)]

fishing_pond_random_drop_csv = f"{MAIN_PARSED_DIR}/fishing_pond_random_drop_{APK_DATE}.csv"
fishing_pond_random_drop_df.to_csv(fishing_pond_random_drop_csv, index=False)

## Mining

### Get data from Life Skill Area Drop

In [None]:
# Keep area-drop rows that have a mine_level_energy value and whose
# rich_mine_energy holds exactly one element.
has_mine_energy = life_skill_area_drop_df["mine_level_energy"].notnull()
mining_df = life_skill_area_drop_df[has_mine_energy]

rich_mine_energy_sizes = mining_df["rich_mine_energy"].apply(len)
mining_df = mining_df[rich_mine_energy_sizes == 1]

In [None]:
# Restrict to the mining columns and reduce the per-level containers to their
# first element.
mining_columns = [
    "id",
    "area_id",
    "area_name",
    "extra_drop",
    "life_level_limit",
    "mine_level_energy",
    "get_mine_cd",
    "mine_level_limit",
    "mine_level_limit_drop_id",
]
mining_df = mining_df[mining_columns]

first_element_columns = ("mine_level_energy", "get_mine_cd", "mine_level_limit", "mine_level_limit_drop_id")
for col in first_element_columns:
  as_lists = mining_df[col].apply(list)
  mining_df[col] = as_lists.str[0]

#### Create entries for DataFrame creation

In [None]:
# Strip the "Areaname" prefix to get the integer area id.
area_id_digits = mining_df["area_id"].str.replace("Areaname", "")
mining_df["area_id"] = area_id_digits.apply(int)

# Produce one row per extra drop of every mining record.
mining_infos = []
for ore in mining_df.to_dict(orient="records"):
  for extra_pos, _ in enumerate(ore["extra_drop"]):
    mining_infos.append({
        "id": ore["id"],
        "area_id": ore["area_id"],
        "area_name": ore["area_name"],
        "extra_drop": ore["extra_drop"][extra_pos],
        "mine_level_limit": ore["mine_level_limit"],
        "mine_level_energy": ore["mine_level_energy"],
        # NOTE(review): mine_exp is filled from mine_level_energy -- confirm
        # this is intended and not a copy/paste slip.
        "mine_exp": ore["mine_level_energy"],
        "get_mine_cd": ore["get_mine_cd"],
        "drop_id": ore["mine_level_limit_drop_id"],
    })

mining_info_df = pd.DataFrame(mining_infos)

### Mining Ore Information

In [None]:
# One row per distinct ore; rows whose drop id is unknown to the random drop
# table are removed before exporting.
ore_columns = ["id", "area_id", "mine_level_energy", "mine_exp", "get_mine_cd", "drop_id", "mine_level_limit"]
mining_ore_df = (
    mining_info_df[ore_columns]
    .drop_duplicates()
    .rename(columns={"area_id": "life_skill_area_id"})
)

ore_drop_ids = mining_ore_df["drop_id"].astype(float)
intersections = set(ore_drop_ids) & set(random_drop_main_df["id"].astype(float))
mining_ore_df = mining_ore_df[ore_drop_ids.isin(intersections)]

mining_ore_csv = f"{MAIN_PARSED_DIR}/mining_ore_{APK_DATE}.csv"
mining_ore_df.to_csv(mining_ore_csv, index=False)

### Mining Ore Extra Drop

In [None]:
mining_ore_extra_drop_df = mining_info_df[["id", "extra_drop"]].rename(columns={"id": "mining_ore_id", "extra_drop": "extra_drop_id"}).reset_index().rename(columns={"index": "id"})

intersections = set(mining_ore_extra_drop_df["extra_drop_id"].astype(float)).intersection(set(random_drop_main_df["id"].astype(float)))
mining_ore_extra_drop_df = mining_ore_extra_drop_df[mining_ore_extra_drop_df["extra_drop_id"].astype(float).isin(intersections)]

mining_ore_extra_drop_df.to_csv(f"{MAIN_PARSED_DIR}/mining_ore_extra_drop_{APK_DATE}.csv", index=False)

## Bapho Raid

### Initialize fixed variable

In [None]:
#data_raid_RaidInstance.bytes

# Hard-coded drop ids per reward category; most of them are consecutive, so
# they are written as ranges (the upper bound is exclusive).
party_rewards = [*range(209011, 209016), 209027, 209028]

individual_rewards = list(range(209016, 209027))

team_rewards = list(range(209006, 209011))

### Create DataFrame for drop and save it for database

In [None]:
# Tag every reward drop id with its reward type:
# 1 = individual, 2 = party, 3 = team.
baphomet_individual_drop_df = pd.DataFrame({"drop_id": individual_rewards, "reward_type": 1})
baphomet_party_drop_df = pd.DataFrame({"drop_id": party_rewards, "reward_type": 2})
# The team frame is built from `team_rewards`; it previously reused
# `party_rewards`, which exported the party drops a second time as type 3.
baphomet_team_drop_df = pd.DataFrame({"drop_id": team_rewards, "reward_type": 3})

# `pd.concat` replaces the chained `DataFrame.append` calls, which no longer
# exist in pandas 2.0 and later.
complex_baphomet_drop_df = pd.concat(
    [baphomet_individual_drop_df, baphomet_party_drop_df, baphomet_team_drop_df],
    ignore_index=True,
)
# The fresh 0..n-1 index becomes the "id" column.
complex_baphomet_drop_df = complex_baphomet_drop_df.reset_index().rename(columns={"index": "id"})

# Keep only drop ids that are known to the random drop table.
reward_drop_ids = complex_baphomet_drop_df["drop_id"].astype(int)
intersections = set(reward_drop_ids).intersection(set(random_drop_main_df["id"].astype(int)))
complex_baphomet_drop_df = complex_baphomet_drop_df[reward_drop_ids.isin(intersections)]

complex_baphomet_drop_df.to_csv(f"{MAIN_PARSED_DIR}/baphomet_random_drop_{APK_DATE}.csv", index=False)

### Create DataFrame for information

In [None]:
# Rules text for the raid. It is written as adjacent string literals (one per
# heading or rule) inside parentheses, because a single double-quoted literal
# must not span several physical lines. Python joins the pieces at compile time.
# NOTE(review): the previous literal was broken after "Combat Rules:\n1. ";
# the two halves are joined directly here -- confirm no character was intended
# at that position.
bapho_rules = (
    "Opening Time:\nAll day\n\n"
    "Entry Requirements:\n"
    "1. Adventurers can enter the Illusion Trial after completing the tutorial quest [Named Hero], and a corresponding Illusion Key is required.\n"
    "2. Each time the Illusion Trial is cleared, a Illusion Key will be consumed. Every Monday at 05:00, the Illusion Keys obtained last week will be destroyed.\n"
    "3. Adventurers can go to the Librarian Winry at the Geffen Library to access the Illusion Exchange Bureau, where they can exchange or purchase Illusion Keys and other Illusion Trial related items.\n"
    "4. Adventurers can form a team to challenge the Illusion Trial. The team formed will be divided into 3 parties, with a maximum of 5 people per party.\n"
    "5. There is a crystal ball at the entrance of the Illusion Trial, and Adventurers can summon their teammates to this place. Teammates who are in instances or special maps cannot be summoned.\n"
    "6. The team members who newly enter the Illusion Trial will synchronize the current Illusion Trial progress.\n\n"
    "Reward Rules:\n"
    "1. Individual Reward: Adventurers will get rewards every time they clear the Illusion Trial.\n"
    "2. Party Reward: Adventurers can tap the items they need in the ornate chests dropped by the BOSS and roll the dice. Adventurers with the highest number of dice in the party will get the item. The rewards of each chest differs and can only be shared by the party members.\n"
    "3. Team Reward: After clearing the Illusion Trial, there is a chance that the Black Market Merchant will appear and issue rewards. The rewards will be unlocked by using the Illusion Certificates, and the rewards will be assigned according to the number of the Illusion Certificates given. Adventurers who do not get the rewards will reclaim the Illusion Certificates given by them previously.\n"
    "4. Reward chances will be reset every Monday at 05:00. Each week, Adventurers can get a maximum of 1 individual reward, 1 party reward, and 1 team reward.\n\n"
    "Combat Rules:\n"
    "1. When less than 15 Adventurers enter the Illusion Trial, the attributes and stats of the monsters will be increased. The fewer Adventurers enter the Illusion, the stronger the monsters will be!\n"
    "2. In the Illusion Trial, Adventurers’ Damage and Damage Taken will be modified in line with the standard level of the Illusion.\n"
    "3. Adventurers who die after entering the battle state cannot be resurrected immediately, they need to wait for the resurrection button to light up after the whole party/team is destroyed.\n"
    "4. Adventurers who die more than 2x in the battle state be restricted from resurrecting, and cannot be resurrected by skills or items as well. Adventurers’ deaths will be reset after resurrection.\n"
    "5. Players cannot use skills to play dead in the Illusion Trial."
)

In [None]:
# Turn every newline into an HTML line break.
bapho_rule_lines = bapho_rules.split("\n")
bapho_rules = "<br>".join(bapho_rule_lines)

## Shadow Weapon

### Parse data

In [None]:
# Read the cleaned ShadowWeapon dump and rewrite its table syntax into a
# Python dict literal that `literal_eval` can parse.

# Reading stops once this many lines have been collected.
LIMIT = 999999

dump_lines = []
with open(f"{MAIN_CLEANED_DIR}/data_ShadowWeapon_ShadowWeapon.bytes", "r", encoding="utf8") as dump_file:
  for line_number, line in enumerate(dump_file):
    if line_number == LIMIT:
      break
    dump_lines.append(line.strip())

# Join with single spaces; the patterns below match on that exact spacing.
texts = " ".join(dump_lines)

# `[key] =` becomes `key:`. Replacement templates are raw strings because
# `\g` is not a valid escape sequence in a normal string literal.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
# Braces that wrap only digits, commas and spaces become list brackets.
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the remaining brace sequences for placeholders, longest first, so the
# generic `}, },` replacement cannot consume part of a longer sequence.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: `{ {` opens a list, and the closing sequences get a
# `]` where that list ends.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, ], },")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is one dict literal.
texts = "{" + texts + "}"

shadow_equipments = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column name -> key used in the parsed ShadowWeapon records. Built
# once instead of on every loop iteration.
default_key_dict = {
    "id": "Id",
    "artifact_name": "ArtifactName",
    "class_branch": "ClassBranch",
    "cost_item_id": "CostItemId",
    "cost_num": "CostNum",
    "cost_type": "CostType",
    "job": "Job",
    "name": "Name",
    "require_weapon_type": "RequireWeaponType",
}

shadow_equipment_entries = []

# The loop variable is not called `id`, so the builtin stays usable afterwards.
for shadow_equipment_id, parsed_dict in shadow_equipments.items():
  # Start from the dict key; a record's own "Id" value replaces it if present.
  shadow_equipment_entry = {"id": shadow_equipment_id}

  for col, def_key in default_key_dict.items():
    try:
      shadow_equipment_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # A record may lack some keys (or not be a mapping at all); such columns
      # are left out and show up as missing values in the DataFrame.
      continue

  shadow_equipment_entries.append(shadow_equipment_entry)

shadow_equipment_df = pd.DataFrame(shadow_equipment_entries)

## Shadow Weapon Prop

### Parse data

In [None]:
# Read the cleaned ShadowWeaponProp dump and rewrite its table syntax into a
# Python dict literal that `literal_eval` can parse.

# Reading stops once this many lines have been collected.
LIMIT = 999999

dump_lines = []
with open(f"{MAIN_CLEANED_DIR}/data_ShadowWeapon_ShadowWeaponProp.bytes", "r", encoding="utf8") as dump_file:
  for line_number, line in enumerate(dump_file):
    if line_number == LIMIT:
      break
    dump_lines.append(line.strip())

# Join with single spaces; the patterns below match on that exact spacing.
texts = " ".join(dump_lines)

# `[key] =` becomes `key:`. Replacement templates are raw strings because
# `\g` is not a valid escape sequence in a normal string literal.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
# Braces that wrap only digits, commas and spaces become list brackets.
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the remaining brace sequences for placeholders, longest first, so the
# generic `}, },` replacement cannot consume part of a longer sequence.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: `{ {` opens a list, and the closing sequences get a
# `]` where that list ends.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, ], },")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is one dict literal.
texts = "{" + texts + "}"

shadow_equipment_skills = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column name -> key used in the parsed ShadowWeaponProp records. Built
# once instead of on every loop iteration.
default_key_dict = {
    "artifact_cost_item_id": "ArtifactCostItemId",
    "artifact_cost_num": "ArtifactCostNum",
    "artifact_cost_type": "ArtifactCostType",
    "prop_group": "PropGroup",
    "shadow_weapon_level_limit_list": "ShadowWeaponLevelLimitList",
    "shadow_weapon_level_skill_id": "ShadowWeaponSkillId",
    "skill_open_level": "SkillOpenLevel",
}

shadow_equipment_skill_entries = []

# The loop variable is not called `id`, so the builtin stays usable afterwards.
for shadow_equipment_skill_id, parsed_dict in shadow_equipment_skills.items():
  shadow_equipment_skill_entry = {"id": shadow_equipment_skill_id}

  for col, def_key in default_key_dict.items():
    try:
      shadow_equipment_skill_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # A record may lack some keys (or not be a mapping at all); such columns
      # are left out and show up as missing values in the DataFrame.
      continue

  shadow_equipment_skill_entries.append(shadow_equipment_skill_entry)

shadow_equipment_skill_df = pd.DataFrame(shadow_equipment_skill_entries)

In [None]:
# Records without a skill_open_level value get level 1.
skill_open_levels = shadow_equipment_skill_df["skill_open_level"]
shadow_equipment_skill_df["skill_open_level"] = skill_open_levels.fillna(1)

## Shadow Weapon Skill

### Parse data

In [None]:
# Read the cleaned ShadowWeaponSkill dump and rewrite its table syntax into a
# Python dict literal that `literal_eval` can parse.
# NOTE(review): the result is stored in `shadow_equipment_skills`, the same
# name the Shadow Weapon Prop section uses, so that data is overwritten here --
# confirm this is intended.

# Reading stops once this many lines have been collected.
LIMIT = 999999

dump_lines = []
with open(f"{MAIN_CLEANED_DIR}/data_ShadowWeapon_ShadowWeaponSkill.bytes", "r", encoding="utf8") as dump_file:
  for line_number, line in enumerate(dump_file):
    if line_number == LIMIT:
      break
    dump_lines.append(line.strip())

# Join with single spaces; the patterns below match on that exact spacing.
texts = " ".join(dump_lines)

# `[key] =` becomes `key:`. Replacement templates are raw strings because
# `\g` is not a valid escape sequence in a normal string literal.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
# Braces that wrap only digits, commas and spaces become list brackets.
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the remaining brace sequences for placeholders, longest first, so the
# generic `}, },` replacement cannot consume part of a longer sequence.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: `{ {` opens a list, and the closing sequences get a
# `]` where that list ends.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, ], },")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is one dict literal.
texts = "{" + texts + "}"

shadow_equipment_skills = literal_eval(texts)

### Create entries for DataFrame creation

In [None]:
# Output column name -> key looked up in each parsed record. Built once
# instead of on every loop iteration.
# NOTE(review): this mapping and the result names are identical to the ones in
# the Shadow Weapon Prop section, so the Prop DataFrame is overwritten here --
# confirm the ShadowWeaponSkill records really use these keys.
default_key_dict = {
    "artifact_cost_item_id": "ArtifactCostItemId",
    "artifact_cost_num": "ArtifactCostNum",
    "artifact_cost_type": "ArtifactCostType",
    "prop_group": "PropGroup",
    "shadow_weapon_level_limit_list": "ShadowWeaponLevelLimitList",
    "shadow_weapon_level_skill_id": "ShadowWeaponSkillId",
    "skill_open_level": "SkillOpenLevel",
}

shadow_equipment_skill_entries = []

# The loop variable is not called `id`, so the builtin stays usable afterwards.
for shadow_equipment_skill_id, parsed_dict in shadow_equipment_skills.items():
  shadow_equipment_skill_entry = {"id": shadow_equipment_skill_id}

  for col, def_key in default_key_dict.items():
    try:
      shadow_equipment_skill_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # A record may lack some keys (or not be a mapping at all); such columns
      # are left out and show up as missing values in the DataFrame.
      continue

  shadow_equipment_skill_entries.append(shadow_equipment_skill_entry)

shadow_equipment_skill_df = pd.DataFrame(shadow_equipment_skill_entries)

In [None]:
# Records without a skill_open_level value get level 1.
skill_open_levels = shadow_equipment_skill_df["skill_open_level"]
shadow_equipment_skill_df["skill_open_level"] = skill_open_levels.fillna(1)

## Shadow Weapon Skill Group

### Parse data

In [None]:
# Read the cleaned ShadowWeaponSkillGroup dump and rewrite its table syntax
# into a Python dict literal that `literal_eval` can parse.

# Reading stops once this many lines have been collected.
LIMIT = 999999

dump_lines = []
with open(f"{MAIN_CLEANED_DIR}/data_ShadowWeapon_ShadowWeaponSkillGroup.bytes", "r", encoding="utf8") as dump_file:
  for line_number, line in enumerate(dump_file):
    if line_number == LIMIT:
      break
    dump_lines.append(line.strip())

# Join with single spaces; the patterns below match on that exact spacing.
texts = " ".join(dump_lines)

# `[key] =` becomes `key:`. Replacement templates are raw strings because
# `\g` is not a valid escape sequence in a normal string literal.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
# Braces that wrap only digits, commas and spaces become list brackets.
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the remaining brace sequences for placeholders, longest first, so the
# generic `}, },` replacement cannot consume part of a longer sequence.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: `{ {` opens a list, and the closing sequences get a
# `]` where that list ends.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, ], },")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is one dict literal.
texts = "{" + texts + "}"

shadow_equipment_skill_groups = literal_eval(texts)

In [None]:
# Show the parsed record stored under key 707 as a spot check of the parser.
shadow_equipment_skill_groups[707]

{'GroupName': 'ShadowWeaponSkillGroup_007',
 'Id': 707,
 'Name': '坚锤羽盾-器灵',
 'ShadowWeaponSkillIdList': [12000129, 12000130, 12000131, 12000132]}

### Create entries for DataFrame creation

In [None]:
# Output column name -> key used in the parsed ShadowWeaponSkillGroup records.
# Built once instead of on every loop iteration.
default_key_dict = {
    "id": "Id",
    "group_name": "GroupName",
    "name": "Name",
    "shadow_weapon_skill_id_list": "ShadowWeaponSkillIdList",
}

shadow_equipment_skill_group_entries = []

# Iterate the skill *group* records. The loop previously walked
# `shadow_equipment_skills`, whose records are read with different keys
# elsewhere in this file, so the group columns were never filled.
for skill_group_id, parsed_dict in shadow_equipment_skill_groups.items():
  # Start from the dict key; a record's own "Id" value replaces it if present.
  shadow_equipment_skill_group_entry = {"id": skill_group_id}

  for col, def_key in default_key_dict.items():
    try:
      shadow_equipment_skill_group_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # A record may lack some keys (or not be a mapping at all); such columns
      # are left out and show up as missing values in the DataFrame.
      continue

  shadow_equipment_skill_group_entries.append(shadow_equipment_skill_group_entry)

shadow_equipment_skill_group_df = pd.DataFrame(shadow_equipment_skill_group_entries)

## Shadow Weapon Skill Group

### Parse data

In [None]:
# Read the cleaned ShadowWeaponSkillGroup dump and rewrite its table syntax
# into a Python dict literal that `literal_eval` can parse.
# NOTE(review): this cell repeats the preceding "Shadow Weapon Skill Group"
# parse cell -- confirm whether the duplicate section is still needed.

# Reading stops once this many lines have been collected.
LIMIT = 999999

dump_lines = []
with open(f"{MAIN_CLEANED_DIR}/data_ShadowWeapon_ShadowWeaponSkillGroup.bytes", "r", encoding="utf8") as dump_file:
  for line_number, line in enumerate(dump_file):
    if line_number == LIMIT:
      break
    dump_lines.append(line.strip())

# Join with single spaces; the patterns below match on that exact spacing.
texts = " ".join(dump_lines)

# `[key] =` becomes `key:`. Replacement templates are raw strings because
# `\g` is not a valid escape sequence in a normal string literal.
texts = re.sub(r'\[([\w\"]+)\] =', r'\g<1>:', texts)
# Braces that wrap only digits, commas and spaces become list brackets.
texts = re.sub(r'{ ([\d,]+) }', r'[\g<1>]', texts)
texts = re.sub(r'\{([\d\, ]+)\}', r'[\g<1>]', texts)
texts = re.sub(r'\:\{([\d\,\ ]+)\}', r':[\g<1>]', texts)

# Swap the remaining brace sequences for placeholders, longest first, so the
# generic `}, },` replacement cannot consume part of a longer sequence.
texts = texts.replace("{ {", "AAAAA")
texts = texts.replace("}, }, },", "BBBBB")
texts = texts.replace("}, }, {", "DDDDD")
texts = re.sub(r'\}, \}, (\d)', r'FFFFF \g<1>', texts)
texts = re.sub(r'\}, \},$', 'EEEEE', texts)
texts = texts.replace("}, },", "CCCCC")

# Expand the placeholders: `{ {` opens a list, and the closing sequences get a
# `]` where that list ends.
texts = texts.replace("AAAAA", "[ {")
texts = texts.replace("BBBBB", "}, ], },")
texts = texts.replace("CCCCC", "}, ],")
texts = texts.replace("DDDDD", "}, }, {")
texts = texts.replace("EEEEE", "}, },")
texts = texts.replace("FFFFF", "}, }, ")

# Wrap everything in braces so the whole dump is one dict literal.
texts = "{" + texts + "}"

shadow_equipment_skill_groups = literal_eval(texts)

In [None]:
# Show the parsed record stored under key 707 as a spot check of the parser.
shadow_equipment_skill_groups[707]

{'GroupName': 'ShadowWeaponSkillGroup_007',
 'Id': 707,
 'Name': '坚锤羽盾-器灵',
 'ShadowWeaponSkillIdList': [12000129, 12000130, 12000131, 12000132]}

### Create entries for DataFrame creation

In [None]:
# Output column name -> key used in the parsed ShadowWeaponSkillGroup records.
# Built once instead of on every loop iteration.
default_key_dict = {
    "id": "Id",
    "group_name": "GroupName",
    "name": "Name",
    "shadow_weapon_skill_id_list": "ShadowWeaponSkillIdList",
}

shadow_equipment_skill_group_entries = []

# Iterate the skill *group* records. The loop previously walked
# `shadow_equipment_skills`, whose records are read with different keys
# elsewhere in this file, so the group columns were never filled.
for skill_group_id, parsed_dict in shadow_equipment_skill_groups.items():
  # Start from the dict key; a record's own "Id" value replaces it if present.
  shadow_equipment_skill_group_entry = {"id": skill_group_id}

  for col, def_key in default_key_dict.items():
    try:
      shadow_equipment_skill_group_entry[col] = parsed_dict[def_key]
    except (KeyError, TypeError):
      # A record may lack some keys (or not be a mapping at all); such columns
      # are left out and show up as missing values in the DataFrame.
      continue

  shadow_equipment_skill_group_entries.append(shadow_equipment_skill_group_entry)

shadow_equipment_skill_group_df = pd.DataFrame(shadow_equipment_skill_group_entries)