# Output outcomes

Depends on `download_sheets`: the outcome sheets must already be downloaded before this cell is run.

- Outputs (written once per language): layer1.csv, layer2.csv, layer3.csv, layer4.csv

In [None]:
# Output outcomes l1,l2,l3,l4

import csv
import os
import pandas as pd
import re
import glob
import itertools
from lib.vars import dirs,langs
from lib.utils import get_glob_file,save_csv,load_csv
from lib.dataframe_to_grouped_numbers import dataframe_to_grouped_numbers
from lib.tables_utils import get_table_index,format_table_ref


# Column renames from the sheet headers to the internal column names.
# "l1", "l2" and "l234" are applied while the sheets are read; the
# "<layer>_<lang>" entries are applied per language when the layers are written.
conversion_dicts = {
    "l1": {
        "第1層イニシャル": "l1_index",
        "id": "l1_id",
        "第1層フルスペル": "l1_spell",
    },
    "l2": {
        "id": "l2_id",
    },
    "l234": {
        "id": "l4_id",
    },
    "l1_ja": {
        "第1層": "l1",
        "第1層説明": "l1_desc",
    },
    "l1_en": {
        "l1-en": "l1",
        "l1-en-desc": "l1_desc",
    },
    "l2_ja": {
        "第2層": "l2",
        "第2層説明": "l2_desc",
    },
    "l2_en": {
        "l2-en": "l2",
        "l2-desc-en": "l2_desc",
    },
    "l3_ja": {
        "第3層": "l3",
    },
    "l3_en": {
        "l3-en": "l3",
    },
    "l4_ja": {
        "第4層": "l4",
    },
    "l4_en": {
        "l4-en": "l4",
    },
}

# For each layer: the internal columns to keep (in output order) mapped to the
# column names used in the written CSV.
output_dicts = {
    "l1": {
        "l1_index": "index",
        "l1_id": "id",
        "l1_spell": "spell",
        "l1": "item",
        "l1_desc": "description",
    },
    "l2": {
        "l2_index": "index",
        "l2_id": "id",
        "l2": "item",
        "l2_desc": "description",
        "l1_id": "parent",
    },
    "l3": {
        "l3_index": "index",
        "l3_id": "id",
        "l3": "item",
        "l2_id": "parent",
    },
    "l4": {
        "l4_index": "index",
        "l4_id": "id",
        "l4": "item",
        "l3_id": "parent",
    },
}

# One DataFrame per layer, stored under the keys "l1" .. "l4".
data = {}

# Layer 1: load the sheet, then switch its headers to the internal column names.
layer1_sheet = load_csv(f"{dirs().input.sheets.outcomes}/第1層/第1層.csv")
data["l1"] = layer1_sheet.rename(columns=conversion_dicts["l1"])

# Layer 2: every layer-1 row has its own 第2層.csv, located in the directory
# whose name starts with that row's l1_index.
l2_units = []
for i, l1 in data["l1"].iterrows():
    filename = get_glob_file(f"{dirs().input.sheets.outcomes}/{l1['l1_index']}*/第2層.csv")
    # NOTE(review): this file is read with pd.read_csv directly while the other
    # sheets go through load_csv -- confirm the difference is intentional.
    l2_unit = pd.read_csv(filename, encoding="utf_8_sig")\
        .rename(columns=conversion_dicts["l2"])
    # "<l1_index>-NN": NN is the row's index label + 1, zero-padded to 2 digits.
    l2_unit["l2_index"] = l1["l1_index"]+"-"+(l2_unit.index+1).astype("str").str.zfill(2)
    # Carry the parent's keys on every row of this unit.
    l2_unit["l1_index"] = l1["l1_index"]
    l2_unit["l1_id"] = l1["l1_id"]
    l2_units.append(l2_unit)

# Concatenate once after the loop: growing the frame with pd.concat on every
# iteration re-copies all rows gathered so far, and seeding it with an empty
# DataFrame makes the result depend on how pandas treats empty entries in
# concat.  An empty frame is kept as the result when layer 1 has no rows.
data["l2"] = pd.concat(l2_units) if l2_units else pd.DataFrame(data=[], columns=[])

# Layers 3 and 4: every layer-1 row has a 第2から4層.csv in the directory whose
# name starts with that row's l1_index.  Layer 4 keeps every row of the merged
# sheet; layer 3 keeps the first row for each distinct l3_index.
l3_units = []
l4_units = []
for i, l1 in data["l1"].iterrows():
    filename = get_glob_file(f"{dirs().input.sheets.outcomes}/{l1['l1_index']}*/第2から4層.csv")
    l234_unit = load_csv(filename)\
        .rename(columns=conversion_dicts["l234"])
    # Attach the layer-2 columns (l2_index, l2_id, ...) by the 第2層 name.
    # NOTE(review): the join runs against the layer-2 rows of *all* layer-1
    # entries, so it presumably relies on 第2層 names being unique across
    # layer 1 -- confirm.
    l234_unit = pd.merge(l234_unit, data["l2"], how="left", on="第2層")
    # NOTE(review): dataframe_to_grouped_numbers is assumed to return, per row,
    # a sequence number for each grouping level -- confirm against lib.
    indexes = dataframe_to_grouped_numbers(l234_unit, ["第2層", "第3層", "第4層"])\
        .rename(columns={"第2層": "l2", "第3層": "l3", "第4層": "l4"})

    # "<l2_index>-NN" and "<l3_index>-NN", numbers zero-padded to 2 digits.
    l234_unit["l3_index"] = l234_unit["l2_index"]+"-"+indexes["l3"].astype("str").str.zfill(2)
    l234_unit["l4_index"] = l234_unit["l3_index"]+"-"+indexes["l4"].astype("str").str.zfill(2)

    # add layer 3
    l3_units.append(l234_unit.drop_duplicates(subset=["l3_index"]))

    # add layer 4
    l4_units.append(l234_unit)

# Concatenate once after the loop: growing the frames with pd.concat on every
# iteration re-copies all rows gathered so far, and seeding them with an empty
# DataFrame makes the result depend on how pandas treats empty entries in
# concat.  Empty frames are kept as the result when layer 1 has no rows.
data["l3"] = pd.concat(l3_units) if l3_units else pd.DataFrame(data=[], columns=[])
data["l4"] = pd.concat(l4_units) if l4_units else pd.DataFrame(data=[], columns=[])

# Run format_table_ref over the layer 3 / layer 4 item texts.  The call order of
# the original statements is kept: Japanese columns of both layers first, then
# the English columns.
for layer_key, text_column in (("l3", "第3層"), ("l4", "第4層")):
    data[layer_key][text_column] = data[layer_key][text_column].map(format_table_ref)

# Missing English values are replaced with "" before formatting.
for layer_key, text_column in (("l3", "l3-en"), ("l4", "l4-en")):
    data[layer_key][text_column] = data[layer_key][text_column].fillna("").map(format_table_ref)

# Create every language's output directory before anything is written.
for lang in langs:
    os.makedirs(f"{dirs(lang).output.outcomes}", exist_ok=True)

# output l1,l2,l3,l4 -- layers in the outer loop, languages in the inner loop.
for i in range(1, 5):
    for lang in langs:
        layer_key = f"l{i}"
        output_columns = output_dicts[layer_key]
        # Expose this language's item/description columns under the shared names.
        localized = data[layer_key].rename(columns=conversion_dicts[f"{layer_key}_{lang}"])
        # Keep only the output columns, in the declared order, then give them
        # their final names.  .loc raises KeyError if any of them is missing.
        layer = localized.loc[:, list(output_columns)]
        layer = layer.rename(columns=output_columns)
        output_dir = dirs(lang).output.outcomes
        save_csv(layer, f"{output_dir}/layer{i}.csv")
        print(f"output... layer{i}({lang}).csv")

