# Output outcomes

Depends on download_sheets. Outputs:

- tables_index.csv
- tables (tables/*.csv)
- l1.csv, l2.csv, l3.csv, l4.csv

In [None]:
"""
output tables
"""
import re
import os
import pandas as pd
from lib.utils import OUTPUT_OUTCOMES_DIR,OUTPUT_OUTCOMES_TABLE_DIR
from lib.apply_condition_to_dataframe  import apply_condition_to_dataframe
from lib.outcomes_utils import get_table_index,iter_tables_for_outcome_raw

# Make sure both output directories exist before anything is written.
for directory in (OUTPUT_OUTCOMES_DIR, OUTPUT_OUTCOMES_TABLE_DIR):
    os.makedirs(directory, exist_ok=True)

# Write one CSV per table. `info.columns` is a comma-separated string of
# column names; the "index" and "UID" columns are always appended to it.
for table, info in iter_tables_for_outcome_raw():
    selected_columns = re.split(r" *, *", info.columns)
    selected_columns.extend(["index", "UID"])
    table = table.loc[:, selected_columns]
    print(f"output... {info.id}.csv")
    table.to_csv(f"{OUTPUT_OUTCOMES_TABLE_DIR}/{info.id}.csv", index=False)




In [None]:
"""
output table_index
"""
from os import path,makedirs
from lib.utils import save_csv,OUTPUT_OUTCOMES_DIR
from lib.outcomes_utils import get_table_index

makedirs(OUTPUT_OUTCOMES_DIR, exist_ok=True)

# The table index is written without its "source" column.
table_index = get_table_index()
table_index = table_index.drop("source", axis=1)
tables_index_path = path.join(OUTPUT_OUTCOMES_DIR, "tables_index.csv")
save_csv(table_index, tables_index_path)
print(f"output... tables_index.csv")


In [None]:
# Output outcomes l1,l2,l3,l4

import csv
import os
import pandas as pd
import re
import glob
import itertools
from lib.utils import BASE_DIR,SHEETS_OUTCOMES_DIR,OUTPUT_OUTCOMES_DIR,EN_OUTPUT_OUTCOMES_DIR,get_glob_file,save_csv,load_csv
from lib.dataframe_to_grouped_numbers import dataframe_to_grouped_numbers
from lib.outcomes_utils import get_table_index,format_table_ref


# Column-rename maps, keyed by the stage at which they are applied.
#   "l1", "l2", "l234"  : applied right after a source sheet is read
#   "l<N>_ja", "l<N>_en": applied per language just before layer N is written
conversion_dicts = {
    # --- renames applied when reading the sheets ---
    "l1": {
        "第1層イニシャル": "l1_index",
        "UID": "l1_UID",
        "第1層フルスペル": "l1_spell",
    },
    "l2": {
        "UID": "l2_UID",
    },
    "l234": {
        "UID": "l4_UID",
    },
    # --- renames applied when writing layer 1 ---
    "l1_ja": {
        "第1層": "l1",
        "第1層説明": "l1_desc",
        "l1_UID": "UID",
        "l1_index": "index",
    },
    "l1_en": {
        "l1-en": "l1",
        "l1-en-desc": "l1_desc",
        "l1_UID": "UID",
        "l1_index": "index",
    },
    # --- renames applied when writing layer 2 ---
    "l2_ja": {
        "第2層": "l2",
        "第2層説明": "l2_desc",
        "l2_UID": "UID",
        "l2_index": "index",
    },
    "l2_en": {
        "l2-en": "l2",
        "l2-desc-en": "l2_desc",
        "l2_UID": "UID",
        "l2_index": "index",
    },
    # --- renames applied when writing layer 3 ---
    "l3_ja": {
        "第3層": "l3",
        "l3_index": "index",
        "l3_UID": "UID",
    },
    "l3_en": {
        "l3-en": "l3",
        "l3_index": "index",
        "l3_UID": "UID",
    },
    # --- renames applied when writing layer 4 ---
    "l4_ja": {
        "第4層": "l4",
        "l4_index": "index",
        "l4_UID": "UID",
    },
    "l4_en": {
        "l4-en": "l4",
        "l4_index": "index",
        "l4_UID": "UID",
    },
}

# Columns (in output order) kept for each layer's CSV, after the
# per-language rename above has been applied.
output_lists = {
    "l1": ["index", "UID", "l1_spell", "l1", "l1_desc"],
    "l2": ["index", "UID", "l2", "l2_desc", "l1_UID"],
    "l3": ["index", "UID", "l3", "l2_UID"],
    "l4": ["index", "UID", "l4", "l3_UID"],
}

# Per-layer DataFrames, keyed "l1" .. "l4".
data = {}

# reading layer 1
data["l1"] = load_csv(f"{SHEETS_OUTCOMES_DIR}/第1層/第1層.csv").rename(
    columns=conversion_dicts["l1"]
)


# reading layer 2
# Every layer-1 row has a directory matched by the "<l1_index>*" glob that
# holds a 第2層.csv. The per-directory frames are collected in a list and
# concatenated once after the loop: growing a DataFrame with pd.concat on
# every iteration re-copies all accumulated rows each time, and seeding it
# with an empty DataFrame relies on pandas ignoring empty frames when it
# resolves result dtypes, which is deprecated.
# NOTE(review): this sheet is read with pd.read_csv directly while the other
# sheets go through load_csv - confirm whether that is intentional.
l2_units = []
for i, l1 in data["l1"].iterrows():
    filename = get_glob_file(f"{SHEETS_OUTCOMES_DIR}/{l1['l1_index']}*/第2層.csv")
    l2_unit = pd.read_csv(filename, encoding="utf_8_sig").rename(
        columns=conversion_dicts["l2"]
    )
    # Layer-2 index is "<l1_index>-NN", where NN is the row's 1-based
    # position in the sheet, zero-padded to two digits.
    l2_unit["l2_index"] = (
        l1["l1_index"] + "-" + (l2_unit.index + 1).astype("str").str.zfill(2)
    )
    # Carry the parent layer-1 keys on every layer-2 row.
    l2_unit["l1_index"] = l1["l1_index"]
    l2_unit["l1_UID"] = l1["l1_UID"]
    l2_units.append(l2_unit)

# With no layer-1 rows there is nothing to concatenate (pd.concat raises on
# an empty list), so fall back to the same empty frame as before.
data["l2"] = pd.concat(l2_units) if l2_units else pd.DataFrame(data=[], columns=[])

# reading layer 3 and 4
# Each layer-1 directory holds a 第2から4層.csv with one row per layer-4 item.
# Per-directory frames are collected in lists and concatenated once after the
# loop, instead of re-concatenating onto a growing (initially empty)
# DataFrame on every iteration.
l3_units = []
l4_units = []
for i, l1 in data["l1"].iterrows():
    filename = get_glob_file(f"{SHEETS_OUTCOMES_DIR}/{l1['l1_index']}*/第2から4層.csv")
    l234_unit = load_csv(filename).rename(columns=conversion_dicts["l234"])

    # Attach the layer-2 columns (l2_index, l2_UID, l1_UID, ...) by layer-2
    # name. Only this layer-1 entry's layer-2 rows are used as the lookup:
    # merging against all of data["l2"] would duplicate rows and attach the
    # wrong keys if the same layer-2 name appears under two layer-1 entries.
    l2_of_l1 = data["l2"][data["l2"]["l1_index"] == l1["l1_index"]]
    l234_unit = pd.merge(l234_unit, l2_of_l1, how="left", on="第2層")

    # NOTE(review): dataframe_to_grouped_numbers presumably yields, per row,
    # a running number for each grouping level - verify in
    # lib.dataframe_to_grouped_numbers. Its 第3層/第4層 columns are used below
    # as the numeric parts of the layer-3/layer-4 indexes.
    indexes = dataframe_to_grouped_numbers(
        l234_unit, ["第2層", "第3層", "第4層"]
    ).rename(columns={"第2層": "l2", "第3層": "l3", "第4層": "l4"})

    # "<l2_index>-NN" and "<l3_index>-NN", numbers zero-padded to two digits.
    l234_unit["l3_index"] = (
        l234_unit["l2_index"] + "-" + indexes["l3"].astype("str").str.zfill(2)
    )
    l234_unit["l4_index"] = (
        l234_unit["l3_index"] + "-" + indexes["l4"].astype("str").str.zfill(2)
    )

    # add layer 3: one row per distinct layer-3 index
    l3_units.append(l234_unit.drop_duplicates(subset=["l3_index"]))

    # add layer 4: every row
    l4_units.append(l234_unit)

# With no layer-1 rows there is nothing to concatenate (pd.concat raises on
# an empty list), so fall back to the same empty frames as before.
data["l3"] = pd.concat(l3_units) if l3_units else pd.DataFrame(data=[], columns=[])
data["l4"] = pd.concat(l4_units) if l4_units else pd.DataFrame(data=[], columns=[])

# Run the layer-3/4 text through format_table_ref, for both languages.
# Missing English text is replaced by "" first.
data["l3"]["第3層"] = data["l3"]["第3層"].map(format_table_ref)
data["l4"]["第4層"] = data["l4"]["第4層"].map(format_table_ref)
data["l3"]["l3-en"] = data["l3"]["l3-en"].fillna("").map(format_table_ref)
data["l4"]["l4-en"] = data["l4"]["l4-en"].fillna("").map(format_table_ref)

os.makedirs(f"{OUTPUT_OUTCOMES_DIR}", exist_ok=True)
os.makedirs(f"{EN_OUTPUT_OUTCOMES_DIR}", exist_ok=True)

# output l1,l2,l3,l4
# Each layer is written twice: the Japanese version goes to
# OUTPUT_OUTCOMES_DIR and the English version to EN_OUTPUT_OUTCOMES_DIR.
for i in range(1, 5):
    for lang in ("ja", "en"):
        # Map the language-specific source columns onto the common output
        # names, then keep only the columns listed for this layer.
        rename_map = conversion_dicts[f"l{i}_{lang}"]
        wanted_columns = output_lists[f"l{i}"]
        layer = data[f"l{i}"].rename(columns=rename_map)
        layer = layer.loc[:, wanted_columns]

        if lang == "ja":
            output_dir = OUTPUT_OUTCOMES_DIR
        else:
            output_dir = EN_OUTPUT_OUTCOMES_DIR

        save_csv(layer, f"{output_dir}/l{i}.csv")
        print(f"output... l{i}({lang}).csv")

