In [19]:
import re
import getpass
import shutil
import numpy as np
import pandas as pd
import text_input
import importlib

## Parameters

In [20]:
# Code of the report being assembled; it is used below to build output paths
# and to pick the matching report title.
report = "R3"

# Root of the OneDrive-synced project folder for the current OS user.
# The "/Users/<name>" prefix is hard-coded, so this only resolves on machines
# that keep home directories under /Users.
_os_user = getpass.getuser()
path2SP  = f"/Users/{_os_user}/OneDrive - World Justice Project/EU Subnational/EU-S Data/reports/eu-thematic-reports"

## Reading outline

In [21]:
# Load the report outline workbook from the shared project folder.
outline = pd.read_excel(f"{path2SP}/data-viz/inputs/report_outline.xlsx")

# Keep only the rows whose `thematic_reports` flag is True. The explicit
# `== True` comparison is kept so that missing flags are treated as False.
# The result is an independent copy of the filtered rows.
charts4reports = outline[outline["thematic_reports"] == True].copy()

## Creating text input classes

In [22]:
# Markdown source files that make up the report, in the order they are listed.
# The executive summary entry is currently disabled.
input_files = [
    "title-&-scroll.md",
    "introduction.md",
    # "executive-summary.md",
    "thematic-findings.md",
    "appendix.md",
    "methodology.md",
    "about.md",
    "acknowledgements.md"
]

# One text_input object per source file, keyed by its file name
# (dicts keep insertion order, so the listing order above is preserved).
inputs = {md_file: text_input.text_input(report, md_file) for md_file in input_files}
inputs_as_classes = list(inputs.values())

## Wrangling and saving report structure

In [23]:
# Output flavours to build. The boolean is forwarded as `pdfver` to
# get_thematic_findings; the key becomes part of the CSV file name.
versions = {"html-version": False, "pdf-version": True}

for version, parameter in versions.items():

    # Front matter comes first; its highest id seeds the numbering of the intro.
    front_page  = inputs["title-&-scroll.md"].get_front_page()
    intro_start = np.max(front_page.id) + 1
    intro_sections = [
        text_obj.get_intro_sections(start_id = intro_start)
        for file_name, text_obj in inputs.items()
        if file_name in ["introduction.md", "executive-summary.md"]
    ]

    # Thematic findings continue numbering after the last intro section.
    findings_start = np.max(intro_sections[-1].id) + 1
    thematic_findings = inputs["thematic-findings.md"].get_thematic_findings(
        charts4reports,
        pdfver   = parameter,
        id_start = findings_start
    )

    # NOTE(review): every closing section receives the same start_id. Whether
    # get_final_sections offsets ids internally is not visible here; confirm
    # that the resulting ids do not collide.
    final_start = np.max(thematic_findings.id) + 1
    final_sections = [
        text_obj.get_final_sections(start_id = final_start)
        for file_name, text_obj in inputs.items()
        if file_name in ["appendix.md", "methodology.md", "about.md", "acknowledgements.md"]
    ]

    # Stack all parts in reading order and store ids as nullable integers.
    csv_data = pd.concat([front_page, *intro_sections, thematic_findings, *final_sections])
    for id_col in ("id", "belongs_to"):
        csv_data[id_col] = csv_data[id_col].astype("Int64")

    # Rows carrying an id4config value feed the config file built further down.
    # This variable is overwritten on every pass, so only the value from the
    # last entry of `versions` is still available after the loop.
    has_config_id = csv_data["id4config"].notna()
    data4config   = csv_data.loc[has_config_id, ["content", "id4config"]].copy()

    # Headers are replaced by position, so the frame must have exactly these
    # six columns in this order. id4config is dropped before writing.
    csv_data.columns = ["id", "tipo de elemento", "contenido (markdown)", "pertenece a ", "settings", "id4config"]
    schema_path = f"{report}/{report}-csv-schema-{version}.csv"
    csv_data.drop(columns="id4config").to_csv(schema_path, index = False, encoding = "utf-8")


## Creating config file

In [24]:
# `content` holds SVG file names: swap the extension to get the data file name,
# and strip it to get the chart id used to look the chart up in the outline.
svg_ext = re.compile(r"\.svg")
data4config["file"] = data4config["content"].map(lambda name: svg_ext.sub(".csv", name))
data4config["id"]   = data4config["content"].map(lambda name: svg_ext.sub("", name))

# Attach the outline metadata of each chart. A left join keeps every config row
# even when no outline entry matches.
outline_meta = outline[["chart_id", "description", "type", "target_var_1", "section"]]
config_file  = data4config.merge(
    outline_meta,
    how      = "left",
    left_on  = "id",
    right_on = "chart_id"
)

# Translate outline labels into the names written to config.csv.
desc2names = {"QRQ": "expert", "GPP": "people"}
type2names = {"Map": "map", "Lollipop": "lollypop", "Dumbbells": "dumbbell"}
config_file["description"] = config_file["description"].replace(desc2names)
config_file["type"]        = config_file["type"].replace(type2names)

# Only the three translated chart types are listed in the config file.
in_config = config_file["type"].isin(["map", "lollypop", "dumbbell"])
config_file_final = (
    config_file
    .loc[in_config, ["type", "file", "id4config", "description"]]
    .rename(columns = {"id4config": "id", "description": "handler"})
)
config_file_final.to_csv(f"{report}/config.csv", index = False, encoding = "utf-8")

## Wrangling and saving mini tables

In [25]:
# Resolve the title that corresponds to the selected report code.
# A lookup table with an explicit failure replaces the chain of independent
# `if`s: with an unsupported code the old chain left `report_title` undefined,
# or silently stale from an earlier run in the same kernel.
report_titles = {
    "R1": "Democracy & Fundamental Rights",
    "R2": "Justice & Safety",
    "R3": "Transparency & Corruption",
}
if report not in report_titles:
    raise ValueError(
        f"Unknown report code {report!r}; expected one of {sorted(report_titles)}"
    )
report_title = report_titles[report]

In [26]:
# Charts that get a per-chart data table (the same three types listed in config.csv).
data4minitabs = config_file.loc[config_file["type"].isin(["map", "lollypop", "dumbbell"])].copy()

gpp_data = pd.read_csv(f"{path2SP}/data-viz/output/data4web_gpp.csv")
qrq_data = pd.read_csv(f"{path2SP}/data-viz/output/data4web_qrq.csv")

for _, row in data4minitabs.iterrows():

    if row["description"] == "expert":
        # Expert charts: match on indicator and sub-pillar.
        minitab = qrq_data.loc[
            (qrq_data["indicator"] == row["target_var_1"]) &
            (qrq_data["subpillar_name"] == row["section"])
        ]

    elif row["description"] == "people":
        # People charts: total-sample rows for this variable, subsection and report chapter.
        minitab = gpp_data.loc[
            (gpp_data["demographic"] == "Total Sample") &
            (gpp_data["id"] == row["target_var_1"]) &
            (gpp_data["subsection"] == row["section"]) &
            (gpp_data["chapter"] == report_title)
        ]

    else:
        # Without this branch the previous iteration's `minitab` would be
        # written under this chart's file name (or a NameError raised on the
        # first row). Skip and report instead of exporting wrong data.
        print(f"Skipping {row['file']}: unrecognised data source {row['description']!r}")
        continue

    minitab.to_csv(f"{path2SP}/final-charts/mini-tables/{report}/{row['file']}", index = False, encoding = "utf-8")


## Gathering report assets

In [27]:
# Charts that belong to the report being assembled.
targeted_outline = charts4reports.loc[charts4reports["report"] == report_title, ["figure", "chart_id", "type"]]

# Chart type -> (folder under final-charts, file extension) of the asset to copy.
# NOTE(review): unlike the others, the "reduced" folder has no per-report
# sub-folder; this mirrors the original paths, so confirm it is intentional.
asset_sources = {
    "Map"               : (f"mini-tables/{report}",  "csv"),
    "Dumbbells"         : (f"mini-tables/{report}",  "csv"),
    "Lollipop"          : (f"mini-tables/{report}",  "csv"),
    "Bars"              : (f"no-tooltip/{report}",   "svg"),
    "QRQ Bars"          : (f"no-tooltip/{report}",   "svg"),
    "Table"             : (f"no-tooltip/{report}",   "svg"),
    "Dots"              : (f"with-tooltip/{report}", "svg"),
    "Scatterplot"       : ("reduced",                "svg"),
    "Map (Categorical)" : ("reduced",                "svg"),
}

# Looping through charts
for _, row in targeted_outline.iterrows():
    chart_type = row["type"]

    # "Box" entries have no asset to copy.
    if chart_type == "Box":
        continue

    # Previously an unknown or missing type reused `source`/`destination` from
    # the previous row and re-copied the wrong file (or raised NameError on the
    # first row). Skip and report instead.
    if chart_type not in asset_sources:
        print(f"Skipping {row['chart_id']}: no asset rule for chart type {chart_type!r}")
        continue

    folder, extension = asset_sources[chart_type]
    source      = f"{path2SP}/final-charts/{folder}/{row['chart_id']}.{extension}"
    destination = f"{path2SP}/html/{report}/assets/{row['chart_id']}.{extension}"
    shutil.copy2(source, destination)