In [1]:
%load_ext nb_black
import os
import pandas as pd
import numpy as np

<IPython.core.display.Javascript object>

## Functions

In [2]:
def create_description_table(table, waves, language, return_links_as_symbols=True):
    """Create a variable overview from the data description table of LISS data.

    Args:
        table (pandas.DataFrame): Description table. Must contain the columns
            "Variable", "question_group", "Type", "Topic", "ordered",
            "reference period other than survey period" and one "wave{i}"
            column for every entry of ``waves``. The input is not modified.
        waves (list): Wave numbers to include, e.g. ``[1, 3]``. Any number of
            waves is supported; duplicates are ignored.
        language (str): Language code that is embedded in the link targets.
        return_links_as_symbols (bool): If True, every link is a Sphinx
            ``:ref:`` role that uses a link symbol as its text. If False, the
            bare target ``w{wave}{language}-{question_group}`` is returned.

    Returns:
        pandas.DataFrame: Columns "Variable", one "Links Wave {i}" per wave,
        "Type", "Topic" and "Reference Period Other Than Survey Period", in
        that order. Rows keep the index labels of ``table``. Missing entries
        are shown as a single space so they stay invisible in the rendered
        table.
    """
    missing, blank = "-", " "
    ref_in = "reference period other than survey period"
    ref_out = "Reference Period Other Than Survey Period"

    # De-duplicate the requested waves while keeping the caller's order.
    waves = list(dict.fromkeys(waves))
    wave_cols = [f"wave{i}" for i in waves]

    # fillna returns a new frame, so the caller's table is left untouched.
    table = table.fillna(missing)

    # Keep rows that have a variable name, a question group and an entry in
    # at least one of the requested waves.
    in_any_wave = (table[wave_cols] != missing).any(axis=1)
    has_names = (table["question_group"] != missing) & (table["Variable"] != missing)
    # Copy so the column assignments below do not write into a slice.
    table = table[has_names & in_any_wave].copy()

    # Replace the per-wave question id by a link to the question group.
    for wave, col in zip(waves, wave_cols):
        # Decide what is missing BEFORE building the link text, so a question
        # group that happens to contain "--" is not mistaken for a gap.
        present = table[col] != missing
        targets = f"w{wave}{language}-" + table["question_group"].astype(str)
        if return_links_as_symbols:
            links = ":ref:`🔗 <" + targets + ">`"
        else:
            links = targets
        table[col] = links.where(present, missing)

    # Fold the 'ordered' flag into the type so the flag column can go.
    # The elementwise comparison is needed: the column also holds "-" markers.
    table["Type"] = table["Type"].where(
        table["ordered"].ne(True), "Ordered Categorical"
    )

    # Show reference periods as integers (e.g. 2019 instead of 2019.0) and
    # blank out missing ones directly, without a numeric sentinel that could
    # collide with real data.
    table[ref_out] = [
        blank if value == missing else int(value) for value in table[ref_in]
    ]

    # Final ordering, blank out the remaining missing markers and rename the
    # wave columns to their table headers.
    link_headers = {col: f"Links Wave {wave}" for wave, col in zip(waves, wave_cols)}
    ordered_cols = ["Variable", *wave_cols, "Type", "Topic", ref_out]
    return table[ordered_cols].replace(missing, blank).rename(columns=link_headers)

<IPython.core.display.Javascript object>

## Create Table 

Read in description table 

In [3]:
# Load the raw description table; the CSV uses ";" as its field separator.
table = pd.read_csv("description-tables/xyx-corona-description-table.csv", sep=";")

<IPython.core.display.Javascript object>

Rename columns so they are easier to handle for the functions

In [4]:
# Map the raw CSV headers onto the column names that
# create_description_table works with ("wave1".."wave3", "Variable", "Type",
# "Topic"). Each source column is listed exactly once.
table = table.rename(
    columns={
        "L_CoronavirusImpact_wave3_3p.dta": "wave3",
        "L_CoronavirusImpact_4.0p.dta": "wave2",
        "L_CoronavirusImpact_3p.dta": "wave1",
        "topic_english": "Topic",
        "type": "Type",
        "new_name": "Variable",
    }
)

<IPython.core.display.Javascript object>

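Select the waves and the language. `language_short` should be a single lower-case letter (it is embedded in the link targets), `language_long` should be the full lower-case language name, and `waves` should be a list of one or more wave numbers.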

In [5]:
# Waves to include in the table. A single-element list also selects the
# per-wave output folder and enables the topic overview page further down.
waves = [3]
# One-letter language code that is embedded in the generated link targets.
language_short = "d"
# Full language name, passed to create_overview_page as its `language` argument.
language_long = "dutch"

<IPython.core.display.Javascript object>

Create description table and save to csv.

In [6]:
# Build the variable overview; links use the default symbol-style :ref: form.
variable_table = create_description_table(table, waves, language_short)

<IPython.core.display.Javascript object>

In [7]:
# Output for a single wave goes into that wave's docs folder; output that
# spans several waves goes to the docs source root.
if len(waves) == 1:
    path = f"../docs/source/wave-{waves[0]}/"
else:
    path = "../docs/source/"

<IPython.core.display.Javascript object>

In [8]:
# Display the generated table for a visual check.
variable_table

Unnamed: 0,Variable,Links Wave 3,Type,Topic,Reference Period Other Than Survey Period
5,p_2m_infected,:ref:`ðŸ”— <w3e-q1header>`,float,Health Beliefs,
6,p_2m_acquaintance_infected,:ref:`ðŸ”— <w3e-q1header>`,float,Health Beliefs,
7,p_2m_hospital_if_infect_self,:ref:`ðŸ”— <w3e-q1header>`,float,Health Beliefs,
8,p_2m_infected_and_pass_on,:ref:`ðŸ”— <w3e-q1header>`,float,Health Beliefs,
34,approp_gov_medical,:ref:`ðŸ”— <w3e-q33>`,Ordered Categorical,Policies,
...,...,...,...,...,...
934,teacher_platform_sec_child5,:ref:`ðŸ”— <w3e-v6_bokind>`,bool,Childcare,
935,teacher_pickup_sec_child5,:ref:`ðŸ”— <w3e-v6_bokind>`,bool,Childcare,
936,teacher_none_sec_child5,:ref:`ðŸ”— <w3e-v6_bokind>`,bool,Childcare,
937,teacher_other_sec_child5,:ref:`ðŸ”— <w3e-v6_bokind>`,bool,Childcare,


<IPython.core.display.Javascript object>

In [9]:
# The variable table is only written to disk when language_short is "e".
if language_short == "e":
    wave_tag = "-".join(map(str, waves))
    csv_target = f"{path}variable_table-waves-{wave_tag}-{language_short}.csv"
    variable_table.to_csv(csv_target, sep=",", index=False)

<IPython.core.display.Javascript object>

## Create overview table for topics

In [10]:
import os
from functions import add_to_file

<IPython.core.display.Javascript object>

In [11]:
def create_overview_page(topic_dict, wave, language, path):
    """Write the reST page that lists the questions of one wave by topic.

    The page is stored as ``{path}wave{wave}_questions_{language}_topics.rst``.
    An existing file of that name is removed first; every piece of content is
    then handed to ``add_to_file`` one entry at a time.

    Args:
        topic_dict: Mapping from topic name to an iterable of question page
            names. Anything that offers ``.keys()`` and ``[key]`` lookup works.
        wave: Wave number used in the file name and the page title.
        language (str): Language name used in the file name, in the title
            (capitalized) and as the folder prefix of every toctree entry.
        path (str): Target folder; it is prepended to the file name as is, so
            it has to end with a path separator.
    """
    page_id = f"wave{wave}_questions_{language}_topics"
    out_file = f"{path}{page_id}.rst"

    # Start from scratch so that entries of an earlier run do not pile up.
    if os.path.exists(out_file):
        os.remove(out_file)

    heading = (
        f"Overview of Questions Wave {wave} ({language.capitalize()}) Grouped by Topic"
    )
    intro = (
        "This page contains the questions grouped by topic. "
        "When clicking into a question, please note that internally the questions are "
        "ordered according to their appearance in the questionnaire. Clicking on "
        "the next or previous question will thus not preseve the grouping by topic."
    )

    # Page header: reference label, title with underline, introduction.
    header = [f".. _{page_id}:", "\n", heading, "=" * len(heading), intro, "\n"]
    for entry in header:
        add_to_file(entry, out_file)

    # One section with its own toctree per topic.
    for topic in topic_dict.keys():
        section = [
            topic,
            "-" * len(topic),
            "",
            ".. toctree::",
            "   :maxdepth: 1",
            "",
        ]
        section.extend(f"   {language}/{question}" for question in topic_dict[topic])
        section.append("\n")
        for entry in section:
            add_to_file(entry, out_file)

<IPython.core.display.Javascript object>

In [12]:
# The topic overview page is only generated when exactly one wave is selected.
if len(waves) == 1:
    # Plain link targets (no :ref: markup) are needed for the toctree entries.
    topic_table = create_description_table(
        table, waves, language_short, return_links_as_symbols=False
    )
    links_by_topic = topic_table.groupby("Topic")[f"Links Wave {waves[0]}"]
    topic_dict = links_by_topic.unique()
    # Leave out the "skip"/"Skip" topics if they are present.
    topic_dict = topic_dict.drop(["skip", "Skip"], errors="ignore")
    create_overview_page(topic_dict, waves[0], language_long, path)

<IPython.core.display.Javascript object>