In [1]:
%load_ext nb_black
import os
import pandas as pd
import numpy as np

<IPython.core.display.Javascript object>

## Functions

In [25]:
def create_description_table(table, waves, language, return_links_as_symbols=True):
    """Create the variable overview from the LISS data description table.

    Args:
        table (pandas.DataFrame): Description table. It must contain the columns
            "Variable", "Type", "Topic", "ordered", "question_group" and
            "reference period other than survey period", one "wave{i}" column
            for every entry ``i`` of ``waves`` and, if exactly one wave is
            requested, the matching "routing_wave_{i}" column. The input frame
            is not modified.
        waves (list): Wave numbers to include. Any number of waves is
            supported, but at least one is required.
        language (str): Language indicator inserted into the link targets
            (``w{wave}{language}-{question_group}``).
        return_links_as_symbols (bool): If True, links are returned as
            ``:ref:`` roles showing a link symbol; if False, the bare link
            target is returned.

    Returns:
        pandas.DataFrame: One row per variable that has a name, a question
        group and a question id in at least one requested wave. Columns are
        "Variable", "Links Wave {first wave}", "Type", "Topic",
        "Reference Period Other Than Survey Period", then "Links Wave {i}" for
        each further wave and, for a single wave only, "Question Id" and
        "Routing". Missing entries are shown as a single blank.

    Raises:
        ValueError: If ``waves`` is empty.
    """
    waves = list(waves)
    if not waves:
        raise ValueError("`waves` must contain at least one wave number.")

    missing = "-"
    blank = " "
    reference_source = "reference period other than survey period"
    reference_target = "Reference Period Other Than Survey Period"

    def _link(wave, group):
        """Return the link (or bare link target) of a question group in a wave."""
        target = f"w{wave}{language}-{group}"
        return f":ref:`🔗 <{target}>`" if return_links_as_symbols else target

    def _format_reference_period(value):
        """Return the reference period as an int, or a blank if it is missing."""
        if isinstance(value, str) and value == missing:
            return blank
        return int(value)

    # fillna returns a new frame, so the caller's table is left untouched.
    table = table.fillna(missing)

    # Keep only rows that have both a variable name and a question group.
    table = table[(table["question_group"] != missing) & (table["Variable"] != missing)]

    # Drop observations that are not part of any included wave. This works for
    # any number of waves instead of special-casing one, two or three.
    wave_columns = [f"wave{wave}" for wave in waves]
    in_any_wave = (table[wave_columns] != missing).any(axis=1)
    table = table[in_any_wave].copy()

    # Create the question links. Whether a link exists is decided from the wave
    # column itself, not by searching the formatted link for "--", so question
    # groups that contain dashes keep their link.
    for wave in waves:
        links = table["question_group"].apply(lambda group: _link(wave, group))
        table[f"group_w{wave}"] = links.where(table[f"wave{wave}"] != missing, missing)

    # Fold the 'ordered' flag into the type so the separate column can be left out.
    is_ordered = table["ordered"] == True  # noqa: E712 (elementwise comparison)
    table["Type"] = table["Type"].mask(is_ordered, "Ordered Categorical")

    # Format the reference period as integers; missing entries become a blank.
    reference_period = table[reference_source].map(_format_reference_period)

    # Replace missings with a blank so they do not show up in the table.
    table = table.replace(missing, blank)
    table[reference_target] = reference_period

    # Final ordering and renaming of the wave link headers. The column order is
    # the one this function has always produced: the links of the first wave
    # come before the descriptive columns, links of further waves after them.
    first_wave, *later_waves = waves
    selection = {
        "Variable": "Variable",
        f"Links Wave {first_wave}": f"group_w{first_wave}",
        "Type": "Type",
        "Topic": "Topic",
        reference_target: reference_target,
    }
    for wave in later_waves:
        selection[f"Links Wave {wave}"] = f"group_w{wave}"
    if len(waves) == 1:
        selection["Question Id"] = f"wave{first_wave}"
        selection["Routing"] = f"routing_wave_{first_wave}"

    final_table = table[list(selection.values())].copy()
    final_table.columns = list(selection.keys())

    return final_table

<IPython.core.display.Javascript object>

## Create Table 

Read in description table 

In [26]:
# Load the semicolon-separated description table with renaming and routing information.
table = pd.read_csv(
    filepath_or_buffer="tables/xyx-corona-questionnaire_renaming_including_routing.csv",
    sep=";",
)

<IPython.core.display.Javascript object>

Rename columns so they are easier to handle for the functions

In [27]:
# Map the raw questionnaire file names to short wave identifiers and rename the
# remaining headers to the column names create_description_table works with.
# The former duplicate "new_name" entry (mapped to "variable") was dropped: in a
# dict literal the last duplicate key wins, so it never had any effect.
table = table.rename(
    columns={
        "L_CoronavirusImpact_3p.dta": "wave1",
        "L_CoronavirusImpact_4.0p.dta": "wave2",
        "L_CoronavirusImpact_wave3_4p.dta": "wave3",
        "new_name": "Variable",
        "topic_english": "Topic",
        "type": "Type",
    }
)

<IPython.core.display.Javascript object>

Select the wave(s) and the language. `language_short` should be a single lower-case letter (e.g. `"e"`), `language_long` the corresponding full language name (e.g. `"english"`), and `waves` a list of one or more wave numbers.

In [51]:
# Waves to include (list of wave numbers) and the language in short and long form.
waves = [2]
language_short, language_long = "e", "english"

<IPython.core.display.Javascript object>

Create description table and save to CSV.

In [52]:
# Display the renamed description table for a visual check.
table

Unnamed: 0,Variable,reference period other than survey period,label_english,wave3,wave2,wave1,Type,categories_english,ordered,Topic,question_group,routing_wave_1,routing_wave_2,routing_wave_3
0,l_2m_infected,,How likely do you think it is that you will be...,,,q1a,Categorical,"not at all likely, very unlikely, rather unlik...",True,Health Beliefs,q1,,,
1,l_2m_acquaintance_infected,,How likely do you think it is that someone in ...,,,q1b,Categorical,"not at all likely, very unlikely, rather unlik...",True,Health Beliefs,q1,,,
2,l_2m_hospital_if_infect_self,,How likely do you think it is that you will ne...,,,q1c,Categorical,"not at all likely, very unlikely, rather unlik...",True,Health Beliefs,q1,,,
3,l_2m_quarantine,,How likely do you think it is that you will be...,,,q1d,Categorical,"not at all likely, very unlikely, rather unlik...",True,Health Beliefs,q1,,,
4,l_2m_infected_and_pass_on,,How likely do you think it is that you will ge...,,,q1e,Categorical,"not at all likely, very unlikely, rather unlik...",True,Health Beliefs,q1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
934,teacher_platform_sec_child5,,Teacher(s) and pupil/parents communicated and ...,v6_vokind5__5,,,bool,,,Childcare,v6_bokind,,,
935,teacher_pickup_sec_child5,,Teaching materials had to be picked up at school,v6_vokind5__6,,,bool,,,Childcare,v6_bokind,,,
936,teacher_none_sec_child5,,There was no communication between teacher(s) ...,v6_vokind5__7,,,bool,,,Childcare,v6_bokind,,,
937,teacher_other_sec_child5,,"Another way of communication/exchange, namely:",v6_vokind5__8,,,bool,,,Childcare,v6_bokind,,,


<IPython.core.display.Javascript object>

In [53]:
# Build the variable overview for the selected waves, with links rendered as symbols.
variable_table = create_description_table(
    table=table, waves=waves, language=language_short
)

<IPython.core.display.Javascript object>

In [54]:
# A single wave is written to that wave's docs folder, several waves to the docs root.
path = f"../docs/source/wave-{waves[0]}/" if len(waves) == 1 else "../docs/source/"

<IPython.core.display.Javascript object>

In [55]:
# Display the generated variable overview before it is written to disk.
variable_table

Unnamed: 0,Variable,Links Wave 2,Type,Topic,Reference Period Other Than Survey Period,Question Id,Routing
5,p_2m_infected,:ref:`🔗 <w2e-q1header>`,float,Health Beliefs,,q1a,
6,p_2m_acquaintance_infected,:ref:`🔗 <w2e-q1header>`,float,Health Beliefs,,q1b,
7,p_2m_hospital_if_infect_self,:ref:`🔗 <w2e-q1header>`,float,Health Beliefs,,q1c,
8,p_2m_infected_and_pass_on,:ref:`🔗 <w2e-q1header>`,float,Health Beliefs,,q1e,
39,work_status_202002,:ref:`🔗 <w2e-q13>`,Categorical,Employment,202002,q13,if (participate_wave1 != 1)
...,...,...,...,...,...,...,...
324,bought_or_sold_stocks,:ref:`🔗 <w2e-StockTrading>`,Categorical,Macro Expectations,,StockTrading,
325,amount_stocks_bought,:ref:`🔗 <w2e-Stock3>`,Categorical,Macro Expectations,,Stock3,if (StockTrading = 3)
326,amount_stocks_sold,:ref:`🔗 <w2e-Stock4>`,Categorical,Macro Expectations,,Stock4,if (StockTrading = 4)
327,amount_stocks_bought_merge,:ref:`🔗 <w2e-Stock5>`,Categorical,Macro Expectations,,Stock5,if (StockTrading = 5)


<IPython.core.display.Javascript object>

In [56]:
# Write the overview table to CSV; this only happens for the English version ("e").
if language_short == "e":
    variable_table.to_csv(
        f"{path}variable_table-waves-{'-'.join(map(str, waves))}-{language_short}.csv",
        index=False,
        sep=",",
    )

<IPython.core.display.Javascript object>

## Create overview table for topics

In [34]:
import os
from functions import add_to_file

<IPython.core.display.Javascript object>

In [35]:
def create_overview_page(topic_dict, wave, language, path):
    """Write the reStructuredText page that lists the questions grouped by topic.

    The page is written to ``wave{wave}_questions_{language}_topics.rst`` inside
    ``path``. A file of that name that already exists is removed first, then the
    page is written line by line with ``add_to_file``.

    Args:
        topic_dict (dict or pandas.Series): Maps each topic name to an iterable
            of question identifiers. Every topic becomes a section with its own
            toctree.
        wave: Wave number, used in the file name, the reference label and the
            page title.
        language (str): Language name, used in the file name, capitalized in
            the title and as the directory prefix of the toctree entries.
        path (str): Directory in which the ``.rst`` file is created.
    """
    file_name = f"wave{wave}_questions_{language}_topics"
    # os.path.join also works when `path` has no trailing separator.
    target = os.path.join(path, f"{file_name}.rst")

    # Start from a clean file.
    # NOTE(review): presumably add_to_file appends to an existing file - confirm.
    if os.path.exists(target):
        os.remove(target)

    title = (
        f"Overview of Questions Wave {wave} ({language.capitalize()}) Grouped by Topic"
    )
    header_lines = [
        f".. _{file_name}:",
        "\n",
        title,
        "=" * len(title),
        "This page contains the questions grouped by topic. "
        "When clicking into a question, please note that internally the questions are "
        "ordered according to their appearance in the questionnaire. Clicking on "
        "the next or previous question will thus not preserve the grouping by topic.",
        "\n",
    ]
    for line in header_lines:
        add_to_file(line, target)

    # One section with its own toctree per topic.
    for topic, questions in topic_dict.items():
        section_lines = [
            topic,
            "-" * len(topic),
            "",
            ".. toctree::",
            "   :maxdepth: 1",
            "",
        ]
        for line in section_lines:
            add_to_file(line, target)
        for question in questions:
            add_to_file(f"   {language}/{question}", target)
        add_to_file("\n", target)

<IPython.core.display.Javascript object>

In [36]:
# The topic overview page is only generated when a single wave is selected.
if len(waves) == 1:
    # Bare link targets are needed here, not :ref: roles.
    topic_table = create_description_table(
        table, waves, language_short, return_links_as_symbols=False
    )
    links_by_topic = topic_table.groupby("Topic")[f"Links Wave {waves[0]}"]
    # Unique link targets per topic, without the "skip"/"Skip" topics if present.
    topic_dict = links_by_topic.unique().drop(["skip", "Skip"], errors="ignore")
    create_overview_page(topic_dict, waves[0], language_long, path)

<IPython.core.display.Javascript object>