# Create Documentation .rst-Files from Data Description Table

In [1]:
%load_ext autoreload
%autoreload 2
import glob
import os
import pathlib
import pandas as pd
import numpy as np
from functions_questions import *

## Read in Codebook & Set Variable Names

### 1. Specify language and wave for the files that will be created.


In [2]:
# Wave identifier, language identifier and language name for this run.
waveid, lanid, language = "w4", "d", "dutch"

### 2. Set target paths for files and images.

In [3]:
# Screenshot location, expressed relative to the docs directory.
image_path = "../_screenshots/"

# Folder that receives every generated .rst file for this language.
target_dir = "../docs/source/wave-4/{}/".format(language)

#### Remove all old files in target folder.

In [4]:
# Delete previously generated files so the target folder only contains the
# output of this run.
# Real directories are skipped: Path.unlink() raises on a directory, so a
# single sub-folder inside the target folder would abort the whole clean-up.
# Symlinks (even ones pointing at directories) are still unlinked, as before.
for document in glob.glob(f"{target_dir}*"):
    file = pathlib.Path(document)
    if file.is_dir() and not file.is_symlink():
        continue
    file.unlink()

### 3. Load codebook.

In [5]:
# Read the semicolon-separated wave-4 codebook and preview its first two rows.
codebook = pd.read_csv(
    filepath_or_buffer="wave-4/codebook-wave-4-english-dutch.csv",
    delimiter=";",
)
codebook.head(n=2)

Unnamed: 0,page,id,qid,layout,filter,group,question_dutch,subquestion_dutch,question_english,subquestion_english,type,categories_dutch,categories_english,new_name
0,0.0,intro,intro,open,-,Introduction,Deze maand krijgt u de vierde vragenlijst uit ...,,This month you will receive the fourth questio...,,,,,
1,2.1,q1a,q1header,grid,-,Subjective Risks,Hoe groot denkt u dat de kans is dat de volgen...,u wordt besmet,"On a scale of 0 to 100 percent, how likely do ...",You will be infected,float,,,


### 4. Specify column names that should be used to create the files.


* **q_groups**: Name of question groups, each question group gets its own .rst file.
* **q_ids:** Identifier for question groups.
* **q_type**: Should contain the question type. The following types are currently accepted: [Categorical,bool, int, float, str]
* **q_topics**: Topic groups, each topic will get its own .rst-file.
* **q_categories:** Name of the column that contains categories for categorical questions.
* **q_layout:** Layout of the question group. Options: [open, grid, table, multi, cat].
* **q_filter:** Column containing q_id of a question the current question depends on/ is routed from.
* **q_text:** Contains the actual question.
* **q_sub_text:** Contains subquestions.
* **q_numbers:** Used for ordering of questions and subquestions.

In [6]:
# Codebook column names that do not depend on the language.
q_groups, q_ids, q_type = "group", "qid", "type"
q_layout, q_filter, q_numbers = "layout", "filter", "page"
image_id = "image_id"

# Codebook column names that carry the language as a suffix.
q_text = f"question_{language}"
q_sub_text = f"subquestion_{language}"
q_categories = f"categories_{language}"

### 5. Clean codebook from unnecessary entries.

In [7]:
# Remove questions without groups or question id.
codebook = codebook[codebook[q_groups].notna()]
codebook = codebook[codebook[q_ids].notna()]

for val in ['Skip','questionnaire','background']:
    codebook = codebook[codebook[q_groups] != val]

In [8]:
# Sort questions to generate correct order in documentation.
codebook[q_numbers] = codebook[q_numbers].astype("float")
codebook = codebook.sort_values(by=q_numbers)

In [9]:
# Preview the first four rows of the cleaned and sorted codebook.
codebook.head(n=4)

Unnamed: 0,page,id,qid,layout,filter,group,question_dutch,subquestion_dutch,question_english,subquestion_english,type,categories_dutch,categories_english,new_name
0,0.0,intro,intro,open,-,Introduction,Deze maand krijgt u de vierde vragenlijst uit ...,,This month you will receive the fourth questio...,,,,,
1,2.1,q1a,q1header,grid,-,Subjective Risks,Hoe groot denkt u dat de kans is dat de volgen...,u wordt besmet,"On a scale of 0 to 100 percent, how likely do ...",You will be infected,float,,,
2,2.2,q1b,q1header,grid,-,Subjective Risks,Hoe groot denkt u dat de kans is dat de volgen...,"iemand in uw directe omgeving (familie, vriend...","On a scale of 0 to 100 percent, how likely do ...","Someone in your direct environment (family, fr...",float,,,
3,2.3,q1c,q1header,grid,-,Subjective Risks,Hoe groot denkt u dat de kans is dat de volgen...,u moet naar het ziekenhuis als u wordt besmet,"On a scale of 0 to 100 percent, how likely do ...",You will have to go to the hospital if you get...,float,,,


## Create Documentation

Run the function for the specified inputs to add the documentation files.

In [10]:
# Build the documentation files from the prepared codebook.
# All arguments are positional, so their order must stay exactly as is;
# the signature of create_pages is not visible in this notebook.
create_pages(
    codebook, waveid, lanid,
    q_ids, q_filter, q_groups, q_layout,
    q_text, q_sub_text, q_categories,
    target_dir, image_path,
)

# Wave 5

In [1]:
%load_ext autoreload
%autoreload 2
import glob
import os
import pathlib
import pandas as pd
import numpy as np
from functions_questions import *

### 1. Specify language and wave for the files that will be created.


In [11]:
# Wave identifier, language identifier and language name for this run.
waveid, lanid, language = "w5", "e", "english"

### 2. Set target paths for files and images.

In [12]:
# Screenshot location, expressed relative to the docs directory.
image_path = "../_screenshots/"

# Folder that receives every generated .rst file for this language.
target_dir = "../docs/source/wave-5/{}/".format(language)

#### Remove all old files in target folder.

In [13]:
# Delete previously generated files so the target folder only contains the
# output of this run.
# Real directories are skipped: Path.unlink() raises on a directory, so a
# single sub-folder inside the target folder would abort the whole clean-up.
# Symlinks (even ones pointing at directories) are still unlinked, as before.
for document in glob.glob(f"{target_dir}*"):
    file = pathlib.Path(document)
    if file.is_dir() and not file.is_symlink():
        continue
    file.unlink()

### 3. Load codebook.

In [14]:
# Read the semicolon-separated wave-5 codebook and preview its first two rows.
codebook = pd.read_csv(
    filepath_or_buffer="wave-5/codebook-wave-5-english-dutch.csv",
    delimiter=";",
)
codebook.head(n=2)

Unnamed: 0,page,id,qid,layout,filter,group,question_dutch,subquestion_dutch,question_english,subquestion_english,type,categories_dutch,categories_english,new_name
0,1.01,intro,intro,open,-,Introduction,Deze maand krijgt u de vijfde vragenlijst uit ...,,This month you will receive the fifth question...,,,,,
1,2.01,InfectionDiagnosed,InfectionDiagnosed,cat,-,InfectionDiagnosed,Heeft een huisarts of een ander medisch deskun...,,Has a general practitioner or other medical pr...,,Categorical,"Ja die diagnose is bij mij gesteld,Nee,Het is ...","yes I have been diagnosed with it,no,unsure",


### 4. Specify column names that should be used to create the files.


* **q_groups**: Name of question groups, each question group gets its own .rst file.
* **q_ids:** Identifier for question groups.
* **q_type**: Should contain the question type. The following types are currently accepted: [Categorical,bool, int, float, str]
* **q_topics**: Topic groups, each topic will get its own .rst-file.
* **q_categories:** Name of the column that contains categories for categorical questions.
* **q_layout:** Layout of the question group. Options: [open, grid, table, multi, cat].
* **q_filter:** Column containing q_id of a question the current question depends on/ is routed from.
* **q_text:** Contains the actual question.
* **q_sub_text:** Contains subquestions.
* **q_numbers:** Used for ordering of questions and subquestions.

In [15]:
# Codebook column names that do not depend on the language.
q_groups, q_ids, q_type = "group", "qid", "type"
q_layout, q_filter, q_numbers = "layout", "filter", "page"
image_id = "image_id"

# Codebook column names that carry the language as a suffix.
q_text = f"question_{language}"
q_sub_text = f"subquestion_{language}"
q_categories = f"categories_{language}"

### 5. Clean codebook from unnecessary entries.

In [16]:
# Remove questions without groups or question id.
codebook = codebook[codebook[q_groups].notna()]
codebook = codebook[codebook[q_ids].notna()]

for val in ['Skip','questionnaire','background']:
    codebook = codebook[codebook[q_groups] != val]

In [17]:
# Sort questions to generate correct order in documentation.
codebook[q_numbers] = codebook[q_numbers].astype("float")
codebook = codebook.sort_values(by=q_numbers)

In [18]:
# Preview the first four rows of the cleaned and sorted codebook.
codebook.head(n=4)

Unnamed: 0,page,id,qid,layout,filter,group,question_dutch,subquestion_dutch,question_english,subquestion_english,type,categories_dutch,categories_english,new_name
0,1.01,intro,intro,open,-,Introduction,Deze maand krijgt u de vijfde vragenlijst uit ...,,This month you will receive the fifth question...,,,,,
1,2.01,InfectionDiagnosed,InfectionDiagnosed,cat,-,InfectionDiagnosed,Heeft een huisarts of een ander medisch deskun...,,Has a general practitioner or other medical pr...,,Categorical,"Ja die diagnose is bij mij gesteld,Nee,Het is ...","yes I have been diagnosed with it,no,unsure",
2,3.01,q1a,q1header,grid,-,Subjective Risks,Hoe groot denkt u dat de kans is dat de volgen...,u wordt besmet,What do you think the chances are that the fol...,You will be infected,int,,,
3,3.02,q1b,q1header,grid,-,Subjective Risks,Hoe groot denkt u dat de kans is dat de volgen...,"iemand in uw directe omgeving (familie, vriend...",What do you think the chances are that the fol...,"Someone in your direct environment (family, fr...",int,,,


## Create Documentation

Run the function for the specified inputs to add the documentation files.

In [19]:
# Build the documentation files from the prepared codebook.
# All arguments are positional, so their order must stay exactly as is;
# the signature of create_pages is not visible in this notebook.
create_pages(
    codebook, waveid, lanid,
    q_ids, q_filter, q_groups, q_layout,
    q_text, q_sub_text, q_categories,
    target_dir, image_path,
)