In [1]:
import soepdoku as soep
import tkinter as tk
import tkinter.ttk as ttk
from os.path import join
from types import MethodType
from soepversionizer import Database
from soepversionizer.algorithms import get_questions_with_same_item_name



path1 = "Q:/Dokumentation/"  # path to SOEP-Core documentation
path2 = "Q:/soep-lee2-compare/" # path to SOEP-LEE2-Compare documentation
version = 'v39'

questionnaire_files = [
    join(path1, 'questionnaires/soep-core-2021-lee2estab/questions.csv'),                           # SOEP-Betriebe, questionnaires
    join(path1, 'questionnaires/soep-core-2023-lee2estab/questions.csv'),
    join(path1, 'questionnaires/soep-core-2020-selfempl/questions.csv'),                            # SOEP-Selbstständinge, questionnaires
    join(path1, 'questionnaires/soep-core-2022-selfempl/questions.csv'),
    join(path1, 'questionnaires/soep-core-2024-selfempl/questions.csv'),
    join(path2, 'questionnaires/soep-lee2-compare-2022-estab/questions.csv'),                       # IAB-Betriebe, questionnaires
    join(path2, 'questionnaires/soep-lee2-compare-2023-estab/questions.csv'),
    join(path2, 'questionnaires/soep-lee2-compare-2024-estab/questions.csv'),
]




In [109]:
WIDTH = 1500
HEIGHT = 500
FONTS = {'main': ('Arial', 12)}
PADDINGS = {
    'normal': {'padx': 5, 'pady': 5},
    'small': {'padx': 2, 'pady': 2},
}
COLUMNS = {
    'study': 'study',
    'questionnaire': 'questionnaire',
    'question': 'question',
    'item': 'item', 
    'text': 'text_de', 
    'instruction': 'instruction_de', 
    'scale': 'scale',
    'answer_list': 'answer_list',
    'filter': 'filter',
}

class Versionizer():

    def __init__(self, questionnaire_files, doku_repos, version, build_db_at_start=True, questions=None, quest_expl=None):

        # Build database at startup, takes some time
        if build_db_at_start==True:
            database = Database(paths = questionnaire_files, doku_repos=doku_repos, version=version)
            database.build()
            questions, quest_expl = database.merge_quest_log_gen()

        self.questionnaire_files = questionnaire_files
        self.questions = questions
        self.quest_expl = quest_expl
    
        self.root = tk.Tk()
        self.root.title("SOEP Variable Versionizer")

        self.width = WIDTH
        self.height = HEIGHT
        self.root.geometry(f"{self.width}x{self.height}")
        self.font_main = FONTS['main']
        
        #self.label = tk.Label(self.root, text="Something", font=self.font_main)
        #self.label.pack(padx=10, pady=10)

        # Main grid layout
        self.mainframe = tk.Frame(self.root)
        self.mainframe.pack(fill="both", expand=True)
        #self.mainframe.pack(fill='x')

        self.mainframe.columnconfigure(0, weight=1) # content column
        self.mainframe.columnconfigure(1, weight=0) # scrollbar
        self.mainframe.columnconfigure(2, weight=1) # content column 
        self.mainframe.rowconfigure(2, weight=1)
        #self.mainframe.rowconfigure(3, weight=1)

       

        # Scrollable canvas + frame for questions 
        self.canvas = tk.Canvas(self.mainframe)
        self.canvas.grid(column=0, row=2, sticky="nsew", **PADDINGS['normal'])
        #self.canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        v_scrollbar = ttk.Scrollbar(self.mainframe, orient=tk.VERTICAL, command=self.canvas.yview)
        v_scrollbar.grid(row=2, column=1, sticky="ns")
        self.canvas.configure(yscrollcommand=v_scrollbar.set)

        self.scrollable_frame = ttk.Frame(self.canvas)
        self.scrollable_frame.columnconfigure(0, weight=1)
        self.scrollable_frame.columnconfigure(1, weight=1)
        self.canvas.create_window((0, 0), window=self.scrollable_frame, anchor="nw")
        self.scrollable_frame.bind("<Configure>", self.update_scroll_region)

        # Left and right questionnaire view
        self.questionnaire_view1 = QuestionnaireView(self.scrollable_frame, col=0, row=2)
        self.questionnaire_view2 = QuestionnaireView(self.mainframe, col=2, row=2)
        #self.questionnaire_view3 = QuestionnaireView(self.scrollable_frame, col=0, row=3, title="Associated questions" )

        # Currently selected data files
        self.selected_file1 = tk.StringVar(self.root)
        self.selected_file1.set(self.questionnaire_files[0])
        self.selected_study1 = tk.StringVar(self.root)
        self.selected_study1.set("")
        self.selected_questionnaire1 = tk.StringVar(self.root)
        self.selected_questionnaire1.set("")
        
        self.selected_file2 = tk.StringVar(self.root)
        self.selected_file2.set(self.questionnaire_files[0])

        # Relate data with viewers and buttons
        self.data1 = Data(
            viewer = self.questionnaire_view1,
            buttons = QuestionButtons(self.mainframe, row=1, column=0, viewers=[self.questionnaire_view1]),
            #associated_viewer=self.questionnaire_view3
        )

        self.data2 = Data(
            viewer = self.questionnaire_view2,
            buttons = QuestionButtons(self.mainframe, row=1, column=2, viewers=[self.questionnaire_view2])
        )

        #self.data3 = Data(
        #    viewer = self.questionnaire_view3,
        #   buttons = None,
        #    data = self.questions,
        #    aux_data= self.quest_expl,
        #)

        # Configure views
        #self.questionnaire_view3.data = self.questions
        #self.questionnaire_view3.aux_data = self.quest_expl

        #self.questionnaire_view1.update = MethodType(select_new_question, self.questionnaire_view1)
        #self.questionnaire_view2.update = MethodType(select_new_question, self.questionnaire_view2)
        #self.questionnaire_view3.update = MethodType(show_questions_with_same_item_name, self.questionnaire_view3)

        # Left and right dropdown menu
        self.file_menu(self.mainframe, variable=self.selected_file1, col=0, row=0, output=self.data1)
        self.file_menu(self.mainframe, variable=self.selected_file2, col=2, row=0, output=self.data2)

        self.root.mainloop()

    def update_scroll_region(self, event):
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))


    def file_menu(self, root, variable, col=0, row=0, output=None):

        file_menu = ttk.Combobox(root, textvariable=variable, values=self.questionnaire_files, state="readonly", width=300, font=FONTS['main'])
        file_menu.grid(column=col, row=row, sticky=tk.W, **PADDINGS['normal'])
        file_menu.bind("<<ComboboxSelected>>", lambda event: output.update(event, variable, output))


class Data():

    def __init__(self, viewer=None, buttons=None, data=None, aux_data=None, study=None, questionnaire=None, associated_viewer=None):
        self.viewer = viewer
        self.buttons = buttons
        self.data = data
        self.aux_data = aux_data
        self.study = study
        self.questionnaire = questionnaire
        self.associated_viewer = associated_viewer
        self.questions = None
        self.max_col1_length = 7
        

    def update(self, event, input, output):

        # Load new data
        file = input.get()
        self.data = soep.read_csv(file)
        self.questions = self.data['question'].unique().tolist()
        self.study = self.data.loc[0, 'study']
        self.questionnaire = self.data.loc[0, 'questionnaire']
        self.max_col1_length = max(len(i) for i in self.data[COLUMNS['item']].unique())

        # Update elements that display data
        if self.viewer is not None:
            self.viewer.updata_data(input=self)
        if self.associated_viewer is not None:
            self.associated_viewer.update_meta(study=self.study, questionnaire=self.questionnaire)
        if self.buttons is not None:
            self.buttons.update(input=self)


class QuestionButtons:

    def __init__(self, root, row=0, column=0, viewers=None):
        self.frame = tk.Frame(root)
        self.frame.grid(row=row, column=column, sticky="nsew", **PADDINGS['normal'])
        self.viewers = viewers
        self.selected = tk.StringVar(value="") # value of selected button


    def update(self, event=None, input=None, output=None):

        # Remove old buttons
        for widget in self.frame.winfo_children():
            widget.destroy()

        i = 0
        j = 0

        for q in input.questions:
            text = f"{q}"
            button = tk.Radiobutton(
                self.frame, 
                text=text,
                value = text,
                variable=self.selected,
                indicatoron=False,
                command=self.on_select,
            )
            
            button.grid(row=i, column=j, **PADDINGS['small'])
            j +=1
            if j==20:
                i += 1
                j = 0

    def on_select(self):
        for viewer in self.viewers:
            viewer.update(input=self.selected.get())

class QuestionnaireView:

    def __init__(self, root, col=None, row=None, data=None, aux_data=None, title="", study=None, questionnaire=None):
        
        self.root = root
        #self.frame = tk.LabelFrame(root, text=title, **PADDINGS['normal'], font=FONTS['main'], bg='red') 
        self.frame = self.root
        #self.frame = tk.Label(root, text=title, **PADDINGS['normal'], font=FONTS['main']) 
        label = tk.Label(self.root, text="This", **PADDINGS['normal'], font=FONTS['main']) 
        label.grid(row=row, column=0)
        label = tk.Label(self.root, text="Test", **PADDINGS['normal'], font=FONTS['main']) 
        label.grid(row=row, column=1)
        self.data = data
        self.aux_data = aux_data
        self.study = study,
        self.questionnaire = questionnaire

        # Create columns within frame

        #self.frame.grid(row=row, column=col, sticky="nsew", **PADDINGS['normal'])
        #self.frame.columnconfigure(0, weight=0)
        #self.frame.columnconfigure(1, weight=1)
            

        self.quest_views = []

        self.columns = [
            COLUMNS['question'],
            COLUMNS['item'],
            COLUMNS['text'],
            COLUMNS['instruction'], 
            COLUMNS['scale'], 
            COLUMNS['answer_list']
        ]

    def updata_data(self, event=None, input=None, output=None):
        self.data = input.data
        self.questions = input.questions

    def update_meta(self, study, questionnaire):
        self.study = study
        self.questionnaire = questionnaire

    def update(self, event=None, input=None, output=None):
        
        # Get new data
        newdata = [self.data.loc[self.data['question']==input]]

        # Remove previous view
        for widget in self.frame.winfo_children():
            widget.destroy()

        self.quest_views = [QuestionView(self)]

        for q, d in zip(self.quest_views, newdata):
            q.update(input = d)

        


#### Different update functions for QuestionnaireView
def select_new_question(self, event=None, input=None, output=None):

     # Get new data
    newdata = [self.data.loc[self.data['question']==input]]

    # Remove previous view
    for widget in self.frame.winfo_children():
        widget.destroy()

    self.quest_views = [QuestionView(self, data=self.data)]

    for q, d in zip(self.quest_views, newdata):
        q.update(input = d)


def show_questions_with_same_item_name(self, event=None, input=None, output=None):

    # Remove previous view
    for widget in self.frame.winfo_children():
        widget.destroy()

    # Get new data
    newdata = get_questions_with_same_item_name(
        self.data, 
        self.aux_data, 
        study=self.study, # study of viewer1/data1
        questionnaire=self.questionnaire, # questionnaire of viewer/data 1 
        #study = 'soep-core',
        #questionnaire = 'soep-core-2021-lee2estab',
        question=input
    )
    self.quest_views = [QuestionView(self, data=self.data)]
    self.quest_views[0].update(input = newdata)
 


class QuestionView:

    def __init__(self, root, col=None, row=None, data=None, col1_width=7):
        self.root = root
        self.data = data
        self.col1_width = col1_width

    def update(self, event=None, input=None, output=None):
        self.data = input
        #self.col1_width = self.root.get_max_col1_length()
        self.col1_width = 2


        question_groups = self.data.groupby(['study', 'questionnaire', 'question']).size().reset_index()
        
        i = 0 # row count
        for group in question_groups.iterrows():
            study = group[1]['study']
            questionnaire = group[1]['questionnaire']
            question_number = group[1]['question']

            self.display('Question: '+ question_number, row=i, column=0, height=1, width=20)
            self.display(f'{study}/{questionnaire}', row=i, column=1, height=1, width=None)

            data = self.data.loc[(self.data['study']==study) & (self.data['questionnaire']==questionnaire) & (self.data['question']==question_number)]
    
            # Items 
            text = self.get_text(data)
            self.display(text, row=i+1, column=1, height=text.count('\n')+1)
        
            i += 2

    def get_text(self, df):
        text = ""
        rows = df.to_dict('index')
        for k, v in rows.items():
            col1_text = v[COLUMNS['item']] 
            extra_space1 = self.col1_width - len(col1_text) + 2
            text +=  col1_text + ' '*extra_space1 + v[COLUMNS['text']] + '\n'
            
            instruction = v[COLUMNS['instruction']]
            if len(instruction)>0:
                text += instruction + '\n'


        return text

    def display(self, text, row=0, column=0, height=1, width=None):
        print(type(self.root.frame))
        text_box = tk.Text(self.root.frame, height=height, width=width, wrap=tk.WORD, borderwidth=0, bg='green') # 
        text_box.insert(tk.END, text)
        text_box.configure(state="disabled")
        #text_box.pack(anchor="w", fill=tk.X, **PADDINGS['normal'])
        text_box.grid(row=row, column=column, sticky="ew", **PADDINGS['normal'])
        
        #label = tk.Label(self.root, text="This", **PADDINGS['normal'], font=FONTS['main']) 
        #label.grid(row=row, column=0)
        #label = tk.Label(self.root, text="Test", **PADDINGS['normal'], font=FONTS['main']) 
        #label.grid(row=row, column=1)

#doku_repos = [path1, path2]
#database = Database(paths = questionnaire_files, doku_repos=doku_repos, version=version)
#database.build()
#questions, quest_expl = database.merge_quest_log_gen()
        
Versionizer(
    questionnaire_files=questionnaire_files, 
    doku_repos=[path1, path2], 
    version='v39', 
    build_db_at_start=False, 
    quest_expl=quest_expl, 
    questions=questions
)

<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>
<class 'tkinter.ttk.Frame'>


<__main__.Versionizer at 0x1e7df923ac0>

In [3]:
groups = questions.groupby(['study', 'questionnaire', 'question']).size().reset_index()[['study', 'questionnaire']]
print(type(groups))

<class 'pandas.core.frame.DataFrame'>


In [4]:
questions.loc[0, 'study']

'soep-core'

In [4]:
# DataBase.py


paths = get_paths("Q:/soep-lee2-compare/", col_value={'questionnaire': 'soep-lee2-compare-2022-estab'})
#paths = get_paths("Q:/Dokumentation/", col_value={'questionnaire': 'soep-core-2021-lee2estab'})
paths = get_paths("Q:/soep-lee2-compare/", filename='generations.csv', include_version_dir=['v39'])

for p in paths:
    print(p)

Q:/soep-lee2-compare/datasets\estab2022\v39\generations.csv


In [62]:
for root, dirs, files in os.walk('Q:/Dokumentation/datasets/pl/', topdown=True):
    
    basename = os.path.basename(root)

    if (not is_version_dir(basename)):
        print(root, dirs, files)
    
    

Q:/Dokumentation/datasets/pl/ ['v34', 'v35', 'v36', 'v37', 'v38', 'v39', 'v39.1', 'v40'] ['codebook_alt.csv']


In [95]:
path1 = "Q:/Dokumentation/"  # path to SOEP-Core documentation
path2 = "Q:/soep-lee2-compare/" # path to SOEP-LEE2-Compare documentation
version = 'v39'

quest = soep.read_csv(join(path1, 'questionnaires/soep-core-2021-lee2estab/questions.csv'))
answers = soep.read_csv(join(path1, 'questionnaires/soep-core-2021-lee2estab/answers.csv'))

max(len(i) for i in quest[COLUMNS['item']].unique())

24

In [6]:
path1

'Q:/Dokumentation/'

In [15]:

file = files[0]

def window1():
    layout = [
        [sg.Text("Select a file"), sg.Combo(files, key='-COMBO-', enable_events=True)],
        [sg.Text(f"Selected file:"), sg.Text("", key='-FILE-')],
        [sg.Button("Read dataset")],
        [sg.Text("Result:"), sg.Text("", key='-RESULT-', size=(15, 1))],
        [sg.Button("Go to Window 2"), sg.Button("Exit")]
    ]

    return sg.Window("Window 1", layout)


def select_file(window):
    file = values['-COMBO-']
    window.update(file)
    return file

def read_data(window):
    try:
        data = soep.read_csv(file)
        col = data.columns[3]
        value = data.at[0, col]
        window.update(value)
    except:
        pass


window = window1()

while True:
    event, values = window.read()

    if event in (sg.WINDOW_CLOSED, "Exit"):
        break

    elif event == "-COMBO-":
        file = select_file(window['-FILE-'])
        read_data(window['-RESULT-'])

    elif event == "Read dataset":
        read_data(window['-RESULT-'])
       


window.close()

In [None]:
def window2():
    layout = [
        [sg.Text("Welcome to window 2")],
        [sg.Button("Go to Window 1"), sg.Button("Exit")]
    ]

    return sg.Window("Window 1", layout)

    elif event == "Calculate Square":
        try:
            number = float(values['-INPUT-'])
            result = number ** 2
            window['-RESULT-'].update(result)

        except ValueError:
            pass


In [None]:
class Algorithms():

    def __init__(self, questionnaire, paths, doku_repos, questionnaire_path=None):

        self.questionnaire = questionnaire
        self.questionnaire_path = self.get_questionnaire_path(questionnaire_path)
        self.paths = paths # paths to all relevant questionnaires
        self.doku_repos = doku_repos # paths to Documentation top level, ex.: ['C:/Dokumentation', 'C:/Dokumentation_2']

    def get_questionnaire_path(self, path):
        
        if path is None:
            try:
                return self.questionnaire.path
            except:
                return None
        else:
            return path