## Question ID: I004_03

In [1]:
import pandas as pd
import numpy as np
import configparser
import module_table_writer as tw
import module_data_wrangling as dw

# Question analysed in this notebook.
question_id = 'I004_03'
# Answer codes found in the question column, mapped to the labels shown in the tables.
selected_level = {0:'extremely low',1:'very low',2:'low',3:'neutral',4:'high',5:'very high',6:'extremely high'}

# Enrolled demographic
enrolled_column_key = 'B003'
enrolled_column_name = 'Enrolled'
enrolled_dictionary = {'0':'Enrolled','B003_01':'Intend to enroll','B003_02':'Intend to re-enroll'}
# B003_05 (Do not intend to finish the PhD) is left out: only two people chose it
# and their answers are mostly empty.

# Gender demographic
gender_column_key = 'B004'
gender_column_name = 'Gender'
gender_dictionary = {1:'Female',2:'Male'}
# 3:'Non-binary' -> no one selected this option
# 4:'Diverse'    -> only one person selected this option

# Years at HPI demographic
years_at_HPI_column_key = 'B006'
years_at_HPI_column_name = 'Years at HPI'
years_at_HPI_dictionary = {'B006_01':'1 year or less','B006_02':'2 years','B006_03':'3 years','B006_04':'More than 4 years'}

# Citizenship demographic
citizenship_column_key = 'B007'
citizenship_column_name = 'Citizenship'
citizenship_dictionary = {1:'Germany',2:'EU',3:'Non-EU'}

#-----------------------------------------------------------------
# LOAD CONFIGURATION FILE
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed; an empty list means config.ini was not loaded.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini could not be read from the current working directory")

file_loading = config['file.loading']
project_path = file_loading['project_path']
data_folder = file_loading['data_folder']
latex_tables_folder = file_loading['latex_tables_folder']
question_index_file = file_loading['question_index_file']
data_file = file_loading['data_file']
# NOTE(review): `sep` is read from the config but is not passed to either
# pd.read_csv call in this cell -- confirm whether that is intentional.
sep = file_loading['sep']

#----------------------------------------------------------------
# LOAD QUESTION INDEX (Questions id, Question text, Answer alternatives, Details)
question_index_path = f'{project_path}/{data_folder}/{question_index_file}'
id_data_frame = pd.read_csv(question_index_path,encoding = 'ISO-8859-1')
filtered_index_df = id_data_frame[id_data_frame['ID']==question_id]
# Fail with a descriptive message instead of an IndexError from .iloc[0]
# when the question id is not listed in the index file.
if filtered_index_df.empty:
    raise ValueError(f'Question id {question_id!r} was not found in {question_index_path}')
question_title = filtered_index_df['Question'].iloc[0]

print(f'ID:{question_id}, Question Title:{question_title}')

#----------------------------------------------------------------
# LOAD DATA FILE AND FILTER NECESSARY COLUMNS
file_name_path = f'{project_path}/{data_folder}/{data_file}'
original_data_frame = pd.read_csv(file_name_path,encoding = 'ISO-8859-1')

# Demographic columns used for grouping, plus the question column itself.
my_list = [enrolled_column_key, gender_column_key, years_at_HPI_column_key, citizenship_column_key, question_id]

# Answer codes and their labels, in the order defined by selected_level.
options_code_list = list(selected_level.keys())
options_names_list = list(selected_level.values())

selected_columns = my_list

# Create a new DataFrame with only the selected columns
data_frame = original_data_frame[selected_columns]

#----------------------------------------------------------------
# CREATE LATEX TABLE FILE
# Tables are placed in a sub-folder named after the first two characters of the question id.
tables_path = f'{project_path}/{latex_tables_folder}/{question_id[:2]}/'
tables_file_name = f'{question_id}'


ID:I004_03, Question Title:Have you experienced depression or similar situation?


In [2]:
# Scratch check: how an empty DataFrame reports its size and its not-null mask.
count_df = pd.DataFrame()
print(0 < count_df.size)
print(count_df.notna())

False
Empty DataFrame
Columns: []
Index: []


### Enrollment count and percentages

In [3]:
# Answer distribution for all respondents, then one table per enrollment category.
group_column_key = enrolled_column_key
dict_column_names = enrolled_dictionary

# ALL respondents
count_df = dw.percentage_options_single_column(df_data=data_frame,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)

print('Table for '+'All')
# Name the first column 'Answers' before displaying and exporting.
count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
display(count_df)
latex_table = tw.write_latex_table(count_df,False, 'All',
                                    question_id, question_title,
                                    column_format='@{}lcc')
tw.table_to_file(latex_table,tables_path,tables_file_name)

# Filter by enrollment category
group_categories = dict_column_names.keys()

for group_category in group_categories:
    filtered_df = data_frame[data_frame[group_column_key] == group_category]
    # Ignore categories that are not present in the data. (The former test,
    # len(filtered_df.index > 0), took the length of a boolean array and would
    # raise on a non-numeric index.)
    if filtered_df.empty:
        continue

    count_df = dw.percentage_options_single_column(df_data=filtered_df,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)
    if count_df.size == 0:
        continue

    group_name = dict_column_names.get(group_category)
    print('Table for '+group_name)

    # Rename columns before printing
    count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
    display(count_df)

    # NOTE(review): the raw category code (group_category) is passed here, whereas the
    # overall table passes the readable label 'All' -- confirm group_name is not intended.
    latex_table = tw.write_latex_table(count_df,False, group_category,
                                    question_id, question_title,column_format='@{}lcc')
    tw.table_to_file(latex_table,tables_path,tables_file_name)



Table for All


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,2,2.99
2,low,11,16.42
3,neutral,12,17.91
4,high,8,11.94
5,very high,16,23.88
6,extremely high,18,26.87
7,TOTAL,67,100.0


Table for Enrolled


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,1,1.61
2,low,11,17.74
3,neutral,12,19.35
4,high,7,11.29
5,very high,15,24.19
6,extremely high,16,25.81
7,TOTAL,62,100.0


Table for Intend to enroll


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,1,20.0
2,low,0,0.0
3,neutral,0,0.0
4,high,1,20.0
5,very high,1,20.0
6,extremely high,2,40.0
7,TOTAL,5,100.0


### Gender count and percentages

In [4]:
# Answer distribution for all respondents, then one table per gender category.
group_column_key = gender_column_key
dict_column_names = gender_dictionary

# ALL respondents
count_df = dw.percentage_options_single_column(df_data=data_frame,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)

print('Table for '+'All')
# Name the first column 'Answers' before displaying and exporting.
count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
display(count_df)
latex_table = tw.write_latex_table(count_df,False, 'All',
                                    question_id, question_title,
                                    column_format='@{}lcc')
tw.table_to_file(latex_table,tables_path,tables_file_name)

# Filter by gender
group_categories = dict_column_names.keys()

for group_category in group_categories:
    filtered_df = data_frame[data_frame[group_column_key] == group_category]
    # Ignore categories that are not present in the data. (The former test,
    # len(filtered_df.index > 0), took the length of a boolean array and would
    # raise on a non-numeric index.)
    if filtered_df.empty:
        continue

    count_df = dw.percentage_options_single_column(df_data=filtered_df,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)
    if count_df.size == 0:
        continue

    group_name = dict_column_names.get(group_category)
    print('Table for '+group_name)

    # Rename columns before printing
    count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
    display(count_df)

    # NOTE(review): the raw category code (group_category) is passed here, whereas the
    # overall table passes the readable label 'All' -- confirm group_name is not intended.
    latex_table = tw.write_latex_table(count_df,False, group_category,
                                    question_id, question_title,column_format='@{}lcc')
    tw.table_to_file(latex_table,tables_path,tables_file_name)

Table for All


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,2,2.99
2,low,11,16.42
3,neutral,12,17.91
4,high,8,11.94
5,very high,16,23.88
6,extremely high,18,26.87
7,TOTAL,67,100.0


Table for Female


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,0,0.0
2,low,2,14.29
3,neutral,2,14.29
4,high,1,7.14
5,very high,2,14.29
6,extremely high,7,50.0
7,TOTAL,14,100.0


Table for Male


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,2,3.85
2,low,9,17.31
3,neutral,10,19.23
4,high,7,13.46
5,very high,13,25.0
6,extremely high,11,21.15
7,TOTAL,52,100.0


### Years at HPI count and percentages

In [5]:
# Answer distribution for all respondents, then one table per years-at-HPI category.
group_column_key = years_at_HPI_column_key
dict_column_names = years_at_HPI_dictionary

# ALL respondents
count_df = dw.percentage_options_single_column(df_data=data_frame,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)

print('Table for '+'All')
# Name the first column 'Answers' before displaying and exporting.
count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
display(count_df)
latex_table = tw.write_latex_table(count_df,False, 'All',
                                    question_id, question_title,
                                    column_format='@{}lcc')
tw.table_to_file(latex_table,tables_path,tables_file_name)

# Filter by years at HPI
group_categories = dict_column_names.keys()

for group_category in group_categories:
    filtered_df = data_frame[data_frame[group_column_key] == group_category]
    # Ignore categories that are not present in the data. (The former test,
    # len(filtered_df.index > 0), took the length of a boolean array and would
    # raise on a non-numeric index.)
    if filtered_df.empty:
        continue

    count_df = dw.percentage_options_single_column(df_data=filtered_df,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)
    if count_df.size == 0:
        continue

    group_name = dict_column_names.get(group_category)
    print('Table for '+group_name)

    # Rename columns before printing
    count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
    display(count_df)

    # NOTE(review): the raw category code (group_category) is passed here, whereas the
    # overall table passes the readable label 'All' -- confirm group_name is not intended.
    latex_table = tw.write_latex_table(count_df,False, group_category,
                                    question_id, question_title,column_format='@{}lcc')
    tw.table_to_file(latex_table,tables_path,tables_file_name)


Table for All


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,2,2.99
2,low,11,16.42
3,neutral,12,17.91
4,high,8,11.94
5,very high,16,23.88
6,extremely high,18,26.87
7,TOTAL,67,100.0


Table for 1 year or less


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,1,7.14
2,low,1,7.14
3,neutral,2,14.29
4,high,1,7.14
5,very high,4,28.57
6,extremely high,5,35.71
7,TOTAL,14,100.0


Table for 2 years


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,0,0.0
2,low,6,26.09
3,neutral,4,17.39
4,high,3,13.04
5,very high,5,21.74
6,extremely high,5,21.74
7,TOTAL,23,100.0


Table for 3 years


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,0,0.0
2,low,2,15.38
3,neutral,4,30.77
4,high,2,15.38
5,very high,2,15.38
6,extremely high,3,23.08
7,TOTAL,13,100.0


Table for More than 4 years


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,1,5.88
2,low,2,11.76
3,neutral,2,11.76
4,high,2,11.76
5,very high,5,29.41
6,extremely high,5,29.41
7,TOTAL,17,100.0


### Citizenship count and percentages 

In [6]:
# Answer distribution for all respondents, then one table per citizenship category.
group_column_key = citizenship_column_key
dict_column_names = citizenship_dictionary

# ALL respondents
count_df = dw.percentage_options_single_column(df_data=data_frame,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)

print('Table for '+'All')
# Name the first column 'Answers' before displaying and exporting.
count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
display(count_df)
latex_table = tw.write_latex_table(count_df,False, 'All',
                                    question_id, question_title,
                                    column_format='@{}lcc')
tw.table_to_file(latex_table,tables_path,tables_file_name)

# Filter by citizenship
group_categories = dict_column_names.keys()

for group_category in group_categories:
    filtered_df = data_frame[data_frame[group_column_key] == group_category]
    # Ignore categories that are not present in the data. (The former test,
    # len(filtered_df.index > 0), took the length of a boolean array and would
    # raise on a non-numeric index.)
    if filtered_df.empty:
        continue

    count_df = dw.percentage_options_single_column(df_data=filtered_df,
                                column_name = question_id,
                                options_names=options_names_list,
                                options_codes=options_code_list)
    if count_df.size == 0:
        continue

    group_name = dict_column_names.get(group_category)
    print('Table for '+group_name)

    # Rename columns before printing
    count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
    display(count_df)

    # NOTE(review): the raw category code (group_category) is passed here, whereas the
    # overall table passes the readable label 'All' -- confirm group_name is not intended.
    latex_table = tw.write_latex_table(count_df,False, group_category,
                                    question_id, question_title,column_format='@{}lcc')
    tw.table_to_file(latex_table,tables_path,tables_file_name)

Table for All


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,2,2.99
2,low,11,16.42
3,neutral,12,17.91
4,high,8,11.94
5,very high,16,23.88
6,extremely high,18,26.87
7,TOTAL,67,100.0


Table for Germany


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,2,4.0
2,low,8,16.0
3,neutral,12,24.0
4,high,5,10.0
5,very high,10,20.0
6,extremely high,13,26.0
7,TOTAL,50,100.0


Table for EU


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,0,0.0
2,low,2,25.0
3,neutral,0,0.0
4,high,1,12.5
5,very high,4,50.0
6,extremely high,1,12.5
7,TOTAL,8,100.0


Table for Non-EU


Unnamed: 0,Answers,Count,Percentage
0,extremely low,0,0.0
1,very low,0,0.0
2,low,1,11.11
3,neutral,0,0.0
4,high,2,22.22
5,very high,2,22.22
6,extremely high,4,44.44
7,TOTAL,9,100.0
