## Question ID: E007

In [1]:
import pandas as pd
import configparser
import module_table_writer as tw
import module_data_wrangling as dw

# Notebook setup for one survey question: declares the question id and the
# demographic columns used for the breakdowns, reads the paths from config.ini,
# loads the question index and the answers file, and prepares the names that the
# following cells pass to the table-writing helpers.

question_id = 'E007'
# Answer codes of this question mapped to the labels shown in the tables.
selected_level = {1:'Yes',2:'No',3:'Not applicable'}

# Enrolled demographic
enrolled_column_key = 'B003'
enrolled_column_name = 'Enrolled'
enrolled_dictionary = {'0':'Enrolled','B003_01':'Intend to enroll','B003_02':'Intend to re-enroll'}
# B003_05 (do not intend to finish the PhD) is left out on purpose: only two
# respondents chose it and their answers are mostly empty.

# Gender demographic
gender_column_key = 'B004'
gender_column_name = 'Gender'
gender_dictionary = {1:'Female',2:'Male'}
# 3:'Non-binary' -> no one selected this option
# 4:'Diverse'    -> only one person selected this option

# Years at HPI demographic
years_at_HPI_column_key = 'B006'
years_at_HPI_column_name = 'Years at HPI'
# NOTE(review): the labels jump from '3 years' to 'More than 4 years' -- confirm
# against the questionnaire wording.
years_at_HPI_dictionary = {'B006_01':'1 year or less','B006_02':'2 years','B006_03':'3 years','B006_04':'More than 4 years'}

# Citizenship demographic
citizenship_column_key = 'B007'
citizenship_column_name = 'Citizenship'
citizenship_dictionary = {1:'Germany',2:'EU',3:'Non-EU'}

#-----------------------------------------------------------------
# LOAD CONFIGURATION FILE
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed; fail here with a clear message instead of the opaque
# KeyError that the section lookup below would raise.
if not config.read('config.ini'):
    raise FileNotFoundError("Could not read 'config.ini' from the current working directory")

file_loading_config = config['file.loading']
project_path = file_loading_config['project_path']
data_folder = file_loading_config['data_folder']
latex_tables_folder = file_loading_config['latex_tables_folder']
question_index_file = file_loading_config['question_index_file']
data_file = file_loading_config['data_file']
# NOTE(review): `sep` is read from the configuration but is not passed to either
# pd.read_csv call in this cell -- confirm whether it is still needed.
sep = file_loading_config['sep']

#----------------------------------------------------------------
# LOAD QUESTION INDEX (Question id, Question text, Answer alternatives, Details)
question_index_path = f'{project_path}/{data_folder}/{question_index_file}'
id_data_frame = pd.read_csv(question_index_path,encoding = 'ISO-8859-1')
filtered_index_df = id_data_frame[id_data_frame['ID']==question_id]

# Without this guard an unknown id surfaces as a bare IndexError from .iloc[0].
if filtered_index_df.empty:
    raise ValueError(f"Question id '{question_id}' was not found in '{question_index_path}'")
question_title = filtered_index_df['Question'].iloc[0]

# f-string instead of concatenation so a non-string title cannot raise TypeError.
print(f'ID:{question_id}, Question Title:{question_title}')

# Extract the answer options for this question from the 'Alternatives' column.
options_dict = dw.generate_options_dictionary(filtered_index_df,'Alternatives')

#----------------------------------------------------------------
# LOAD DATA FILE AND FILTER NECESSARY COLUMNS
file_name_path = f'{project_path}/{data_folder}/{data_file}'
original_data_frame = pd.read_csv(file_name_path,encoding = 'ISO-8859-1')

# The four demographic columns plus the column holding this question's answers.
my_list = [enrolled_column_key, gender_column_key, years_at_HPI_column_key, citizenship_column_key, question_id]

# Answer codes and their labels, in the order declared in selected_level.
options_code_list = list(selected_level.keys())
options_names_list = list(selected_level.values())

selected_columns = my_list

# Work on an explicit copy of the selected columns so that downstream helpers
# never operate on a slice tied to original_data_frame (avoids pandas
# SettingWithCopy warnings if they assign into the frame).
data_frame = original_data_frame[selected_columns].copy()

#----------------------------------------------------------------
# CREATE LATEX TABLE FILE
# NOTE(review): question_id[:2] evaluates to 'E0' for this question -- confirm
# that this is the intended sub-folder name.
tables_path = f'{project_path}/{latex_tables_folder}/{question_id[:2]}/'
tables_file_name = f'{question_id}'


ID:E007, Question Title:If you receive a scholarship from HPI; do you think the offer is competitive with other universities? 
{'E007': 'Yes No'}


### All (no filter)

In [2]:
# ALL respondents (no demographic filter)
#
# NOTE(review): the saved output of this cell shows the table and then
# "AttributeError: 'str' object has no attribute 'size'". count_df is displayed
# as a table before that point, so the error is presumably raised inside one of
# the tw helpers called below -- confirm in module_table_writer.

count_df = dw.percentage_options_single_column(
    df_data=data_frame,
    column_name=question_id,
    options_names=options_names_list,
    options_codes=options_code_list,
)

if count_df.size == 0:
    # Nothing to report: no table is displayed and no file is written.
    print('Table for '+'All is empty')
else:
    print('Table for '+'All')

    # Give the first column a readable header before showing/exporting it.
    first_column_label = count_df.columns[0]
    count_df = count_df.rename(columns={first_column_label: 'Answers'})
    display(count_df)

    latex_table = tw.write_latex_table(
        data_df=count_df,
        show_index=False,
        filter_name='',
        filter_value='All',
        question_id=question_id,
        question_title=question_title,
        column_format='@{}lcc',
    )

    tw.table_to_file(latex_table, tables_path, tables_file_name)

Table for All


Unnamed: 0,Answers,Count,Percentage
0,Yes,17,19.77
1,No,22,25.58
2,Not applicable,47,54.65
3,TOTAL,86,100.0


AttributeError: 'str' object has no attribute 'size'

### Enrollment count and percentages

In [None]:
# Breakdown by enrollment status: passes the enrollment column key and its label
# dictionary to the table-writer helper together with the question's answer
# codes/labels and the output location.
# (What the helper displays or writes is defined in module_table_writer;
# presumably one table per group -- confirm there.)
tw.filter_write_table_single_column(
    df=data_frame,
    group_column_key=enrolled_column_key,
    dict_column_names=enrolled_dictionary,
    filter_name='Enrollment status',
    options_code_list=options_code_list,
    options_names_list=options_names_list,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)

### Gender count and percentages

In [None]:
# Breakdown by gender: passes the gender column key and its label dictionary to
# the table-writer helper together with the question's answer codes/labels and
# the output location.
# (What the helper displays or writes is defined in module_table_writer;
# presumably one table per group -- confirm there.)
tw.filter_write_table_single_column(
    df=data_frame,
    group_column_key=gender_column_key,
    dict_column_names=gender_dictionary,
    filter_name='Gender',
    options_code_list=options_code_list,
    options_names_list=options_names_list,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)

### Years at HPI count and percentages

In [None]:
# Breakdown by years at HPI: passes the years-at-HPI column key and its label
# dictionary to the table-writer helper together with the question's answer
# codes/labels and the output location.
# (What the helper displays or writes is defined in module_table_writer;
# presumably one table per group -- confirm there.)
tw.filter_write_table_single_column(
    df=data_frame,
    group_column_key=years_at_HPI_column_key,
    dict_column_names=years_at_HPI_dictionary,
    filter_name='Years at HPI',
    options_code_list=options_code_list,
    options_names_list=options_names_list,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)

### Citizenship count and percentages 

In [None]:
# Breakdown by citizenship: passes the citizenship column key and its label
# dictionary to the table-writer helper together with the question's answer
# codes/labels and the output location.
# (What the helper displays or writes is defined in module_table_writer;
# presumably one table per group -- confirm there.)
tw.filter_write_table_single_column(
    df=data_frame,
    group_column_key=citizenship_column_key,
    dict_column_names=citizenship_dictionary,
    filter_name='Citizenship',
    options_code_list=options_code_list,
    options_names_list=options_names_list,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)