## id: C001

In [2]:
import pandas as pd
import numpy as np
import configparser
import module_table_writer as tw
import module_data_wrangling as dw

# ----------------------------------------------------------------
# QUESTION UNDER ANALYSIS
# The question text itself is looked up in the question index further down
# and stored in `question_title`.
question_id = 'C001'
selected_code = 2  # value in an option column meaning "this option was chosen"

# ----------------------------------------------------------------
# DEMOGRAPHIC SPLITS
# Each split has: the data column holding it, a display name, and a mapping
# from raw column values to human-readable group labels.

# Enrollment status
enrolled_column_key = 'B003'
enrolled_column_name = 'Enrolled'
enrolled_dictionary = {
    '0': 'Enrolled',
    'B003_01': 'Intend to enroll',
    'B003_02': 'Intend to re-enroll',
}
# 'B003_05' ('Do not intend to finish the PhD') is left out on purpose:
# only two people chose it and their answers are mostly empty.

# Gender
gender_column_key = 'B004'
gender_column_name = 'Gender'
gender_dictionary = {
    1: 'Female',
    2: 'Male',
}
# Left out on purpose: 3 ('Non-binary') was selected by no one and
# 4 ('Diverse') by a single person.

# Citizenship
citizenship_column_key = 'B007'
citizenship_column_name = 'Citizenship'
citizenship_dictionary = {
    1: 'Germany',
    2: 'EU',
    3: 'Non-EU',
}

# ----------------------------------------------------------------
# CONFIGURATION FILE
# All locations come from the [file.loading] section of config.ini.
config = configparser.ConfigParser()
config.read('config.ini')
file_loading = config['file.loading']
project_path = file_loading['project_path']
data_folder = file_loading['data_folder']
latex_tables_folder = file_loading['latex_tables_folder']
question_index_file = file_loading['question_index_file']
data_file = file_loading['data_file']
# NOTE(review): `sep` is read here but is not passed to either read_csv call
# in this cell - confirm whether that is intended.
sep = file_loading['sep']

# ----------------------------------------------------------------
# QUESTION INDEX (question id, question text, answer alternatives, details)
question_index_path = '/'.join((project_path, data_folder, question_index_file))
id_data_frame = pd.read_csv(question_index_path, encoding='ISO-8859-1')

# Keep only the index row(s) describing this question and take its text.
filtered_df = id_data_frame.loc[id_data_frame['ID'] == question_id]
question_title = filtered_df['Question'].iloc[0]

print('ID:' + question_id + ', Question Title:' + question_title)

# Mapping of option column -> option label, built from the 'Alternatives' column.
options_dict = dw.generate_options_dictionary(filtered_df, 'Alternatives')

# ----------------------------------------------------------------
# SURVEY DATA, REDUCED TO THE COLUMNS THIS NOTEBOOK NEEDS
file_name_path = '/'.join((project_path, data_folder, data_file))
data_frame = pd.read_csv(file_name_path, encoding='ISO-8859-1')

# Demographic columns plus the question's own column ...
my_list = [
    enrolled_column_key,
    gender_column_key,
    citizenship_column_key,
    question_id,
]

# ... followed by one column per answer option.
options_columns_list = list(options_dict)
options_names_list = list(options_dict.values())

selected_columns = [*my_list, *options_columns_list]

# Narrow the frame down to exactly those columns.
data_frame = data_frame[selected_columns]

# ----------------------------------------------------------------
# LATEX OUTPUT LOCATION
# Tables go into a sub-folder named after the first two characters of the
# question id; the file itself is named after the full question id.
tables_folder_name = question_id[:2]
tables_path = f'{project_path}/{latex_tables_folder}/{tables_folder_name}/'
tables_file_name = str(question_id)

ID:C001, Question Title:How were you made aware of the opportunity to do a PhD at the HPI?
{'C001_01': 'Web', 'C001_02': 'Newsletters', 'C001_03': 'Academic conferences', 'C001_04': 'Friends/colleagues', 'C001_05': 'I completed my Bachelor/Master at HPI', 'C001_06': 'Other'}


### Enrollment count and percentages

In [3]:
# Answer-distribution tables for this question: one for all respondents,
# then one per enrollment category. Each table is displayed in the notebook
# and handed to the LaTeX table writer.
group_column_key = enrolled_column_key
dict_column_names = enrolled_dictionary

# ALL respondents
count_df = dw.percentage_options(df_data=data_frame,
                                 options_columns=options_columns_list,
                                 options_names=options_names_list,
                                 selected_code=selected_code)
print('Table for '+'All')
# The first column holds the option labels; give it a readable header.
count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
display(count_df)
latex_table = tw.write_latex_table(count_df, False, 'All',
                                   question_id, question_title,
                                   column_format='@{}lcc')
# NOTE(review): every table in this cell is written to the same
# tables_path/tables_file_name - presumably table_to_file appends; confirm.
tw.table_to_file(latex_table, tables_path, tables_file_name)

# One table per enrollment category
group_categories = dict_column_names.keys()

for group_category in group_categories:
    filtered_df = data_frame[data_frame[group_column_key] == group_category]

    # Skip categories with no respondents. The original test was
    # len(filtered_df.index > 0), which measured the length of a boolean
    # array instead of comparing the row count with zero.
    if len(filtered_df.index) == 0:
        continue

    count_df = dw.percentage_options(df_data=filtered_df,
                                     options_columns=options_columns_list,
                                     options_names=options_names_list,
                                     selected_code=selected_code)
    group_name = dict_column_names[group_category]
    print('Table for '+group_name)

    # Rename the label column before displaying
    count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
    display(count_df)

    # NOTE(review): the raw category key (group_category) is passed here,
    # whereas the "All" table passes a display label - confirm whether
    # group_name was intended.
    latex_table = tw.write_latex_table(count_df, False, group_category,
                                       question_id, question_title,
                                       column_format='@{}lcc')
    tw.table_to_file(latex_table, tables_path, tables_file_name)


Table for All


Unnamed: 0,Answers,Count,Percentage
0,Web,16,14.04
1,Newsletters,2,1.75
2,Academic conferences,4,3.51
3,Friends/colleagues,38,33.33
4,I completed my Bachelor/Master at HPI,45,39.47
5,Other,9,7.89
6,TOTAL,114,100.0


Table for Enrolled


Unnamed: 0,Answers,Count,Percentage
0,Web,15,14.42
1,Newsletters,1,0.96
2,Academic conferences,4,3.85
3,Friends/colleagues,36,34.62
4,I completed my Bachelor/Master at HPI,42,40.38
5,Other,6,5.77
6,TOTAL,104,100.0


Table for Intend to enroll


Unnamed: 0,Answers,Count,Percentage
0,Web,1,12.5
1,Newsletters,1,12.5
2,Academic conferences,0,0.0
3,Friends/colleagues,2,25.0
4,I completed my Bachelor/Master at HPI,1,12.5
5,Other,3,37.5
6,TOTAL,8,100.0


Table for Intend to re-enroll


Unnamed: 0,Answers,Count,Percentage
0,Web,0,0.0
1,Newsletters,0,0.0
2,Academic conferences,0,0.0
3,Friends/colleagues,0,0.0
4,I completed my Bachelor/Master at HPI,2,100.0
5,Other,0,0.0
6,TOTAL,2,100.0


### Gender count and percentages

In [4]:
# Answer-distribution tables for this question: one for all respondents,
# then one per gender category. Each table is displayed in the notebook
# and handed to the LaTeX table writer.
group_column_key = gender_column_key
dict_column_names = gender_dictionary

# ALL respondents
count_df = dw.percentage_options(df_data=data_frame,
                                 options_columns=options_columns_list,
                                 options_names=options_names_list,
                                 selected_code=selected_code)
print('Table for '+'All')
# The first column holds the option labels; give it a readable header.
count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
display(count_df)
latex_table = tw.write_latex_table(count_df, False, 'All',
                                   question_id, question_title,
                                   column_format='@{}lcc')
# NOTE(review): every table in this cell is written to the same
# tables_path/tables_file_name - presumably table_to_file appends; confirm.
tw.table_to_file(latex_table, tables_path, tables_file_name)

# One table per gender category
group_categories = dict_column_names.keys()

for group_category in group_categories:
    filtered_df = data_frame[data_frame[group_column_key] == group_category]

    # Skip categories with no respondents. The original test was
    # len(filtered_df.index > 0), which measured the length of a boolean
    # array instead of comparing the row count with zero.
    if len(filtered_df.index) == 0:
        continue

    count_df = dw.percentage_options(df_data=filtered_df,
                                     options_columns=options_columns_list,
                                     options_names=options_names_list,
                                     selected_code=selected_code)
    group_name = dict_column_names[group_category]
    print('Table for '+group_name)

    # Rename the label column before displaying
    count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
    display(count_df)

    # NOTE(review): the raw category key (group_category) is passed here,
    # whereas the "All" table passes a display label - confirm whether
    # group_name was intended.
    latex_table = tw.write_latex_table(count_df, False, group_category,
                                       question_id, question_title,
                                       column_format='@{}lcc')
    tw.table_to_file(latex_table, tables_path, tables_file_name)

Table for All


Unnamed: 0,Answers,Count,Percentage
0,Web,16,14.04
1,Newsletters,2,1.75
2,Academic conferences,4,3.51
3,Friends/colleagues,38,33.33
4,I completed my Bachelor/Master at HPI,45,39.47
5,Other,9,7.89
6,TOTAL,114,100.0


Table for Female


Unnamed: 0,Answers,Count,Percentage
0,Web,4,16.67
1,Newsletters,2,8.33
2,Academic conferences,2,8.33
3,Friends/colleagues,10,41.67
4,I completed my Bachelor/Master at HPI,5,20.83
5,Other,1,4.17
6,TOTAL,24,100.0


Table for Male


Unnamed: 0,Answers,Count,Percentage
0,Web,12,13.48
1,Newsletters,0,0.0
2,Academic conferences,2,2.25
3,Friends/colleagues,28,31.46
4,I completed my Bachelor/Master at HPI,39,43.82
5,Other,8,8.99
6,TOTAL,89,100.0


### Citizenship count and percentages 

In [5]:
# Answer-distribution tables for this question: one for all respondents,
# then one per citizenship category. Each table is displayed in the notebook
# and handed to the LaTeX table writer.
group_column_key = citizenship_column_key
dict_column_names = citizenship_dictionary

# ALL respondents
count_df = dw.percentage_options(df_data=data_frame,
                                 options_columns=options_columns_list,
                                 options_names=options_names_list,
                                 selected_code=selected_code)
print('Table for '+'All')
# The first column holds the option labels; give it a readable header.
count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
display(count_df)
latex_table = tw.write_latex_table(count_df, False, 'All',
                                   question_id, question_title,
                                   column_format='@{}lcc')
# NOTE(review): every table in this cell is written to the same
# tables_path/tables_file_name - presumably table_to_file appends; confirm.
tw.table_to_file(latex_table, tables_path, tables_file_name)

# One table per citizenship category
group_categories = dict_column_names.keys()

for group_category in group_categories:
    filtered_df = data_frame[data_frame[group_column_key] == group_category]

    # Skip categories with no respondents. The original test was
    # len(filtered_df.index > 0), which measured the length of a boolean
    # array instead of comparing the row count with zero.
    if len(filtered_df.index) == 0:
        continue

    count_df = dw.percentage_options(df_data=filtered_df,
                                     options_columns=options_columns_list,
                                     options_names=options_names_list,
                                     selected_code=selected_code)
    group_name = dict_column_names[group_category]
    print('Table for '+group_name)

    # Rename the label column before displaying
    count_df = count_df.rename(columns={count_df.columns[0]: 'Answers'})
    display(count_df)

    # NOTE(review): the raw category key (group_category) is passed here,
    # whereas the "All" table passes a display label - confirm whether
    # group_name was intended.
    latex_table = tw.write_latex_table(count_df, False, group_category,
                                       question_id, question_title,
                                       column_format='@{}lcc')
    tw.table_to_file(latex_table, tables_path, tables_file_name)

Table for All


Unnamed: 0,Answers,Count,Percentage
0,Web,16,14.04
1,Newsletters,2,1.75
2,Academic conferences,4,3.51
3,Friends/colleagues,38,33.33
4,I completed my Bachelor/Master at HPI,45,39.47
5,Other,9,7.89
6,TOTAL,114,100.0


Table for Germany


Unnamed: 0,Answers,Count,Percentage
0,Web,11,12.5
1,Newsletters,0,0.0
2,Academic conferences,1,1.14
3,Friends/colleagues,28,31.82
4,I completed my Bachelor/Master at HPI,43,48.86
5,Other,5,5.68
6,TOTAL,88,100.0


Table for EU


Unnamed: 0,Answers,Count,Percentage
0,Web,1,9.09
1,Newsletters,1,9.09
2,Academic conferences,1,9.09
3,Friends/colleagues,5,45.45
4,I completed my Bachelor/Master at HPI,1,9.09
5,Other,2,18.18
6,TOTAL,11,100.0


Table for Non-EU


Unnamed: 0,Answers,Count,Percentage
0,Web,4,28.57
1,Newsletters,1,7.14
2,Academic conferences,2,14.29
3,Friends/colleagues,5,35.71
4,I completed my Bachelor/Master at HPI,0,0.0
5,Other,2,14.29
6,TOTAL,14,100.0
