## question id: J011

In [1]:
import pandas as pd
import configparser
import module_table_writer as tw
import module_data_wrangling as dw

# ----------------------------------------------------------------
# QUESTION AND DEMOGRAPHIC SETTINGS
# Every later cell of this notebook reads the names defined here.
question_id = 'J011'
selected_code = 2 #code that indicates that the question was answered

# Each demographic below is described by three values:
#   *_column_key  : column (or column prefix) in the survey data file
#   *_column_name : human-readable header used when printing tables
#   *_dictionary  : code -> label mapping of the categories that are reported

#Enrolled demographic
enrolled_column_key = 'B003'
enrolled_column_name = 'Enrolled'
enrolled_dictionary = {'0':'Enrolled','B003_01':'Intend to enroll','B003_02':'Intend to re-enroll'}
# B003_05: Do not intend to finish the PhD -> only two people and answers are mostly empty, so I removed.

#Gender demographic
gender_column_key = 'B004'
gender_column_name = 'Gender'
gender_dictionary = {1:'Female',2:'Male'}
#3:'Non-binary' no one selected this option
#4:'Diverse' only one person selected this option

#Years at HPI demographic
years_at_HPI_column_key = 'B006'
years_at_HPI_column_name = 'Years at HPI'
years_at_HPI_dictionary = {'B006_01':'1 year or less','B006_02':'2 years','B006_03':'3 years','B006_04':'More than 4 years'}

#Citizenship demographic
citizenship_column_key = 'B007'
citizenship_column_name = 'Citizenship'
citizenship_dictionary = {1:'Germany',2:'EU',3:'Non-EU'}

#-----------------------------------------------------------------
# LOAD CONFIGURATION FILE
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message instead of a
# KeyError on the section lookup below.
if not config.read('config.ini'):
    raise FileNotFoundError("Configuration file 'config.ini' was not found in the working directory")
file_loading = config['file.loading']
project_path = file_loading['project_path']
data_folder = file_loading['data_folder']
latex_tables_folder = file_loading['latex_tables_folder']
question_index_file = file_loading['question_index_file']
data_file = file_loading['data_file']
# NOTE(review): 'sep' is read from the configuration but neither read_csv call
# in this cell passes it on - confirm whether the files really use the default ','.
sep = file_loading['sep']

#----------------------------------------------------------------
#LOAD QUESTION INDEX (Questions id, Question text, Answer alternatives, Details)
question_index_path = f'{project_path}/{data_folder}/{question_index_file}'
id_data_frame = pd.read_csv(question_index_path,encoding = 'ISO-8859-1')
filtered_index_df = id_data_frame[id_data_frame['ID']==question_id]
# Without this guard an unknown question id surfaces as a bare IndexError from .iloc[0].
if filtered_index_df.empty:
    raise ValueError(f"Question id '{question_id}' not found in {question_index_path}")
question_title = filtered_index_df['Question'].iloc[0]

print(f'ID:{question_id}, Question Title:{question_title}')

#extract the answer options for this question (it is a column)
options_dict = dw.generate_options_dictionary(filtered_index_df,'Alternatives')

#----------------------------------------------------------------
#LOAD DATA FILE AND FILTER NECESSARY COLUMNS
file_name_path = f'{project_path}/{data_folder}/{data_file}'
original_data_frame = pd.read_csv(file_name_path,encoding = 'ISO-8859-1')

# Demographic columns plus the question column itself
my_list = [enrolled_column_key, gender_column_key, years_at_HPI_column_key, citizenship_column_key, question_id]

# Column keys and display names of the answer options, in matching order
options_columns_list = list(options_dict.keys())
options_names_list = list(options_dict.values())

selected_columns = my_list + options_columns_list

# Work on an independent copy of the selected columns so that later
# transformations can neither raise SettingWithCopyWarning nor leak back into
# original_data_frame.
data_frame = original_data_frame[selected_columns].copy()

#----------------------------------------------------------------
#CREATE LATEX TABLE FILE
# Tables are grouped in a sub-folder named after the first two characters of the question id
tables_path = f'{project_path}/{latex_tables_folder}/{question_id[:2]}/'
tables_file_name = f'{question_id}'


ID:J011, Question Title:Which of the events below did you consider helpful for your career goals/needs?
{'J011_01': 'Job Fair', 'J011_02': 'Company Visit', 'J011_03': 'FutureSoc Symposium', 'J011_04': 'Conferences hosted at the HPI', 'J011_05': 'HPI Connect Events', 'J011_06': 'Other Meetups organized at the HPI'}


### All (no filter)

In [2]:
# Overall answer distribution: every respondent, no demographic filter.
overall_counts = dw.percentage_options_multiple_columns(
    df_data=data_frame,
    options_columns=options_columns_list,
    options_names=options_names_list,
    selected_code=selected_code,
)
print('Table for All')

# The first column holds the answer labels; give it a readable header.
answers_header = overall_counts.columns[0]
count_df = overall_counts.rename(columns={answers_header: 'Answers'})
display(count_df)

# Render the table as LaTeX and hand it to the table writer for this question.
latex_table = tw.write_latex_table(
    count_df,
    False,
    'All',
    question_id,
    question_title,
    column_format='@{}lcc',
)
tw.table_to_file(latex_table, tables_path, tables_file_name)

Table for All


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,6,25.0
1,Company Visit,5,20.83
2,FutureSoc Symposium,4,16.67
3,Conferences hosted at the HPI,6,25.0
4,HPI Connect Events,2,8.33
5,Other Meetups organized at the HPI,1,4.17
6,TOTAL,24,100.0


### Enrollment count and percentages

In [3]:
# Answer counts and percentages broken down by enrollment status.
# The helper receives the grouping column, its code -> label mapping and the
# LaTeX output location (tables_path / tables_file_name).
tw.filter_write_table_multiple_column(
    df=data_frame,
    group_column_key=enrolled_column_key,
    dict_column_names=enrolled_dictionary,
    options_columns_list=options_columns_list,
    options_names_list=options_names_list,
    selected_code=selected_code,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)

Table for "Enrolled"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,4,22.22
1,Company Visit,2,11.11
2,FutureSoc Symposium,4,22.22
3,Conferences hosted at the HPI,5,27.78
4,HPI Connect Events,2,11.11
5,Other Meetups organized at the HPI,1,5.56
6,TOTAL,18,100.0


Table for "Intend to enroll"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,1,25.0
1,Company Visit,2,50.0
2,FutureSoc Symposium,0,0.0
3,Conferences hosted at the HPI,1,25.0
4,HPI Connect Events,0,0.0
5,Other Meetups organized at the HPI,0,0.0
6,TOTAL,4,100.0


Table for "Intend to re-enroll"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,1,50.0
1,Company Visit,1,50.0
2,FutureSoc Symposium,0,0.0
3,Conferences hosted at the HPI,0,0.0
4,HPI Connect Events,0,0.0
5,Other Meetups organized at the HPI,0,0.0
6,TOTAL,2,100.0


### Gender count and percentages

In [4]:
# Answer counts and percentages broken down by gender.
# The helper receives the grouping column, its code -> label mapping and the
# LaTeX output location (tables_path / tables_file_name).
tw.filter_write_table_multiple_column(
    df=data_frame,
    group_column_key=gender_column_key,
    dict_column_names=gender_dictionary,
    options_columns_list=options_columns_list,
    options_names_list=options_names_list,
    selected_code=selected_code,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)

Table for "Female"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,0,0.0
1,Company Visit,2,66.67
2,FutureSoc Symposium,0,0.0
3,Conferences hosted at the HPI,1,33.33
4,HPI Connect Events,0,0.0
5,Other Meetups organized at the HPI,0,0.0
6,TOTAL,3,100.0


Table for "Male"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,6,28.57
1,Company Visit,3,14.29
2,FutureSoc Symposium,4,19.05
3,Conferences hosted at the HPI,5,23.81
4,HPI Connect Events,2,9.52
5,Other Meetups organized at the HPI,1,4.76
6,TOTAL,21,100.0


### Years at HPI count and percentages

In [5]:
# Answer counts and percentages broken down by the number of years at HPI.
# The helper receives the grouping column, its code -> label mapping and the
# LaTeX output location (tables_path / tables_file_name).
tw.filter_write_table_multiple_column(
    df=data_frame,
    group_column_key=years_at_HPI_column_key,
    dict_column_names=years_at_HPI_dictionary,
    options_columns_list=options_columns_list,
    options_names_list=options_names_list,
    selected_code=selected_code,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)

Table for "1 year or less"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,3,37.5
1,Company Visit,3,37.5
2,FutureSoc Symposium,1,12.5
3,Conferences hosted at the HPI,1,12.5
4,HPI Connect Events,0,0.0
5,Other Meetups organized at the HPI,0,0.0
6,TOTAL,8,100.0


Table for "2 years"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,1,33.33
1,Company Visit,1,33.33
2,FutureSoc Symposium,0,0.0
3,Conferences hosted at the HPI,1,33.33
4,HPI Connect Events,0,0.0
5,Other Meetups organized at the HPI,0,0.0
6,TOTAL,3,100.0


Table for "3 years"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,2,22.22
1,Company Visit,1,11.11
2,FutureSoc Symposium,2,22.22
3,Conferences hosted at the HPI,2,22.22
4,HPI Connect Events,1,11.11
5,Other Meetups organized at the HPI,1,11.11
6,TOTAL,9,100.0


Table for "More than 4 years"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,0,0.0
1,Company Visit,0,0.0
2,FutureSoc Symposium,1,25.0
3,Conferences hosted at the HPI,2,50.0
4,HPI Connect Events,1,25.0
5,Other Meetups organized at the HPI,0,0.0
6,TOTAL,4,100.0


### Citizenship count and percentages 

In [6]:
# Answer counts and percentages broken down by citizenship.
# The helper receives the grouping column, its code -> label mapping and the
# LaTeX output location (tables_path / tables_file_name).
tw.filter_write_table_multiple_column(
    df=data_frame,
    group_column_key=citizenship_column_key,
    dict_column_names=citizenship_dictionary,
    options_columns_list=options_columns_list,
    options_names_list=options_names_list,
    selected_code=selected_code,
    question_id=question_id,
    question_title=question_title,
    tables_path=tables_path,
    tables_file_name=tables_file_name,
)

Table for "Germany"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,6,27.27
1,Company Visit,4,18.18
2,FutureSoc Symposium,4,18.18
3,Conferences hosted at the HPI,5,22.73
4,HPI Connect Events,2,9.09
5,Other Meetups organized at the HPI,1,4.55
6,TOTAL,22,100.0


Table for "EU"


Unnamed: 0,Answers,Count,Percentage
0,Job Fair,0,0.0
1,Company Visit,1,50.0
2,FutureSoc Symposium,0,0.0
3,Conferences hosted at the HPI,1,50.0
4,HPI Connect Events,0,0.0
5,Other Meetups organized at the HPI,0,0.0
6,TOTAL,2,100.0


Table for "Non-EU" is empty


### Free Text

In [7]:
# FREE-TEXT ANSWERS
# For every free-text column of this question, build a table with the comment
# and the four demographic attributes of its author, write it as LaTeX and
# display it in the notebook.

#extract the free-text columns for this question from the 'Details' column of the index
#ft for free-text
ft_dict = dw.generate_options_dictionary(filtered_index_df,'Details')
ft_column_keys = ft_dict.keys()
ft_column_names = ft_dict.values()

selected_columns = my_list + list(ft_column_keys)

# Create a new DataFrame with only the selected columns
ft_data_frame = original_data_frame[selected_columns]

# Demographic columns, in the order used both for sorting and for display
demographic_keys = [enrolled_column_key, gender_column_key,
                    years_at_HPI_column_key, citizenship_column_key]
# (column key, code -> label mapping) pairs used to decode the demographics
demographic_mappings = [(enrolled_column_key, enrolled_dictionary),
                        (gender_column_key, gender_dictionary),
                        (years_at_HPI_column_key, years_at_HPI_dictionary),
                        (citizenship_column_key, citizenship_dictionary)]

#for each option, make a table with all the free text that was provided
for column_key in ft_column_keys:
    # NOTE(review): dropna() without `subset` also discards comments written by
    # respondents who left any demographic field empty - confirm this is intended.
    text_df = ft_data_frame[[column_key] + demographic_keys].dropna()
    text_df = text_df.sort_values(by=demographic_keys)

    # Replace the numeric/string codes of each demographic by readable labels
    for demographic_key, demographic_names in demographic_mappings:
        text_df = dw.replace_code_for_name(text_df, demographic_key, demographic_names)

    #Rename columns before printing
    text_df = text_df.rename(columns={text_df.columns[0]: 'Comment',
                                      enrolled_column_key: enrolled_column_name,
                                      gender_column_key: gender_column_name,
                                      years_at_HPI_column_key: years_at_HPI_column_name,
                                      citizenship_column_key: citizenship_column_name})

    ft_column_name = ft_dict.get(column_key)
    # Column keys such as 'J011_07' contain underscores, which LaTeX rejects in
    # text mode; the caption is inserted verbatim, so escape them here.
    caption_key = column_key.replace('_', '\\_')
    table_caption = f'Comments for {caption_key} relative to {ft_column_name} in question: {question_id}-{question_title}'
    label_name = f'{question_id}-{column_key}-FreeText_table'

    # One wrapping paragraph column for the comment plus one left-aligned column
    # per remaining field. Deriving the spec from the frame keeps it in sync with
    # the number of columns; a spec declaring fewer columns than the table has
    # does not compile in LaTeX.
    column_format = '@{}p{0.40\\textwidth}' + 'l' * (len(text_df.columns) - 1)

    latex_table = text_df.to_latex(index=False,
                                   caption=table_caption,
                                   label=label_name,
                                   column_format=column_format,
                                   # free text may contain &, %, _, # ...; the
                                   # default of `escape` differs between pandas
                                   # versions, so request escaping explicitly
                                   escape=True)
    tw.table_to_file(latex_table, tables_path, tables_file_name)
    print(f'Table for "{ft_column_name}"')
    display(text_df)
    


{'J011_07': 'Do you have suggestions for other events to be organized at the HPI?'}
Table for "Do you have suggestions for other events to be organized at the HPI?"


Unnamed: 0,Comment,Enrolled,Gender,Years at HPI,Citizenship
57,Maybe we collobarate with POGS to do an event ...,Enrolled,Female,2 years,Germany
17,workshops for HPI PhD students like PoGS does ...,Enrolled,Female,3 years,Germany
10,PhD social events.,Enrolled,Female,More than 4 years,Non-EU
13,"I think, there are plenty of events. Instead o...",Enrolled,Male,1 year or less,Germany
41,social phd student meetup,Enrolled,Male,2 years,Germany
42,"buddy program for internationals, PhD museum v...",Enrolled,Male,2 years,Germany
43,PhD get togethers,Enrolled,Male,2 years,Germany
62,-,Enrolled,Male,2 years,Germany
74,no,Enrolled,Male,2 years,Germany
76,It would be nice to have each quarter a little...,Enrolled,Male,2 years,Germany
