In [10]:
# Import packages
import pandas as pd 
from itertools import product

In [19]:
# Read in each column header enumeration.
# The workbook is opened once and every sheet is parsed from that single
# handle, instead of re-opening and re-parsing the file for each sheet.
workbook_name = 'column-header-enumeration.xlsx'
with pd.ExcelFile(workbook_name, engine='openpyxl') as workbook:
    # header=None: every row of the sheet is data; the column is named via `names`.
    report_types = pd.read_excel(workbook,
                                 sheet_name='Report-Type',
                                 header=None,
                                 names=['report_type'])
    # Read with pandas' default header handling, so the first row of this
    # sheet supplies the column names.
    race_ethnicity = pd.read_excel(workbook,
                                   sheet_name='Race-Ethnicity')
    genders = pd.read_excel(workbook,
                            sheet_name='Gender',
                            header=None,
                            names=['gender'])
    val_formats = pd.read_excel(workbook,
                                sheet_name='Value-Format',
                                header=None,
                                names=['val_format'])


In [20]:
# Create DataFrame of all combinations of column header enumerations.
# Each race/ethnicity row is kept together as one record so that its two
# columns travel through the cartesian product as a unit; the record is
# unpacked into race_ethnicity_1 / race_ethnicity_2 while the rows are built.
race_tuples = tuple(race_ethnicity.to_records(index=False))
result = pd.DataFrame(
    [
        (report_type, gender, val_format, race[0], race[1])
        for report_type, race, gender, val_format in product(
            report_types.report_type,
            race_tuples,
            genders.gender,
            val_formats.val_format,
        )
    ],
    columns=['report_type', 'gender', 'val_format', 'race_ethnicity_1', 'race_ethnicity_2'],
)

In [21]:
# Add a column capturing a unique attribute code

# Number every distinct (race_ethnicity_1, race_ethnicity_2) pair.
# dropna=False treats a missing value as a group key of its own, so rows whose
# second race/ethnicity column is empty still receive a group number.
re_group = result.groupby(
    by=['race_ethnicity_1', 'race_ethnicity_2'],
    dropna=False,
).ngroup()
# Define a function to produce unique attribute codes
def concat_unique_attr_code(report_type, gender, val_format, idx):
    """Build an attribute code of the form ``<report_type>_<gender>_<format>_<group>``.

    Parameters
    ----------
    report_type : str
        Used verbatim as the first segment of the code.
    gender : str
        'Total, both sexes', 'Male' or 'Female'; abbreviated to 'B', 'M' or 'F'.
    val_format : str
        'Number' or 'Percent'; abbreviated to 'N' or 'P'.
    idx
        Label used to look up the group number in the module-level ``re_group``.

    Returns
    -------
    str
        The four segments joined with underscores.

    Raises
    ------
    KeyError
        If ``gender`` or ``val_format`` is not one of the values listed above.
    """
    gender_code = {'Total, both sexes': 'B', 'Male': 'M', 'Female': 'F'}[gender]
    format_code = {'Number': 'N', 'Percent': 'P'}[val_format]
    # The group number comes from the global lookup, not from an argument.
    group_code = str(re_group[idx])
    return '_'.join([report_type, gender_code, format_code, group_code])

# Build the code row by row; `record.name` is the row's index label, which
# concat_unique_attr_code uses to look up the row's group number.
result['unique_attr_code'] = result.apply(
    lambda record: concat_unique_attr_code(
        record['report_type'],
        record['gender'],
        record['val_format'],
        record.name,
    ),
    axis=1,
)

In [24]:
# Write resulting table to CSV file: 2014_2018_EEOALL1R_Column_Headers.csv
# index=False leaves the DataFrame index out of the written file.
result.to_csv('2014_2018_EEOALL1R_Column_Headers.csv', index=False)

In [22]:
# Display the final table of column-header combinations and their attribute codes
result

Unnamed: 0,report_type,gender,val_format,race_ethnicity_1,race_ethnicity_2,unique_attr_code
0,EST,"Total, both sexes",Number,"Total, race and ethnicity",,EST_B_N_7
1,EST,"Total, both sexes",Percent,"Total, race and ethnicity",,EST_B_P_7
2,EST,Male,Number,"Total, race and ethnicity",,EST_M_N_7
3,EST,Male,Percent,"Total, race and ethnicity",,EST_M_P_7
4,EST,Female,Number,"Total, race and ethnicity",,EST_F_N_7
...,...,...,...,...,...,...
91,MOE,"Total, both sexes",Percent,Balance of not Hispanic or Latino,,MOE_B_P_0
92,MOE,Male,Number,Balance of not Hispanic or Latino,,MOE_M_N_0
93,MOE,Male,Percent,Balance of not Hispanic or Latino,,MOE_M_P_0
94,MOE,Female,Number,Balance of not Hispanic or Latino,,MOE_F_N_0
