# 4.1: Combining Percentage Tables by Gender
We can combine the male and female rows of each percentage table that belong to the same demographic group (e.g., African American male and African American female) by averaging their percentages.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Initialize the names of the directory layout.
BY_GENDER_DIRECTORY = "by_gender"

models = ["claude_3.5_sonnet", "command_r_plus", "gpt_4o_mini", "llama_3.1_70b"]
prompt_types = ["implicit", "explicit"]
categories = ["age", "ethnicity_and_race"]
attributes = ["occupation", "politics", "religion", "sexual_orientation", "socioeconomic_status"]


def combine_gender_rows(percentages_with_gender_df):
    """Average each adjacent male/female row pair into a single row per group.

    The table is expected to list every group as two consecutive rows: the
    ``<group>_male`` row followed by the matching ``<group>_female`` row.

    Args:
        percentages_with_gender_df: Dataframe with a ``group_with_gender``
            column (or an already renamed ``group`` column) plus the value
            columns. The input dataframe is not modified.

    Returns:
        A new dataframe with the same columns (``group_with_gender`` renamed
        to ``group``), one row per pair, indexed 0..n-1. ``float64`` columns
        hold the mean of the two rows and ``group`` holds the male label with
        ``"_male"`` removed.

    Raises:
        ValueError: If the number of rows is odd, or if a pair of rows does
            not describe the same group once the gender markers are removed.
    """
    if len(percentages_with_gender_df) % 2 != 0:
        raise ValueError("Number of groups in with_gender CSV is not even.")

    renamed_df = percentages_with_gender_df.rename(columns={"group_with_gender": "group"})

    # Even positions hold the male rows, odd positions the female rows.
    # Resetting the index aligns the two halves for element-wise arithmetic.
    male_rows = renamed_df.iloc[0::2].reset_index(drop=True)
    female_rows = renamed_df.iloc[1::2].reset_index(drop=True)

    # Pairing is purely positional, so verify that both rows of every pair
    # name the same group; otherwise unrelated rows would be averaged silently.
    group_names = male_rows["group"].str.replace("_male", "", regex=False)
    female_group_names = female_rows["group"].str.replace("_female", "", regex=False)
    mismatched = group_names != female_group_names
    if mismatched.any():
        bad_pairs = list(zip(male_rows.loc[mismatched, "group"], female_rows.loc[mismatched, "group"]))
        raise ValueError(
            "Rows are not ordered as matching male/female pairs: " + str(bad_pairs)
        )

    # Add the male and female data together, then halve the float columns to
    # obtain the mean. Selecting columns by dtype avoids integer-position
    # lookups on a label-indexed Series, which newer pandas no longer supports.
    combined_df = male_rows + female_rows
    for column in combined_df.select_dtypes(include=[np.float64]).columns:
        combined_df[column] = combined_df[column] / 2
    # NOTE(review): only float64 columns are halved, as before; integer-typed
    # columns (if any) are summed. Confirm that is intended for such columns.

    # The addition above concatenated the two labels; restore the group name.
    combined_df["group"] = group_names
    return combined_df


# Iterate through every percentage table. The guard is true when this runs as
# a notebook cell or script and keeps the file importable without touching disk.
if __name__ == "__main__":
    for model in models:
        for prompt_type in prompt_types:
            for category in categories:
                # Create the path of the directory that contains the percentage tables.
                directory_path = "/".join([BY_GENDER_DIRECTORY, model, prompt_type, category])
                # Iterate through each possible output group.
                for attribute in attributes:
                    # Create the path of the CSV file.
                    csv_path = directory_path + "/" + category + "_" + attribute + ".csv"

                    # Import the CSV file as a dataframe.
                    percentages_with_gender_df = pd.read_csv(csv_path)

                    # Combine the male and female data of every group.
                    new_percentages_df = combine_gender_rows(percentages_with_gender_df)

                    # Export the new dataframe to the same relative path outside
                    # the by-gender directory (the index is written as well).
                    new_percentages_df.to_csv(csv_path.replace(BY_GENDER_DIRECTORY + "/", ""))