### Import packages and data!

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# re-import our WIC dataset from its CSV file
wic_csv_path = "datasets/WIC_data.csv"
wic_data = pd.read_csv(wic_csv_path)

# show the first few rows as a quick check of what was loaded
display(wic_data.head())

### Task 1: Create a one-way frequency table

In [None]:
# One-way frequency table: total number of program enrollees per region.

# column of wic_data that is summed to get each region's enrollee total
total_column = "Total of all Racial Categories Total"

# like before, group the data by region
grouped_wic_data = wic_data.groupby(by="REGION")

# Add up the totals inside every region in a single vectorized step instead of
# appending one row at a time to an empty dataframe (row-by-row building is slow
# and leaves the counts stored as generic Python objects rather than numbers).
# NOTE: pandas' sum skips missing values (NaN); Python's built-in sum would
# instead turn the whole region total into NaN.
enrollees_by_region = grouped_wic_data[total_column].sum()

# turn the result into a one-column table indexed by "Region"
freq_table_one = (
    enrollees_by_region
    .rename("Total number of program enrollees")
    .rename_axis("Region")
    .to_frame()
)
display(freq_table_one)

### Task 2: Create a two-way frequency table

In [None]:
# Two-way frequency table: enrollee totals per region, one column per race group.

race_groups = ["Asian", "Black"] #our race groups of interest

# each race group's counts are read from the column named "<race> Total";
# deriving the column names from race_groups keeps the table in sync with the list
race_total_columns = [race + " Total" for race in race_groups]

grouped_wic_data = wic_data.groupby(by="REGION")

# Sum every race-group column within each region in one vectorized step instead of
# appending one row at a time to an empty dataframe (which stores the counts as
# generic Python objects rather than numbers).
# NOTE: pandas' sum skips missing values (NaN); Python's built-in sum would
# instead turn the affected total into NaN.
freq_table_two = grouped_wic_data[race_total_columns].sum().rename_axis("Region")
display(freq_table_two)

### Task 3

#### Save the region category names in a list

In [None]:
# the index of the two-way table holds one entry per region; copy it into a plain list
region_names = [region_name for region_name in freq_table_two.index]
print(region_names)

#### Calculate category percentages

In [None]:
def _total_and_percents(column_name):
    """Return the overall total of one freq_table_two column and each region's percentage of it.

    The percentages are listed in the same order as region_names.
    """
    overall_total = np.sum(list(freq_table_two[column_name]))
    percents = []
    for region_name in region_names:
        region_count = freq_table_two.loc[region_name, column_name]
        percents.append((region_count/overall_total)*100)
    return overall_total, percents


# share of all Asian enrollees that each region accounts for
total_asian, asian_percents = _total_and_percents('Asian Total')

# share of all Black enrollees that each region accounts for
total_black, black_percents = _total_and_percents('Black Total')

#### Create comparative pie chart

In [None]:
def _draw_region_pie(ax, percents, title):
    """Draw one pie of per-region percentages on ax and return the result of ax.pie.

    Slices are labelled with region_names and annotated with their percentage.
    """
    pie_parts = ax.pie(percents, labels=region_names, autopct="%1.1f%%")
    ax.axis("equal") # to ensure we get a circle shape instead of an oval
    ax.set_title(title, fontsize=30)
    # change label font sizes: item 1 holds the slice labels, item 2 the percentage texts
    for slice_label, percent_label in zip(pie_parts[1], pie_parts[2]):
        slice_label.set_fontsize(18)
        percent_label.set_fontsize(18)
    return pie_parts


# two pies side by side so the regional breakdowns can be compared directly
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(30,15))

piechart1 = _draw_region_pie(ax1, asian_percents, "Percentage of Total Asian Enrollees Per Region")
piechart2 = _draw_region_pie(ax2, black_percents, "Percentage of Total Black Enrollees Per Region")

plt.show()