In [1]:
# Dependencies
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import math

In [2]:
# Generate "Simulated" Data
# Racial and Ethnic Categories: https://grants.nih.gov/grants/guide/notice-files/NOT-OD-15-089.html
categories = [
    "American Indian or Alaska Native",  
    "Asian", 
    "Black or African American", 
    "Hispanic or Latino", 
    "Native Hawaiian or Other Pacific Islander",
    "White",
    "Other"]
sorority_races = np.random.choice(categories, 200, p=[0.01, 0.05, 0.10, 0.07, 0.02, 0.5, 0.25])
university_races = np.random.choice(categories, 1000, p=[0.01, 0.06, 0.12, 0.24, 0.02, 0.5, 0.05])

In [3]:
sorority_pd = pd.DataFrame(sorority_races)
sorority_counts = sorority_pd[0].value_counts()
sorority_counts

White                                        108
Other                                         46
Black or African American                     22
Asian                                         11
Hispanic or Latino                             8
Native Hawaiian or Other Pacific Islander      4
American Indian or Alaska Native               1
Name: 0, dtype: int64

In [4]:
university_pd = pd.DataFrame(university_races)
university_counts = university_pd[0].value_counts()
university_counts

White                                        481
Hispanic or Latino                           257
Black or African American                    115
Asian                                         75
Other                                         47
Native Hawaiian or Other Pacific Islander     15
American Indian or Alaska Native              10
Name: 0, dtype: int64

In [5]:
university_ratios = university_pd[0].value_counts() / len(university_pd)
university_ratios

White                                        0.481
Hispanic or Latino                           0.257
Black or African American                    0.115
Asian                                        0.075
Other                                        0.047
Native Hawaiian or Other Pacific Islander    0.015
American Indian or Alaska Native             0.010
Name: 0, dtype: float64

In [6]:
sorority_expected = university_ratios * len(sorority_pd)
sorority_expected

White                                        96.2
Hispanic or Latino                           51.4
Black or African American                    23.0
Asian                                        15.0
Other                                         9.4
Native Hawaiian or Other Pacific Islander     3.0
American Indian or Alaska Native              2.0
Name: 0, dtype: float64

In [7]:
# Run Chi-Square Test to determine goodness of fit
stats.chisquare(f_obs=sorority_counts, f_exp=sorority_expected)

Power_divergenceResult(statistic=4.16670532166596, pvalue=0.65412809444327347)