Get the version of Python installed.

In [173]:
# Shell escape: prints the version of whichever `python` executable the shell resolves.
# NOTE(review): that may not be the interpreter running this kernel — confirm if it matters.
!python -V

Python 3.6.5 :: Anaconda, Inc.


In [174]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import re
from scipy import stats

In [175]:
# Load the grade export, then discard the auto-named 'Unnamed: 8' .. 'Unnamed: 12' columns.
# NOTE(review): presumably these are empty trailing columns in the CSV — confirm.
raw_grades = pd.read_csv("grades.csv")
print("physics course grade data read into dataframe successfully")
data = raw_grades.drop(
    ['Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12'],
    axis=1,
)
del raw_grades  # only the trimmed frame is kept
data.head()

physics course grade data read into dataframe successfully


Unnamed: 0,Identifier,Gndr_Flag,Ethnicity,First Gen,PHY 183 term,First day section,Quarter term section,final grade
0,0080B044-2082-44C3-A6A1-A930DE7B456E,F,White (non-Hispanic),Y,FS14,2.0,2.0,1.5
1,008CEDE8-726D-46F8-BEE0-B99200BA8DBD,F,Black or African American (non-Hispanic),N,FS14,1.0,1.0,1.5
2,013133EC-45FB-4627-A436-79E547F5795E,M,International,Y,FS14,1.0,1.0,4.0
3,0131C37C-0336-4A9C-8988-CC605F5EE5A9,M,White (non-Hispanic),N,FS14,2.0,2.0,3.5
4,01A6F74B-7A75-4E4A-B012-9909E17EA743,F,White (non-Hispanic),N,FS14,4.0,4.0,3.5


In [176]:
# (rows, columns) of the full grade table
data.shape

(6345, 8)

In [177]:
# Number of rows for each Ethnicity value in the full table
data["Ethnicity"].value_counts()

White (non-Hispanic)                             3941
International                                    1193
Asian (non-Hispanic)                              443
Black or African American  (non-Hispanic)         304
Hispanic Ethnicity                                230
Two or more races (non-Hispanic)                  163
Not Reported                                       56
American Indian/Alaskan Native (non-Hispanic)       9
Hawaiian / Pacific Islander (non-Hispanic)          6
Name: Ethnicity, dtype: int64

Remove all rows for students whose Ethnicity value is "International" or "Not Reported". Our stated goal is to attend to the push for a diverse STEM workforce in America, so it's appropriate to focus on domestic students.

In [178]:
# Keep every row whose Ethnicity is neither 'International' nor 'Not Reported'.
excluded_ethnicities = ['International', 'Not Reported']
domestic = data.loc[~data["Ethnicity"].isin(excluded_ethnicities)]

In [179]:
# Number of rows for each Ethnicity value remaining in `domestic`
domestic["Ethnicity"].value_counts()

White (non-Hispanic)                             3941
Asian (non-Hispanic)                              443
Black or African American  (non-Hispanic)         304
Hispanic Ethnicity                                230
Two or more races (non-Hispanic)                  163
American Indian/Alaskan Native (non-Hispanic)       9
Hawaiian / Pacific Islander (non-Hispanic)          6
Name: Ethnicity, dtype: int64

In [180]:
# (rows, columns) of the `domestic` subset
domestic.shape

(5096, 8)

In [181]:
# Split `domestic` by the code stored in "final grade".
# receivedGrade: every row whose final grade is not '(dropped)', 'W' or 'V'.
# leftCourse:    every row whose final grade is '(dropped)' or 'W'.
# NOTE(review): rows coded 'V' end up in neither frame — confirm that is intended.
final_grade = domestic["final grade"]
not_graded = final_grade.isin(['(dropped)', 'W', 'V'])
left = final_grade.isin(['(dropped)', 'W'])

# Independent copies, so later column assignments do not touch `domestic`.
receivedGrade = domestic.loc[~not_graded].copy()
leftCourse = domestic.loc[left].copy()

In [182]:
# Convert the remaining final grades to float so they can be summarised and compared
# numerically. `astype(float)` raises if any value cannot be parsed as a number.
# (The bare `receivedGrade.dtypes` that used to precede this line was removed: it was not
# the last expression in the cell, so its value was never displayed.)
receivedGrade['final grade'] = receivedGrade['final grade'].astype(float)


**Overall final-grade summary for every domestic student with a stated ethnicity who received a grade**

In [183]:
# Summary statistics of "final grade" over all rows of `receivedGrade`
receivedGrade['final grade'].describe()

count    4778.000000
mean        2.990477
std         0.972224
min         0.000000
25%         2.500000
50%         3.000000
75%         3.500000
max         4.000000
Name: final grade, dtype: float64

In [184]:
# Final-grade summary statistics for rows whose Ethnicity is 'White (non-Hispanic)'
(
    receivedGrade
    .loc[receivedGrade["Ethnicity"] == 'White (non-Hispanic)', "final grade"]
    .describe()
)

count    3710.000000
mean        3.075067
std         0.918834
min         0.000000
25%         2.500000
50%         3.500000
75%         4.000000
max         4.000000
Name: final grade, dtype: float64

In [185]:
# Final-grade summary statistics for rows whose Ethnicity is
# 'Black or African American  (non-Hispanic)' (the value contains two spaces before the parenthesis)
(
    receivedGrade
    .loc[receivedGrade["Ethnicity"] == 'Black or African American  (non-Hispanic)', "final grade"]
    .describe()
)

count    269.000000
mean       2.256506
std        1.086383
min        0.000000
25%        1.500000
50%        2.500000
75%        3.000000
max        4.000000
Name: final grade, dtype: float64

In [186]:
# Final-grade summary statistics for rows whose Ethnicity is 'Hispanic Ethnicity'
(
    receivedGrade
    .loc[receivedGrade["Ethnicity"] == 'Hispanic Ethnicity', "final grade"]
    .describe()
)

count    210.000000
mean       2.559524
std        1.123916
min        0.000000
25%        2.000000
50%        3.000000
75%        3.500000
max        4.000000
Name: final grade, dtype: float64

**Mann-Whitney-Wilcoxon (MWW) rank-sum tests comparing the final grades of White students with those of each other ethnic group**

In [187]:
def ranksum_vs_reference(frame, group, reference='White (non-Hispanic)',
                         group_col="Ethnicity", value_col="final grade"):
    """Run ``stats.ranksums`` on a reference group's values versus another group's.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table to select rows from.
    group : str
        Value of ``group_col`` that selects the comparison sample.
    reference : str
        Value of ``group_col`` that selects the reference sample (passed first).
    group_col : str
        Column used to select the two samples.
    value_col : str
        Column whose values are compared.

    Returns
    -------
    The result of ``stats.ranksums(reference_sample, group_sample)``, which
    unpacks as ``(statistic, p_value)``.
    """
    reference_sample = frame.loc[frame[group_col] == reference, value_col]
    group_sample = frame.loc[frame[group_col] == group, value_col]
    return stats.ranksums(reference_sample, group_sample)


# (label used in the printed message, exact Ethnicity value in the data)
comparison_groups = [
    ('Black', 'Black or African American  (non-Hispanic)'),
    ('Asian', 'Asian (non-Hispanic)'),
    ('Hispanic', 'Hispanic Ethnicity'),
    ('Multiracial', 'Two or more races (non-Hispanic)'),
    ('AmericanIndian/AlaskanNative', 'American Indian/Alaskan Native (non-Hispanic)'),
    ('Hawaiian/PacIslander', 'Hawaiian / Pacific Islander (non-Hispanic)'),
]

# NOTE(review): scipy documents that `ranksums` does not handle ties and points to
# `stats.mannwhitneyu` for tie handling. Final grades appear to take only a few distinct
# values, so ties are likely heavy — confirm this is the intended test.
# NOTE(review): six comparisons are run with no multiple-comparison adjustment, and the two
# smallest groups have fewer than ten students each — interpret those p-values with care.
for group_label, ethnicity_value in comparison_groups:
    # z_stat / p_val stay module-level names, as before; after the loop they hold the
    # values from the last comparison.
    z_stat, p_val = ranksum_vs_reference(receivedGrade, ethnicity_value)
    print("MWW RankSum P for grades of White and {} students =".format(group_label), p_val)





MWW RankSum P for grades of White and Black students = 1.1643249725996193e-36
MWW RankSum P for grades of White and Asian students = 0.01800661191741279
MWW RankSum P for grades of White and Hispanic students = 2.4170491908904494e-12
MWW RankSum P for grades of White and Multiracial students = 0.16784028399399697
MWW RankSum P for grades of White and AmericanIndian/AlaskanNative students = 0.29913089202382515
MWW RankSum P for grades of White and Hawaiian/PacIslander students = 0.9193115841334023
