In [1]:
from functions import *
%matplotlib inline

Import the data of 5-minute windows preceding survey responses.

In [2]:
# Survey responses with the features from the 5-minute window preceding each response
surveyfile = '../Data (Algebra 1)/YearSurvey.csv'
# Columns removed right after loading (presumably bookkeeping ids, not model features - confirm)
droppedColumns = ['survey_id','time_window','question_id']
featuredf = dropColumns(readData(surveyfile), droppedColumns)

Import demographic file

In [3]:
# Demographic table; later cells read the columns 'Female', the lunch-status flags,
# 'Grade 6'..'Grade 12' and 'White'/'Black'/'Hispanic' from it
demofile = '../Data (Algebra 1)/YearDemographicsTest.csv'
demodf = readData(demofile)

Filter so survey and demographic files have the same indices

In [4]:
# Restrict the demographic table to indices present in the survey data
# (presumably filterByIndex keeps the rows whose index is in the second argument - confirm)
demodf = filterByIndex(demodf,featuredf.index)
# "All" survey set: survey rows that have demographic information.
# Subgroup frames below are filtered from featuredf using indices taken from demodf.
featuredfA = filterByIndex(featuredf,demodf.index)

Set up some common parameters

In [5]:
# Affective state names; this is also the row order of every per-group score table below
states = [
    'Happiness', 'Frustration', 'Confusion', 'Hopefulness', 'Contentment',
    'Disappointment', 'Relief', 'Pride', 'Pleasantness', 'Anxiety',
    'Engagement', 'Interest', 'Sadness', 'Mind Wandering', 'Boredom',
    'Arousal', 'Curiosity', 'Surprise',
]
nStates = len(states)

# Number of cross-validation folds shared by every experiment in this notebook
nFolds = 10
# NOTE(review): every kfold.split(...) call below passes only (features, labels) and
# no `groups`; scikit-learn's GroupKFold requires groups - confirm which GroupKFold
# `functions` exports and whether plain KFold was intended.
kfold = GroupKFold(n_splits=nFolds)

## Male/Female

Divide female and male students in the demographic file and filter survey file accordingly

In [6]:
# Index of students flagged Female == 1, and the survey rows for them
isFemale = demodf['Female'] == 1
femaleind = demodf.loc[isFemale].index
featuredfF = filterByIndex(featuredf,femaleind)

# Index of students flagged Female == 0, and the survey rows for them
isMale = demodf['Female'] == 0
maleind = demodf.loc[isMale].index
featuredfM = filterByIndex(featuredf,maleind)

Train classifiers on male/female/all, and predict for male/female/all

In [None]:
# One (state x training-group) table per evaluated group: Female, Male, All.
# Columns are filled in the order Train Male, Train Female, Train All.
scoresF, scoresM, scoresA = (np.zeros((nStates,3)) for _ in range(3))

In [None]:
# For every state: train on each gender group in turn (k folds each) and score
# every fold's model against the Female, Male and All data.
for s in range(nStates):
    # (features, labels) arrays for this state, keyed by group
    arrays = {}
    for group, frame in (('M', featuredfM), ('F', featuredfF), ('A', featuredfA)):
        stateFeatures, stateLabels = filterByState(states[s], frame)
        arrays[group] = (stateFeatures.values, stateLabels.values)

    # Evaluated groups in fold-score column order, paired with the table that
    # collects the scores obtained on that group
    targets = (('F', scoresF), ('M', scoresM), ('A', scoresA))

    # Column c of every score table holds the models trained on trainGroup
    for c, trainGroup in enumerate(('M', 'F', 'A')):
        trainX, trainY = arrays[trainGroup]
        foldRhos = np.zeros((nFolds, len(targets)))
        for i, (train, test) in enumerate(kfold.split(trainX, trainY)):
            # Fit on this fold's training rows only
            _, model = classify(trainX[train], trainY[train], trainX[test])
            # NOTE(review): each group is scored on ALL of its rows; whenever the
            # evaluated group overlaps the training group this includes rows the
            # model was fitted on (the `test` indices are not used for scoring) -
            # confirm this is intended.
            for j, (evalGroup, _unused) in enumerate(targets):
                evalX, evalY = arrays[evalGroup]
                foldRhos[i, j] = evaluateSpearman(evalY, model.predict(evalX))
        # Average across folds and store one value per evaluated group
        foldMeans = np.mean(foldRhos, axis=0)
        for j, (_unused, table) in enumerate(targets):
            table[s, c] = foldMeans[j]

Display the results

In [None]:
columns = ['Train Male','Train Female','Train All']

# Build one summary table per evaluated group (Female, Male, All):
# rows = states, columns = training group, plus a 'Mean' column (across training
# groups) and a final 'Mean' row (across states).
summaries = []
for scores in (scoresF, scoresM, scoresA):
    table = pd.DataFrame(data=scores,index=states,columns=columns)
    table['Mean'] = table.mean(axis=1)
    avg = table.mean(axis=0)
    avg.name = 'Mean'
    # DataFrame.append was removed in pandas 2.0; concatenating the mean as a
    # one-row frame gives the same result on old and new pandas.
    summaries.append(pd.concat([table, avg.to_frame().T]))
dfF, dfM, dfA = summaries

In [None]:
# Write the gender summary tables to disk, one CSV per evaluated group
for table, path in (
    (dfF, '../Data (Algebra 1)/gender scores female.csv'),
    (dfM, '../Data (Algebra 1)/gender scores male.csv'),
    (dfA, '../Data (Algebra 1)/gender scores all.csv'),
):
    table.to_csv(path)

## Lunch Status

Divide students by lunch status in the demographic file and filter survey file accordingly

In [7]:
# Students flagged 'Free Lunch' == 1 and their survey rows
# (featuredfF is reused here; in the gender section it held the female subset)
hasFree = demodf['Free Lunch'] == 1
freeind = demodf.loc[hasFree].index
featuredfF = filterByIndex(featuredf,freeind)

# Students flagged 'Reduced Lunch' == 1 and their survey rows
hasReduced = demodf['Reduced Lunch'] == 1
reducedind = demodf.loc[hasReduced].index
featuredfR = filterByIndex(featuredf,reducedind)

# Students flagged 'Other Lunch' == 1 and their survey rows
hasOther = demodf['Other Lunch'] == 1
otherind = demodf.loc[hasOther].index
featuredfO = filterByIndex(featuredf,otherind)

Train classifiers on free/reduced/other/all, and predict for free/reduced/other/all

In [None]:
# One (state x training-group) table per evaluated group: Free, Reduced, Other, All.
# Columns are filled in the order Train Free, Train Reduced, Train Other, Train All.
scoresF, scoresR, scoresO, scoresA = (np.zeros((nStates,4)) for _ in range(4))

In [None]:
# Groups in a fixed order (Free, Reduced, Other, All); the same order is used for
# training (score-table column) and for evaluation (which score table is written).
groupFrames = (featuredfF, featuredfR, featuredfO, featuredfA)
groupScores = (scoresF, scoresR, scoresO, scoresA)

for s in range(nStates):
    # (features, labels) arrays for this state, one pair per group
    arrays = []
    for frame in groupFrames:
        stateFeatures, stateLabels = filterByState(states[s], frame)
        arrays.append((stateFeatures.values, stateLabels.values))

    # Column c of every score table holds the models trained on group c
    for c, (trainX, trainY) in enumerate(arrays):
        foldRhos = np.zeros((nFolds, len(arrays)))
        for i, (train, test) in enumerate(kfold.split(trainX, trainY)):
            # Fit on this fold's training rows only
            _, model = classify(trainX[train], trainY[train], trainX[test])
            # NOTE(review): each group is scored on ALL of its rows; whenever the
            # evaluated group overlaps the training group this includes rows the
            # model was fitted on (the `test` indices are not used for scoring) -
            # confirm this is intended.
            for j, (evalX, evalY) in enumerate(arrays):
                foldRhos[i, j] = evaluateSpearman(evalY, model.predict(evalX))
        # Average across folds and store one value per evaluated group
        foldMeans = np.mean(foldRhos, axis=0)
        for j, table in enumerate(groupScores):
            table[s, c] = foldMeans[j]

Display the results

In [None]:
columns = ['Train Free','Train Reduced','Train Other','Train All']

# Build one summary table per evaluated group (Free, Reduced, Other, All):
# rows = states, columns = training group, plus a 'Mean' column (across training
# groups) and a final 'Mean' row (across states).
summaries = []
for scores in (scoresF, scoresR, scoresO, scoresA):
    table = pd.DataFrame(data=scores,index=states,columns=columns)
    table['Mean'] = table.mean(axis=1)
    avg = table.mean(axis=0)
    avg.name = 'Mean'
    # DataFrame.append was removed in pandas 2.0; concatenating the mean as a
    # one-row frame gives the same result on old and new pandas.
    summaries.append(pd.concat([table, avg.to_frame().T]))
dfF, dfR, dfO, dfA = summaries

In [None]:
# Show the scores obtained on the 'Other Lunch' group, rounded to two decimals
dfO.round(2)

In [None]:
# Write the lunch-status summary tables to disk, one CSV per evaluated group
for table, path in (
    (dfF, '../Data (Algebra 1)/lunch scores free.csv'),
    (dfR, '../Data (Algebra 1)/lunch scores reduced.csv'),
    (dfO, '../Data (Algebra 1)/lunch scores other.csv'),
    (dfA, '../Data (Algebra 1)/lunch scores all.csv'),
):
    table.to_csv(path)

## Grade level

Divide middle school and high school students in the demographic file and filter survey file accordingly

In [29]:
# Middle school: demographic rows flagged for grade 6, 7 or 8, concatenated in that
# order and then sorted by index
middleCols = ['Grade 6', 'Grade 7', 'Grade 8']
middleRows = pd.concat([demodf.loc[demodf[col] == 1] for col in middleCols])
middleind = middleRows.sort_index().index
featuredfM = filterByIndex(featuredf,middleind)

# High school: demographic rows flagged for grade 9, 10, 11 or 12, built the same way
highCols = ['Grade 9', 'Grade 10', 'Grade 11', 'Grade 12']
highRows = pd.concat([demodf.loc[demodf[col] == 1] for col in highCols])
highind = highRows.sort_index().index
featuredfH = filterByIndex(featuredf,highind)

Train classifiers on middle/high/all, and predict for middle/high/all

In [30]:
# One (state x training-group) table per evaluated group: Middle, High, All.
# Columns are filled in the order Train Middle, Train High, Train All.
scoresM, scoresH, scoresA = (np.zeros((nStates,3)) for _ in range(3))

In [32]:
# Groups in a fixed order (Middle school, High school, All); the same order is used
# for training (score-table column) and for evaluation (which score table is written).
groupFrames = (featuredfM, featuredfH, featuredfA)
groupScores = (scoresM, scoresH, scoresA)

for s in range(nStates):
    # (features, labels) arrays for this state, one pair per group
    arrays = []
    for frame in groupFrames:
        stateFeatures, stateLabels = filterByState(states[s], frame)
        arrays.append((stateFeatures.values, stateLabels.values))

    # Column c of every score table holds the models trained on group c
    for c, (trainX, trainY) in enumerate(arrays):
        foldRhos = np.zeros((nFolds, len(arrays)))
        for i, (train, test) in enumerate(kfold.split(trainX, trainY)):
            # Fit on this fold's training rows only
            _, model = classify(trainX[train], trainY[train], trainX[test])
            # NOTE(review): each group is scored on ALL of its rows; whenever the
            # evaluated group overlaps the training group this includes rows the
            # model was fitted on (the `test` indices are not used for scoring) -
            # confirm this is intended.
            for j, (evalX, evalY) in enumerate(arrays):
                foldRhos[i, j] = evaluateSpearman(evalY, model.predict(evalX))
        # Average across folds and store one value per evaluated group
        foldMeans = np.mean(foldRhos, axis=0)
        for j, table in enumerate(groupScores):
            table[s, c] = foldMeans[j]

Display the results

In [33]:
columns = ['Train Middle','Train High','Train All']

# Build one summary table per evaluated group (Middle, High, All):
# rows = states, columns = training group, plus a 'Mean' column (across training
# groups) and a final 'Mean' row (across states).
summaries = []
for scores in (scoresM, scoresH, scoresA):
    table = pd.DataFrame(data=scores,index=states,columns=columns)
    table['Mean'] = table.mean(axis=1)
    avg = table.mean(axis=0)
    avg.name = 'Mean'
    # DataFrame.append was removed in pandas 2.0; concatenating the mean as a
    # one-row frame gives the same result on old and new pandas.
    summaries.append(pd.concat([table, avg.to_frame().T]))
dfM, dfH, dfA = summaries

In [37]:
# Write the grade-level summary tables to disk, one CSV per evaluated group
for table, path in (
    (dfH, '../Data (Algebra 1)/grade scores high school.csv'),
    (dfM, '../Data (Algebra 1)/grade scores middle school.csv'),
    (dfA, '../Data (Algebra 1)/grade scores all.csv'),
):
    table.to_csv(path)

## Race/ethnicity

Divide students by race/ethnicity in the demographic file and filter the survey file accordingly. Due to small sample sizes in the other groups, only White, Black, and Hispanic students are considered.

In [6]:
# Students flagged 'White' == 1 and their survey rows
isWhite = demodf['White'] == 1
whiteind = demodf.loc[isWhite].index
featuredfW = filterByIndex(featuredf,whiteind)

# Students flagged 'Black' == 1 and their survey rows
isBlack = demodf['Black'] == 1
blackind = demodf.loc[isBlack].index
featuredfB = filterByIndex(featuredf,blackind)

# Students flagged 'Hispanic' == 1 and their survey rows
# (featuredfH is reused here; in the grade section it held the high-school subset)
isHispanic = demodf['Hispanic'] == 1
hispind = demodf.loc[isHispanic].index
featuredfH = filterByIndex(featuredf,hispind)

Train classifiers on white/black/hispanic/all, and predict for white/black/hispanic/all

In [8]:
# One (state x training-group) table per evaluated group: White, Black, Hispanic, All.
# Columns are filled in the order Train White, Train Black, Train Hispanic, Train All.
scoresW, scoresB, scoresH, scoresA = (np.zeros((nStates,4)) for _ in range(4))

In [9]:
# Groups in a fixed order (White, Black, Hispanic, All); the same order is used for
# training (score-table column) and for evaluation (which score table is written).
groupFrames = (featuredfW, featuredfB, featuredfH, featuredfA)
groupScores = (scoresW, scoresB, scoresH, scoresA)

for s in range(nStates):
    # (features, labels) arrays for this state, one pair per group
    arrays = []
    for frame in groupFrames:
        stateFeatures, stateLabels = filterByState(states[s], frame)
        arrays.append((stateFeatures.values, stateLabels.values))

    # Column c of every score table holds the models trained on group c
    for c, (trainX, trainY) in enumerate(arrays):
        foldRhos = np.zeros((nFolds, len(arrays)))
        for i, (train, test) in enumerate(kfold.split(trainX, trainY)):
            # Fit on this fold's training rows only
            _, model = classify(trainX[train], trainY[train], trainX[test])
            # NOTE(review): each group is scored on ALL of its rows; whenever the
            # evaluated group overlaps the training group this includes rows the
            # model was fitted on (the `test` indices are not used for scoring) -
            # confirm this is intended.
            for j, (evalX, evalY) in enumerate(arrays):
                foldRhos[i, j] = evaluateSpearman(evalY, model.predict(evalX))
        # Average across folds and store one value per evaluated group
        foldMeans = np.mean(foldRhos, axis=0)
        for j, table in enumerate(groupScores):
            table[s, c] = foldMeans[j]

Display the results

In [10]:
columns = ['Train White','Train Black','Train Hispanic','Train All']

# Build one summary table per evaluated group (White, Black, Hispanic, All):
# rows = states, columns = training group, plus a 'Mean' column (across training
# groups) and a final 'Mean' row (across states).
summaries = []
for scores in (scoresW, scoresB, scoresH, scoresA):
    table = pd.DataFrame(data=scores,index=states,columns=columns)
    table['Mean'] = table.mean(axis=1)
    avg = table.mean(axis=0)
    avg.name = 'Mean'
    # DataFrame.append was removed in pandas 2.0; concatenating the mean as a
    # one-row frame gives the same result on old and new pandas.
    summaries.append(pd.concat([table, avg.to_frame().T]))
dfW, dfB, dfH, dfA = summaries

In [14]:
# Show the scores obtained on the full ("All") data set, rounded to two decimals
dfA.round(2)

Unnamed: 0,Train White,Train Black,Train Hispanic,Train All,Mean
Happiness,0.29,0.28,0.29,0.3,0.29
Frustration,0.29,0.27,0.27,0.29,0.28
Confusion,0.31,0.3,0.3,0.31,0.3
Hopefulness,0.28,0.28,0.27,0.28,0.28
Contentment,0.3,0.3,0.3,0.3,0.3
Disappointment,0.29,0.27,0.28,0.29,0.28
Relief,0.27,0.26,0.26,0.27,0.27
Pride,0.27,0.27,0.27,0.27,0.27
Pleasantness,0.25,0.24,0.24,0.25,0.25
Anxiety,0.23,0.21,0.22,0.23,0.22


In [15]:
# Write the race/ethnicity summary tables to disk, one CSV per evaluated group
for table, path in (
    (dfW, '../Data (Algebra 1)/race scores white.csv'),
    (dfB, '../Data (Algebra 1)/race scores black.csv'),
    (dfH, '../Data (Algebra 1)/race scores hispanic.csv'),
    (dfA, '../Data (Algebra 1)/race scores all.csv'),
):
    table.to_csv(path)

## Positive/Negative affect

Filter surveys according to positive or negative affective states

In [6]:
# Survey questions treated as positive affect, in concatenation order
positiveQuestions = ['Happiness', 'Hopefulness', 'Contentment', 'Relief', 'Pride',
                     'Pleasantness', 'Interest', 'Arousal', 'Engagement']
# Survey questions treated as negative affect, in concatenation order
negativeQuestions = ['Frustration', 'Confusion', 'Disappointment', 'Anxiety',
                     'Sadness', 'Mind Wandering', 'Boredom']

# Stack the rows of featuredfA question by question, then sort by index
featuredfP = pd.concat(
    [featuredfA.loc[featuredfA['survey_question'] == question]
     for question in positiveQuestions]
).sort_index()
featuredfN = pd.concat(
    [featuredfA.loc[featuredfA['survey_question'] == question]
     for question in negativeQuestions]
).sort_index()

In [7]:
# Positive-affect training data: labels are the survey answers; features are every
# remaining column once the answer and question columns are dropped
labelsP = featuredfP['survey_answer'].values
featuresP = dropColumns(featuredfP,['survey_answer','survey_question']).values

# Negative-affect training data, built the same way
labelsN = featuredfN['survey_answer'].values
featuresN = dropColumns(featuredfN,['survey_answer','survey_question']).values

In [8]:
# Positive-affect states; row order of the positive score tables
statesP = [
    'Happiness', 'Hopefulness', 'Contentment', 'Relief', 'Pride',
    'Pleasantness', 'Interest', 'Arousal', 'Engagement',
]
# Negative-affect states; row order of the negative score tables
statesN = [
    'Frustration', 'Confusion', 'Disappointment', 'Anxiety',
    'Sadness', 'Mind Wandering', 'Boredom',
]

In [10]:
# Per-fold Spearman scores, rows = evaluated state, columns = fold.
# The first letter is the training set and the second the evaluated states:
# scoresPN = trained on Positive, evaluated on each Negative state.
scoresPP = np.zeros((len(statesP),nFolds))
scoresNP = scoresPP.copy()
scoresNN = np.zeros((len(statesN),nFolds))
scoresPN = scoresNN.copy()

# The per-state evaluation arrays do not depend on the fold, so build them once
# instead of re-filtering inside every fold of both experiments.
evalP = []
for state in statesP:
    stateFeatures, stateLabels = filterByState(state, featuredfP)
    evalP.append((stateFeatures.values, stateLabels.values))
evalN = []
for state in statesN:
    stateFeatures, stateLabels = filterByState(state, featuredfN)
    evalN.append((stateFeatures.values, stateLabels.values))

# (training features, training labels, [(evaluation sets, score table), ...]);
# the same-valence states are scored first, then the opposite valence.
experiments = (
    (featuresP, labelsP, ((evalP, scoresPP), (evalN, scoresPN))),
    (featuresN, labelsN, ((evalN, scoresNN), (evalP, scoresNP))),
)

for trainX, trainY, targets in experiments:
    # Repeat for k folds
    for i, (train, test) in enumerate(kfold.split(trainX, trainY)):
        # Train a model on this fold's training rows
        _, model = classify(trainX[train], trainY[train], trainX[test])
        # NOTE(review): states are scored on all of their rows, including rows the
        # model was fitted on when the valence matches - confirm this is intended.
        for evalSets, table in targets:
            for s, (featuresS, labelsS) in enumerate(evalSets):
                table[s, i] = evaluateSpearman(labelsS, model.predict(featuresS))

Display the results

In [11]:
# Positive states: fold-averaged score when trained on positive vs. negative data
dfP = pd.DataFrame(
    {
        'Train Positive': np.mean(scoresPP,axis=1),
        # NOTE(review): hard-coded values, presumably per-state model scores copied
        # from another analysis - confirm the source and keep them in sync.
        'Train States': [0.28,0.27,0.29,0.26,0.27,0.24,0.20,0.13,0.21],
        'Train Negative': np.mean(scoresNP,axis=1),
    },
    index=statesP,
)
# Negative states: fold-averaged score when trained on negative vs. positive data
dfN = pd.DataFrame(
    {
        'Train Negative': np.mean(scoresNN,axis=1),
        # NOTE(review): hard-coded values, see the note on dfP above
        'Train States': [0.28,0.28,0.28,0.20,0.17,0.16,0.14],
        'Train Positive': np.mean(scoresPN,axis=1),
    },
    index=statesN,
)

In [12]:
# Append a 'Mean' row (average over states) to each table.
# Any existing 'Mean' row is dropped first so re-running this cell does not stack
# a second one; DataFrame.append was removed in pandas 2.0, so pd.concat is used.
dfP = dfP.drop(index='Mean', errors='ignore')
avg = dfP.mean(axis=0)
avg.name = 'Mean'
dfP = pd.concat([dfP, avg.to_frame().T])

dfN = dfN.drop(index='Mean', errors='ignore')
avg = dfN.mean(axis=0)
avg.name = 'Mean'
dfN = pd.concat([dfN, avg.to_frame().T])

In [14]:
# Show the negative-state scores rounded to two decimals
dfN.round(2)

Unnamed: 0,Train Negative,Train States,Train Positive
Frustration,0.28,0.28,-0.28
Confusion,0.31,0.28,-0.3
Disappointment,0.28,0.28,-0.27
Anxiety,0.21,0.2,-0.2
Sadness,0.18,0.17,-0.17
Mind Wandering,0.14,0.16,-0.14
Boredom,0.14,0.14,-0.13
Mean,0.22,0.22,-0.21


In [15]:
# Write the positive/negative affect summary tables to disk
for table, path in (
    (dfP, '../Data (Algebra 1)/scores positive.csv'),
    (dfN, '../Data (Algebra 1)/scores negative.csv'),
):
    table.to_csv(path)