In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pingouin as pg


This just imports the pre and post surveys as pandas dataframes.

In [2]:
path = "/Users/erikmenke/UC-Merced/Data/2018-2019/NSF HSI/2018-2019StudentData/2019Spring/Survey/"
PreSurveyDataFilename = "Presurvey.xlsx"
PostSurveyDataFilename = "Postsurvey.xlsx"

PreSurveyData = pd.read_excel(path+PreSurveyDataFilename)
PostSurveyData = pd.read_excel(path+PostSurveyDataFilename)


First, I'm going to create boolean masks based on the factor analysis to create different groups for the dataframes. The first mask will pick out questions 1, 4, 5, and 10 which all share a common factor. The second mask will pick out questions 7, 11, 14, and 17.

In [3]:
#First mask picks out questions 1, 4, 5, and 10
factor2Mask1 = [True] + 2*[False] + 2*[True] + 4*[False] + [True] + 10*[False]

#Second mask picks out questions 7, 11, 14, and 17
factor2Mask2 = 6*[False] + [True] + 3*[False] + [True] + 2*[False] + [True] + 2*[False] + [True] + 3*[False]



Get the loadings for the attitudes on chemistry for the presemester survey

In [4]:
ChemAttPre = PreSurveyData.iloc[:,1:21][1:]
ChemAttPre.dropna(inplace=True)
ChemAttPre = ChemAttPre.astype(str).astype(int)

#This is the first set of factors, reversing question 1
ChemPreFactor1Frame = ChemAttPre.iloc[:,factor2Mask1]
ChemPreFactor1Frame.loc[:,'Q1_1'] = ChemPreFactor1Frame.loc[:,'Q1_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
ChemPreFactor2Frame = ChemAttPre.iloc[:,factor2Mask2]
ChemPreFactor2Frame.loc[:,'Q1_7'] = ChemPreFactor2Frame.loc[:,'Q1_7'].add(-8).mul(-1)
ChemPreFactor2Frame.loc[:,'Q1_11'] = ChemPreFactor2Frame.loc[:,'Q1_11'].add(-8).mul(-1)
ChemPreFactor2Frame.loc[:,'Q1_14'] = ChemPreFactor2Frame.loc[:,'Q1_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(ChemPreFactor1Frame))
print(pg.cronbach_alpha(ChemPreFactor2Frame))

print(ChemPreFactor1Frame.corr())
print(ChemPreFactor2Frame.corr())


(0.7820526223705248, array([0.754, 0.807]))
(0.7508778852672695, array([0.719, 0.78 ]))
           Q1_1      Q1_4      Q1_5     Q1_10
Q1_1   1.000000  0.451107  0.559212  0.352612
Q1_4   0.451107  1.000000  0.601789  0.504428
Q1_5   0.559212  0.601789  1.000000  0.377262
Q1_10  0.352612  0.504428  0.377262  1.000000
           Q1_7     Q1_11     Q1_14     Q1_17
Q1_7   1.000000  0.614945  0.521277  0.279125
Q1_11  0.614945  1.000000  0.570127  0.343056
Q1_14  0.521277  0.570127  1.000000  0.320937
Q1_17  0.279125  0.343056  0.320937  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Here's the code for the post semester survey, for the chemistry attitudes portion

In [5]:
ChemAttPost = PostSurveyData.iloc[:,1:21][1:]
ChemAttPost.dropna(inplace=True)
ChemAttPost = ChemAttPost.astype(str).astype(int)

#This is the first set of factors, reversing question 1
ChemPostFactor1Frame = ChemAttPost.iloc[:,factor2Mask1]
ChemPostFactor1Frame.loc[:,'Q1_1'] = ChemPostFactor1Frame.loc[:,'Q1_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
ChemPostFactor2Frame = ChemAttPost.iloc[:,factor2Mask2]
ChemPostFactor2Frame.loc[:,'Q1_7'] = ChemPostFactor2Frame.loc[:,'Q1_7'].add(-8).mul(-1)
ChemPostFactor2Frame.loc[:,'Q1_11'] = ChemPostFactor2Frame.loc[:,'Q1_11'].add(-8).mul(-1)
ChemPostFactor2Frame.loc[:,'Q1_14'] = ChemPostFactor2Frame.loc[:,'Q1_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(ChemPostFactor1Frame))
print(pg.cronbach_alpha(ChemPostFactor2Frame))

print(ChemPostFactor1Frame.corr())
print(ChemPostFactor2Frame.corr())


(0.8211344454403814, array([0.776, 0.859]))
(0.8149764549912659, array([0.768, 0.854]))
           Q1_1      Q1_4      Q1_5     Q1_10
Q1_1   1.000000  0.509355  0.624809  0.572064
Q1_4   0.509355  1.000000  0.677974  0.421076
Q1_5   0.624809  0.677974  1.000000  0.408555
Q1_10  0.572064  0.421076  0.408555  1.000000
           Q1_7     Q1_11     Q1_14     Q1_17
Q1_7   1.000000  0.707692  0.536726  0.410294
Q1_11  0.707692  1.000000  0.690710  0.419799
Q1_14  0.536726  0.690710  1.000000  0.457785
Q1_17  0.410294  0.419799  0.457785  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [6]:
#Calculate the pre and post means for factor 1, as well as effect size
ChemPreFactor1Mean = (ChemPreFactor1Frame).mean().mean()
ChemPreFactor1DOF = (ChemPreFactor1Frame.count()-1).sum()
ChemPreFactor1Variance = ((ChemPreFactor1Frame.std() ** 2) * (ChemPreFactor1Frame.count() - 1)).sum() / ChemPreFactor1DOF

ChemPostFactor1Mean = (ChemPostFactor1Frame).mean().mean()
ChemPostFactor1DOF = (ChemPostFactor1Frame.count()-1).sum()
ChemPostFactor1Variance = ((ChemPostFactor1Frame.std() ** 2) * (ChemPostFactor1Frame.count() - 1)).sum() / ChemPostFactor1DOF

ChemFactor1PooledSD = np.sqrt((ChemPostFactor1DOF * ChemPreFactor1Variance + ChemPostFactor1DOF * ChemPostFactor1Variance) / (ChemPostFactor1DOF + ChemPostFactor1DOF))

ChemFactor1EffectSize = (ChemPostFactor1Mean - ChemPreFactor1Mean)/ChemFactor1PooledSD

#Calculate the pre and post means for factor 2, as well as effect size
ChemPreFactor2Mean = (ChemPreFactor2Frame).mean().mean()
ChemPreFactor2DOF = (ChemPreFactor2Frame.count()-1).sum()
ChemPreFactor2Variance = ((ChemPreFactor2Frame.std() ** 2) * (ChemPreFactor2Frame.count() - 1)).sum() / ChemPreFactor2DOF

ChemPostFactor2Mean = (ChemPostFactor2Frame).mean().mean()
ChemPostFactor2DOF = (ChemPostFactor2Frame.count()-1).sum()
ChemPostFactor2Variance = ((ChemPostFactor2Frame.std() ** 2) * (ChemPostFactor2Frame.count() - 1)).sum() / ChemPostFactor2DOF

ChemFactor2PooledSD = np.sqrt((ChemPostFactor2DOF * ChemPreFactor2Variance + ChemPostFactor2DOF * ChemPostFactor2Variance) / (ChemPostFactor2DOF + ChemPostFactor2DOF))

ChemFactor2EffectSize = (ChemPostFactor2Mean - ChemPreFactor2Mean)/ChemFactor2PooledSD




Now, trying this with the bio attitudes section of the presurvey

In [7]:
BioAttPre = PreSurveyData.iloc[:,21:41][1:]
BioAttPre.dropna(inplace=True)
BioAttPre = BioAttPre.astype(str).astype(int)

#This is the first set of factors, reversing question 1
BioPreFactor1Frame = BioAttPre.iloc[:,factor2Mask1]
BioPreFactor1Frame.loc[:,'Q34_1'] = BioPreFactor1Frame.loc[:,'Q34_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
BioPreFactor2Frame = BioAttPre.iloc[:,factor2Mask2]
BioPreFactor2Frame.loc[:,'Q34_7'] = BioPreFactor2Frame.loc[:,'Q34_7'].add(-8).mul(-1)
BioPreFactor2Frame.loc[:,'Q34_11'] = BioPreFactor2Frame.loc[:,'Q34_11'].add(-8).mul(-1)
BioPreFactor2Frame.loc[:,'Q34_14'] = BioPreFactor2Frame.loc[:,'Q34_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(BioPreFactor1Frame))
print(pg.cronbach_alpha(BioPreFactor2Frame))

print(BioPreFactor1Frame.corr())
print(BioPreFactor2Frame.corr())


(0.777376504120464, array([0.749, 0.803]))
(0.74920519203003, array([0.717, 0.778]))
           Q34_1     Q34_4     Q34_5    Q34_10
Q34_1   1.000000  0.479603  0.530608  0.367714
Q34_4   0.479603  1.000000  0.580463  0.492889
Q34_5   0.530608  0.580463  1.000000  0.368523
Q34_10  0.367714  0.492889  0.368523  1.000000
           Q34_7    Q34_11    Q34_14    Q34_17
Q34_7   1.000000  0.616967  0.531550  0.281947
Q34_11  0.616967  1.000000  0.538796  0.340197
Q34_14  0.531550  0.538796  1.000000  0.325353
Q34_17  0.281947  0.340197  0.325353  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Now for the postsemester survey, for the Bio attitudes

In [8]:
BioAttPost = PostSurveyData.iloc[:,21:41][1:]
BioAttPost.dropna(inplace=True)
BioAttPost = BioAttPost.astype(str).astype(int)

#This is the first set of factors, reversing question 1
BioPostFactor1Frame = BioAttPost.iloc[:,factor2Mask1]
BioPostFactor1Frame.loc[:,'Q34_1'] = BioPostFactor1Frame.loc[:,'Q34_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 4
BioPostFactor2Frame = BioAttPost.iloc[:,factor2Mask2]
BioPostFactor2Frame.loc[:,'Q34_7'] = BioPostFactor2Frame.loc[:,'Q34_7'].add(-8).mul(-1)
BioPostFactor2Frame.loc[:,'Q34_11'] = BioPostFactor2Frame.loc[:,'Q34_11'].add(-8).mul(-1)
BioPostFactor2Frame.loc[:,'Q34_14'] = BioPostFactor2Frame.loc[:,'Q34_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(BioPostFactor1Frame))
print(pg.cronbach_alpha(BioPostFactor2Frame))

print(BioPostFactor1Frame.corr())
print(BioPostFactor2Frame.corr())


(0.8197836967248527, array([0.772, 0.859]))
(0.7946388382175261, array([0.741, 0.84 ]))
           Q34_1     Q34_4     Q34_5    Q34_10
Q34_1   1.000000  0.478917  0.540404  0.461637
Q34_4   0.478917  1.000000  0.611555  0.645062
Q34_5   0.540404  0.611555  1.000000  0.507167
Q34_10  0.461637  0.645062  0.507167  1.000000
           Q34_7    Q34_11    Q34_14    Q34_17
Q34_7   1.000000  0.749058  0.668840  0.230572
Q34_11  0.749058  1.000000  0.739075  0.307498
Q34_14  0.668840  0.739075  1.000000  0.328510
Q34_17  0.230572  0.307498  0.328510  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [9]:
#Calculate the pre and post means for factor 1, as well as effect size
BioPreFactor1Mean = (BioPreFactor1Frame).mean().mean()
BioPreFactor1DOF = (BioPreFactor1Frame.count()-1).sum()
BioPreFactor1Variance = ((BioPreFactor1Frame.std() ** 2) * (BioPreFactor1Frame.count() - 1)).sum() / BioPreFactor1DOF

BioPostFactor1Mean = (BioPostFactor1Frame).mean().mean()
BioPostFactor1DOF = (BioPostFactor1Frame.count()-1).sum()
BioPostFactor1Variance = ((BioPostFactor1Frame.std() ** 2) * (BioPostFactor1Frame.count() - 1)).sum() / BioPostFactor1DOF

BioFactor1PooledSD = np.sqrt((BioPostFactor1DOF * BioPreFactor1Variance + BioPostFactor1DOF * BioPostFactor1Variance) / (BioPostFactor1DOF + BioPostFactor1DOF))

BioFactor1EffectSize = (BioPostFactor1Mean - BioPreFactor1Mean)/BioFactor1PooledSD

#Calculate the pre and post means for factor 2, as well as effect size
BioPreFactor2Mean = (BioPreFactor2Frame).mean().mean()
BioPreFactor2DOF = (BioPreFactor2Frame.count()-1).sum()
BioPreFactor2Variance = ((BioPreFactor2Frame.std() ** 2) * (BioPreFactor2Frame.count() - 1)).sum() / BioPreFactor2DOF

BioPostFactor2Mean = (BioPostFactor2Frame).mean().mean()
BioPostFactor2DOF = (BioPostFactor2Frame.count()-1).sum()
BioPostFactor2Variance = ((BioPostFactor2Frame.std() ** 2) * (BioPostFactor2Frame.count() - 1)).sum() / BioPostFactor2DOF

BioFactor2PooledSD = np.sqrt((BioPostFactor2DOF * BioPreFactor2Variance + BioPostFactor2DOF * BioPostFactor2Variance) / (BioPostFactor2DOF + BioPostFactor2DOF))

BioFactor2EffectSize = (BioPostFactor2Mean - BioPreFactor2Mean)/BioFactor2PooledSD



Finally, the math attitudes section. This is the presurvey portion

In [10]:
MathAttPre = PreSurveyData.iloc[:,41:61][1:]
MathAttPre.dropna(inplace=True)
MathAttPre = MathAttPre.astype(str).astype(int)

#This is the first set of factors, reversing question 1
MathPreFactor1Frame = MathAttPre.iloc[:,factor2Mask1]
MathPreFactor1Frame.loc[:,'Q32_1'] = MathPreFactor1Frame.loc[:,'Q32_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
MathPreFactor2Frame = MathAttPre.iloc[:,factor2Mask2]
MathPreFactor2Frame.loc[:,'Q32_7'] = MathPreFactor2Frame.loc[:,'Q32_7'].add(-8).mul(-1)
MathPreFactor2Frame.loc[:,'Q32_11'] = MathPreFactor2Frame.loc[:,'Q32_11'].add(-8).mul(-1)
MathPreFactor2Frame.loc[:,'Q32_14'] = MathPreFactor2Frame.loc[:,'Q32_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(MathPreFactor1Frame))
print(pg.cronbach_alpha(MathPreFactor2Frame))

print(MathPreFactor1Frame.corr())
print(MathPreFactor2Frame.corr())


(0.8226601090100584, array([0.8  , 0.843]))
(0.8108455612804587, array([0.787, 0.833]))
           Q32_1     Q32_4     Q32_5    Q32_10
Q32_1   1.000000  0.598340  0.587169  0.397380
Q32_4   0.598340  1.000000  0.662338  0.548867
Q32_5   0.587169  0.662338  1.000000  0.458982
Q32_10  0.397380  0.548867  0.458982  1.000000
           Q32_7    Q32_11    Q32_14    Q32_17
Q32_7   1.000000  0.645108  0.621118  0.390556
Q32_11  0.645108  1.000000  0.708807  0.399225
Q32_14  0.621118  0.708807  1.000000  0.415008
Q32_17  0.390556  0.399225  0.415008  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Now for the postsemester survey, for the math attitudes

In [11]:
MathAttPost = PostSurveyData.iloc[:,41:61][1:]
MathAttPost.dropna(inplace=True)
MathAttPost = MathAttPost.astype(str).astype(int)

#This is the first set of factors, reversing question 1
MathPostFactor1Frame = MathAttPost.iloc[:,factor2Mask1]
MathPostFactor1Frame.loc[:,'Q32_1'] = MathPostFactor1Frame.loc[:,'Q32_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
MathPostFactor2Frame = MathAttPost.iloc[:,factor2Mask2]
MathPostFactor2Frame.loc[:,'Q32_7'] = MathPostFactor2Frame.loc[:,'Q32_7'].add(-8).mul(-1)
MathPostFactor2Frame.loc[:,'Q32_11'] = MathPostFactor2Frame.loc[:,'Q32_11'].add(-8).mul(-1)
MathPostFactor2Frame.loc[:,'Q32_14'] = MathPostFactor2Frame.loc[:,'Q32_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(MathPostFactor1Frame))
print(pg.cronbach_alpha(MathPostFactor2Frame))

print(MathPostFactor1Frame.corr())
print(MathPostFactor2Frame.corr())


(0.8728177104168708, array([0.839, 0.901]))
(0.8326273318158199, array([0.788, 0.87 ]))
           Q32_1     Q32_4     Q32_5    Q32_10
Q32_1   1.000000  0.700676  0.684215  0.528666
Q32_4   0.700676  1.000000  0.727900  0.625575
Q32_5   0.684215  0.727900  1.000000  0.547514
Q32_10  0.528666  0.625575  0.547514  1.000000
           Q32_7    Q32_11    Q32_14    Q32_17
Q32_7   1.000000  0.750106  0.677991  0.390445
Q32_11  0.750106  1.000000  0.688000  0.347640
Q32_14  0.677991  0.688000  1.000000  0.537816
Q32_17  0.390445  0.347640  0.537816  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [12]:
#Calculate the pre and post means for factor 1, as well as effect size
MathPreFactor1Mean = (MathPreFactor1Frame).mean().mean()
MathPreFactor1DOF = (MathPreFactor1Frame.count()-1).sum()
MathPreFactor1Variance = ((MathPreFactor1Frame.std() ** 2) * (MathPreFactor1Frame.count() - 1)).sum() / MathPreFactor1DOF

MathPostFactor1Mean = (MathPostFactor1Frame).mean().mean()
MathPostFactor1DOF = (MathPostFactor1Frame.count()-1).sum()
MathPostFactor1Variance = ((MathPostFactor1Frame.std() ** 2) * (MathPostFactor1Frame.count() - 1)).sum() / MathPostFactor1DOF

MathFactor1PooledSD = np.sqrt((MathPostFactor1DOF * MathPreFactor1Variance + MathPostFactor1DOF * MathPostFactor1Variance) / (MathPostFactor1DOF + MathPostFactor1DOF))

MathFactor1EffectSize = (MathPostFactor1Mean - MathPreFactor1Mean)/MathFactor1PooledSD

#Calculate the pre and post means for factor 2, as well as effect size
MathPreFactor2Mean = (MathPreFactor2Frame).mean().mean()
MathPreFactor2DOF = (MathPreFactor2Frame.count()-1).sum()
MathPreFactor2Variance = ((MathPreFactor2Frame.std() ** 2) * (MathPreFactor2Frame.count() - 1)).sum() / MathPreFactor2DOF

MathPostFactor2Mean = (MathPostFactor2Frame).mean().mean()
MathPostFactor2DOF = (MathPostFactor2Frame.count()-1).sum()
MathPostFactor2Variance = ((MathPostFactor2Frame.std() ** 2) * (MathPostFactor2Frame.count() - 1)).sum() / MathPostFactor2DOF

MathFactor2PooledSD = np.sqrt((MathPostFactor2DOF * MathPreFactor2Variance + MathPostFactor2DOF * MathPostFactor2Variance) / (MathPostFactor2DOF + MathPostFactor2DOF))

MathFactor2EffectSize = (MathPostFactor2Mean - MathPreFactor2Mean)/MathFactor2PooledSD



In [13]:
#The means and effect sizes for each factor in the three surveys
AttitudeCompare = pd.DataFrame(columns = ['Subscale','Presemester', 'Postsemester', 'Effect size'])

#This is a spacer
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Factors', \
                             'Presemester': '', \
                             'Postsemester': '',
                             'Effect size': ''}]\
                          )

#This is the first factor for chemistry
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Cognition, CHEM', \
                             'Presemester': 100*ChemPreFactor1Mean/7, \
                             'Postsemester': 100*ChemPostFactor1Mean/7,
                             'Effect size': ChemFactor1EffectSize}]\
                          )

#This is the first factor for bio
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Cognition, BIO', \
                             'Presemester': 100*BioPreFactor1Mean/7, \
                             'Postsemester': 100*BioPostFactor1Mean/7,
                             'Effect size': BioFactor1EffectSize}]\
                          )

#This is the first factor for math
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Cognition, MATH', \
                             'Presemester': 100*MathPreFactor1Mean/7, \
                             'Postsemester': 100*MathPostFactor1Mean/7,
                             'Effect size': MathFactor1EffectSize}]\
                          )

#This is the second factor for chemistry
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Affect, CHEM', \
                             'Presemester': 100*ChemPreFactor2Mean/7, \
                             'Postsemester': 100*ChemPostFactor2Mean/7,
                             'Effect size': ChemFactor2EffectSize}]\
                          )

#This is the second factor for bio
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Affect, BIO', \
                             'Presemester': 100*BioPreFactor2Mean/7, \
                             'Postsemester': 100*BioPostFactor2Mean/7,
                             'Effect size': BioFactor2EffectSize}]\
                          )

#This is the second factor for math
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Affect, MATH', \
                             'Presemester': 100*MathPreFactor2Mean/7, \
                             'Postsemester': 100*MathPostFactor2Mean/7,
                             'Effect size': MathFactor2EffectSize}]\
                          )




In [25]:
AttitudeCompare

#ExportFilename = "AttitudeCompare_Lewis.xlsx"
#AttitudeCompare.to_excel(path + ExportFilename)


Unnamed: 0,Subscale,Presemester,Postsemester,Effect size
0,Factors,,,
0,"Cognition, CHEM",46.0454,44.2857,-0.0951627
0,"Cognition, BIO",49.8861,47.7528,-0.0990981
0,"Cognition, MATH",43.5245,43.555,0.00126883
0,"Affect, CHEM",63.5653,60.9586,-0.126242
0,"Affect, BIO",67.6294,63.6437,-0.196649
0,"Affect, MATH",61.7362,62.1716,0.017141


In [15]:
#The means and standard deviations for each factor in the three surveys
AttitudeData = pd.DataFrame(columns = ['Subscale','Presemester mean', \
                                       'Presemester SD', \
                                       'Postsemester mean', \
                                       'Postsemester SD'])

#This is a spacer
AttitudeData = AttitudeData.append( [{'Subscale': 'Factors', \
                             'Presemester mean': '', \
                             'Presemester SD': '',\
                             'Postsemester mean': '',\
                             'Postsemester SD': ''}]\
                          )

#This is the first factor for chemistry
AttitudeData = AttitudeData.append( [{'Subscale': 'Cognition, CHEM', \
                             'Presemester mean': ChemPreFactor1Mean, \
                             'Presemester SD': np.sqrt(ChemPreFactor1Variance), \
                             'Postsemester mean': ChemPostFactor1Mean, \
                             'Postsemester SD': np.sqrt(ChemPostFactor1Variance)}]\
                          )

#This is the first factor for bio
AttitudeData = AttitudeData.append( [{'Subscale': 'Cognition, BIO', \
                             'Presemester mean': BioPreFactor1Mean, \
                             'Presemester SD': np.sqrt(BioPreFactor1Variance), \
                             'Postsemester mean': BioPostFactor1Mean, \
                             'Postsemester SD': np.sqrt(BioPostFactor1Variance)}]\
                          )

#This is the first factor for math
AttitudeData = AttitudeData.append( [{'Subscale': 'Cognition, MATH', \
                             'Presemester mean': MathPreFactor1Mean, \
                             'Presemester SD': np.sqrt(MathPreFactor1Variance), \
                             'Postsemester mean': MathPostFactor1Mean, \
                             'Postsemester SD': np.sqrt(MathPostFactor1Variance)}]\
                          )

#This is the second factor for chemistry
AttitudeData = AttitudeData.append( [{'Subscale': 'Affect, CHEM', \
                             'Presemester mean': ChemPreFactor2Mean, \
                             'Presemester SD': np.sqrt(ChemPreFactor2Variance), \
                             'Postsemester mean': ChemPostFactor2Mean, \
                             'Postsemester SD': np.sqrt(ChemPostFactor2Variance)}]\
                          )

#This is the second factor for bio
AttitudeData = AttitudeData.append( [{'Subscale': 'Affect, BIO', \
                             'Presemester mean': BioPreFactor2Mean, \
                             'Presemester SD': np.sqrt(BioPreFactor2Variance), \
                             'Postsemester mean': BioPostFactor2Mean, \
                             'Postsemester SD': np.sqrt(BioPostFactor2Variance)}]\
                          )

#This is the second factor for math
AttitudeData = AttitudeData.append( [{'Subscale': 'Affect, MATH', \
                             'Presemester mean': MathPreFactor2Mean, \
                             'Presemester SD': np.sqrt(MathPreFactor2Variance), \
                             'Postsemester mean': MathPostFactor2Mean, \
                             'Postsemester SD': np.sqrt(MathPostFactor2Variance)}]\
                          )



In [21]:
AttitudeData

#ExportFilename = "AttitudeData_Lewis.xlsx"
#AttitudeData.to_excel(path + ExportFilename)


In [17]:
#Calculate the presemester t-value between Chem and Bio for factor 1
ChemBioPrePooledVar1 = (ChemPreFactor1DOF * ChemPreFactor1Variance + BioPreFactor1DOF * BioPreFactor1Variance) / (ChemPreFactor1DOF + BioPreFactor1DOF)
ChemBioPreN1 = (1/ChemPreFactor1DOF) + (1/BioPreFactor1DOF)
ChemBioPreT1 = (ChemPreFactor1Mean - BioPreFactor1Mean)/np.sqrt(ChemBioPrePooledVar1*ChemBioPreN1)

#Calculate the presemester t-value between Chem and Math for factor 1
ChemMathPrePooledVar1 = (ChemPreFactor1DOF * ChemPreFactor1Variance + MathPreFactor1DOF * MathPreFactor1Variance) / (ChemPreFactor1DOF + MathPreFactor1DOF)
ChemMathPreN1 = (1/ChemPreFactor1DOF) + (1/MathPreFactor1DOF)
ChemMathPreT1 = (ChemPreFactor1Mean - MathPreFactor1Mean)/np.sqrt(ChemMathPrePooledVar1*ChemMathPreN1)

#Calculate the presemester t-value between Bio and Math for factor 1
BioMathPrePooledVar1 = (BioPreFactor1DOF * BioPreFactor1Variance + MathPreFactor1DOF * MathPreFactor1Variance) / (BioPreFactor1DOF + MathPreFactor1DOF)
BioMathPreN1 = (1/BioPreFactor1DOF) + (1/BioPreFactor1DOF)
BioMathPreT1 = (BioPreFactor1Mean - MathPreFactor1Mean)/np.sqrt(BioMathPrePooledVar1*BioMathPreN1)

#Calculate the presemester t-value between Chem and Bio for factor 2
ChemBioPrePooledVar2 = (ChemPreFactor2DOF * ChemPreFactor2Variance + BioPreFactor2DOF * BioPreFactor2Variance) / (ChemPreFactor2DOF + BioPreFactor2DOF)
ChemBioPreN2 = (1/ChemPreFactor2DOF) + (1/BioPreFactor2DOF)
ChemBioPreT2 = (ChemPreFactor2Mean - BioPreFactor2Mean)/np.sqrt(ChemBioPrePooledVar2*ChemBioPreN2)

#Calculate the presemester t-value between Chem and Math for factor 2
ChemMathPrePooledVar2 = (ChemPreFactor2DOF * ChemPreFactor2Variance + MathPreFactor2DOF * MathPreFactor2Variance) / (ChemPreFactor2DOF + MathPreFactor2DOF)
ChemMathPreN2 = (1/ChemPreFactor2DOF) + (1/MathPreFactor2DOF)
ChemMathPreT2 = (ChemPreFactor2Mean - MathPreFactor2Mean)/np.sqrt(ChemMathPrePooledVar2*ChemMathPreN2)

#Calculate the presemester t-value between Bio and Math for factor 2
BioMathPrePooledVar2 = (BioPreFactor2DOF * BioPreFactor2Variance + MathPreFactor2DOF * MathPreFactor2Variance) / (BioPreFactor2DOF + MathPreFactor2DOF)
BioMathPreN2 = (1/BioPreFactor2DOF) + (1/BioPreFactor2DOF)
BioMathPreT2 = (BioPreFactor2Mean - MathPreFactor2Mean)/np.sqrt(BioMathPrePooledVar2*BioMathPreN2)

#Calculate the postsemester t-value between Chem and Bio for factor 1
ChemBioPostPooledVar1 = (ChemPostFactor1DOF * ChemPostFactor1Variance + BioPostFactor1DOF * BioPostFactor1Variance) / (ChemPostFactor1DOF + BioPostFactor1DOF)
ChemBioPostN1 = (1/ChemPostFactor1DOF) + (1/BioPostFactor1DOF)
ChemBioPostT1 = (ChemPostFactor1Mean - BioPostFactor1Mean)/np.sqrt(ChemBioPostPooledVar1*ChemBioPostN1)

#Calculate the postsemester t-value between Chem and Math for factor 1
ChemMathPostPooledVar1 = (ChemPostFactor1DOF * ChemPostFactor1Variance + MathPostFactor1DOF * MathPostFactor1Variance) / (ChemPostFactor1DOF + MathPostFactor1DOF)
ChemMathPostN1 = (1/ChemPostFactor1DOF) + (1/MathPostFactor1DOF)
ChemMathPostT1 = (ChemPostFactor1Mean - MathPostFactor1Mean)/np.sqrt(ChemMathPostPooledVar1*ChemMathPostN1)

#Calculate the postsemester t-value between Bio and Math for factor 1
BioMathPostPooledVar1 = (BioPostFactor1DOF * BioPostFactor1Variance + MathPostFactor1DOF * MathPostFactor1Variance) / (BioPostFactor1DOF + MathPostFactor1DOF)
BioMathPostN1 = (1/BioPostFactor1DOF) + (1/BioPostFactor1DOF)
BioMathPostT1 = (BioPostFactor1Mean - MathPostFactor1Mean)/np.sqrt(BioMathPostPooledVar1*BioMathPostN1)

#Calculate the postsemester t-value between Chem and Bio for factor 2
ChemBioPostPooledVar2 = (ChemPostFactor2DOF * ChemPostFactor2Variance + BioPostFactor2DOF * BioPostFactor2Variance) / (ChemPostFactor2DOF + BioPostFactor2DOF)
ChemBioPostN2 = (1/ChemPostFactor2DOF) + (1/BioPostFactor2DOF)
ChemBioPostT2 = (ChemPostFactor2Mean - BioPostFactor2Mean)/np.sqrt(ChemBioPostPooledVar2*ChemBioPostN2)

#Calculate the postsemester t-value between Chem and Math for factor 2
ChemMathPostPooledVar2 = (ChemPostFactor2DOF * ChemPostFactor2Variance + MathPostFactor2DOF * MathPostFactor2Variance) / (ChemPostFactor2DOF + MathPostFactor2DOF)
ChemMathPostN2 = (1/ChemPostFactor2DOF) + (1/MathPostFactor2DOF)
ChemMathPostT2 = (ChemPostFactor2Mean - MathPostFactor2Mean)/np.sqrt(ChemMathPostPooledVar2*ChemMathPostN2)

#Calculate the postsemester t-value between Bio and Math for factor 2
BioMathPostPooledVar2 = (BioPostFactor2DOF * BioPostFactor2Variance + MathPostFactor2DOF * MathPostFactor2Variance) / (BioPostFactor2DOF + MathPostFactor2DOF)
BioMathPostN2 = (1/BioPostFactor2DOF) + (1/BioPostFactor2DOF)
BioMathPostT2 = (BioPostFactor2Mean - MathPostFactor2Mean)/np.sqrt(BioMathPostPooledVar2*BioMathPostN2)



In [18]:
#The means and standard deviations for each factor in the three surveys
TCompare = pd.DataFrame(columns = ['Subscale','Presemester t-value', \
                              'Presemester DOF', \
                                      'Postsemester t-value', \
                                       'Postsemester DOF'])

#This is a spacer
TCompare = TCompare.append( [{'Subscale': 'Factors', \
                             'Presemester t-value': '', \
                             'Presemester DOF': '', \
                             'Postsemester t-value': '',\
                             'Postsemester DOF': ''}]\
                          )

#This is the first factor for chemistry
TCompare = TCompare.append( [{'Subscale': 'Cognition, CHEM-BIO', \
                             'Presemester t-value': ChemBioPreT1, \
                             'Presemester DOF': ChemPreFactor1DOF + BioPreFactor1DOF, \
                             'Postsemester t-value': ChemBioPostT1, \
                             'Postsemester DOF': ChemPostFactor1DOF + BioPostFactor1DOF}]\
                          )

#This is the first factor for bio
TCompare = TCompare.append( [{'Subscale': 'Cognition, CHEM-MATH', \
                             'Presemester t-value': ChemMathPreT1, \
                             'Presemester DOF': ChemPreFactor1DOF + MathPreFactor1DOF, \
                             'Postsemester t-value': ChemMathPostT1, \
                             'Postsemester DOF': ChemPostFactor1DOF + MathPostFactor1DOF}]\
                          )

#This is the first factor for math
TCompare = TCompare.append( [{'Subscale': 'Cognition, BIO-MATH', \
                             'Presemester t-value': BioMathPreT1, \
                             'Presemester DOF': BioPreFactor1DOF + MathPreFactor1DOF, \
                             'Postsemester t-value': BioMathPostT1, \
                             'Postsemester DOF': BioPostFactor1DOF + MathPostFactor1DOF}]\
                          )

#This is the second factor for chemistry
TCompare = TCompare.append( [{'Subscale': 'Affect, CHEM-BIO', \
                             'Presemester t-value': ChemBioPreT2, \
                             'Presemester DOF': ChemPreFactor2DOF + BioPreFactor2DOF, \
                             'Postsemester t-value': ChemBioPostT2, \
                             'Postsemester DOF': ChemPostFactor2DOF + BioPostFactor2DOF}]\
                          )

#This is the second factor for bio
TCompare = TCompare.append( [{'Subscale': 'Affect, CHEM-MATH', \
                             'Presemester t-value': ChemMathPreT2, \
                             'Presemester DOF': ChemPreFactor2DOF + MathPreFactor2DOF, \
                             'Postsemester t-value': ChemMathPostT2, \
                             'Postsemester DOF': ChemPostFactor2DOF + MathPostFactor2DOF}]\
                          )

#This is the second factor for math
TCompare = TCompare.append( [{'Subscale': 'Affect, BIO-MATH', \
                             'Presemester t-value': BioMathPreT2, \
                             'Presemester DOF': BioPreFactor2DOF + MathPreFactor2DOF, \
                             'Postsemester t-value': BioMathPostT2, \
                             'Postsemester DOF': BioPostFactor2DOF + MathPostFactor2DOF}]\
                          )



In [24]:
TCompare

#ExportFilename = "TCompare_Lewis.xlsx"
#TCompare.to_excel(path + ExportFilename)


Unnamed: 0,Subscale,Presemester t-value,Presemester DOF,Postsemester t-value,Postsemester DOF
0,Factors,,,,
0,"Cognition, CHEM-BIO",-7.37462,5548.0,-3.20957,1464.0
0,"Cognition, CHEM-MATH",4.53439,5528.0,0.630022,1448.0
0,"Cognition, BIO-MATH",10.7384,5492.0,3.347,1400.0
0,"Affect, CHEM-BIO",-7.70594,5548.0,-2.41556,1464.0
0,"Affect, CHEM-MATH",3.06165,5528.0,-0.963827,1448.0
0,"Affect, BIO-MATH",9.90458,5492.0,1.1638,1400.0
