In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pingouin as pg


This just imports the pre and post surveys as pandas dataframes.

In [2]:
path = "/Users/erikmenke/UC-Merced/Data/2019-2020/NSF HSI/Spring 2020/Survey/Postterm/"
PreSurveyDataFilename = "Chem Attitudes_Pre-Spring 2020.xlsx"
PostSurveyDataFilename = "Chem Attitudes_Post-Spring 2020.xlsx"

PreSurveyData = pd.read_excel(path+PreSurveyDataFilename)
PostSurveyData = pd.read_excel(path+PostSurveyDataFilename)


First, I'm going to create boolean masks based on the factor analysis to create different groups for the dataframes. The first mask will pick out questions 1, 4, 5, and 10 which all share a common factor. The second mask will pick out questions 7, 11, 14, and 17.

In [3]:
#First mask picks out questions 1, 4, 5, and 10
factor2Mask1 = [True] + 2*[False] + 2*[True] + 4*[False] + [True] + 10*[False]

#Second mask picks out questions 7, 11, 14, and 17
factor2Mask2 = 6*[False] + [True] + 3*[False] + [True] + 2*[False] + [True] + 2*[False] + [True] + 3*[False]



Get the loadings for the attitudes on chemistry for the presemester survey

In [4]:
ChemAttPre = PreSurveyData.iloc[:,1:21][1:]
ChemAttPre.dropna(inplace=True)
ChemAttPre = ChemAttPre.astype(str).astype(int)

#This is the first set of factors, reversing question 1
ChemPreFactor1Frame = ChemAttPre.iloc[:,factor2Mask1]
ChemPreFactor1Frame.loc[:,'Q1_1'] = ChemPreFactor1Frame.loc[:,'Q1_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
ChemPreFactor2Frame = ChemAttPre.iloc[:,factor2Mask2]
ChemPreFactor2Frame.loc[:,'Q1_7'] = ChemPreFactor2Frame.loc[:,'Q1_7'].add(-8).mul(-1)
ChemPreFactor2Frame.loc[:,'Q1_11'] = ChemPreFactor2Frame.loc[:,'Q1_11'].add(-8).mul(-1)
ChemPreFactor2Frame.loc[:,'Q1_14'] = ChemPreFactor2Frame.loc[:,'Q1_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(ChemPreFactor1Frame))
print(pg.cronbach_alpha(ChemPreFactor2Frame))

print(ChemPreFactor1Frame.corr())
print(ChemPreFactor2Frame.corr())


(0.7918538298054539, array([0.752, 0.827]))
(0.7573333333333332, array([0.711, 0.798]))
           Q1_1      Q1_4      Q1_5     Q1_10
Q1_1   1.000000  0.500877  0.535827  0.308282
Q1_4   0.500877  1.000000  0.676050  0.481227
Q1_5   0.535827  0.676050  1.000000  0.422939
Q1_10  0.308282  0.481227  0.422939  1.000000
           Q1_7     Q1_11     Q1_14     Q1_17
Q1_7   1.000000  0.614699  0.553080  0.302066
Q1_11  0.614699  1.000000  0.585559  0.307654
Q1_14  0.553080  0.585559  1.000000  0.358467
Q1_17  0.302066  0.307654  0.358467  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Here's the code for the post semester survey, for the chemistry attitudes portion

In [12]:
ChemAttPost = PostSurveyData.iloc[:,2:22][1:]
ChemAttPost.dropna(inplace=True)
ChemAttPost = ChemAttPost.astype(str).astype(int)

#This is the first set of factors, reversing question 1
ChemPostFactor1Frame = ChemAttPost.iloc[:,factor2Mask1]
ChemPostFactor1Frame.loc[:,'Q1_1'] = ChemPostFactor1Frame.loc[:,'Q1_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
ChemPostFactor2Frame = ChemAttPost.iloc[:,factor2Mask2]
ChemPostFactor2Frame.loc[:,'Q1_7'] = ChemPostFactor2Frame.loc[:,'Q1_7'].add(-8).mul(-1)
ChemPostFactor2Frame.loc[:,'Q1_11'] = ChemPostFactor2Frame.loc[:,'Q1_11'].add(-8).mul(-1)
ChemPostFactor2Frame.loc[:,'Q1_14'] = ChemPostFactor2Frame.loc[:,'Q1_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(ChemPostFactor1Frame))
print(pg.cronbach_alpha(ChemPostFactor2Frame))

print(ChemPostFactor1Frame.corr())
print(ChemPostFactor2Frame.corr())


(0.7885282443112059, array([0.719, 0.844]))
(0.7666772124663473, array([0.69 , 0.828]))
           Q1_1      Q1_4      Q1_5     Q1_10
Q1_1   1.000000  0.374031  0.562027  0.443585
Q1_4   0.374031  1.000000  0.561290  0.521014
Q1_5   0.562027  0.561290  1.000000  0.472626
Q1_10  0.443585  0.521014  0.472626  1.000000
           Q1_7     Q1_11     Q1_14     Q1_17
Q1_7   1.000000  0.715797  0.639597  0.182754
Q1_11  0.715797  1.000000  0.734988  0.268810
Q1_14  0.639597  0.734988  1.000000  0.234149
Q1_17  0.182754  0.268810  0.234149  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [13]:
#Calculate the pre and post means for factor 1, as well as effect size
ChemPreFactor1Mean = (ChemPreFactor1Frame).mean().mean()
ChemPreFactor1DOF = (ChemPreFactor1Frame.count()-1).sum()
ChemPreFactor1Variance = ((ChemPreFactor1Frame.std() ** 2) * (ChemPreFactor1Frame.count() - 1)).sum() / ChemPreFactor1DOF

ChemPostFactor1Mean = (ChemPostFactor1Frame).mean().mean()
ChemPostFactor1DOF = (ChemPostFactor1Frame.count()-1).sum()
ChemPostFactor1Variance = ((ChemPostFactor1Frame.std() ** 2) * (ChemPostFactor1Frame.count() - 1)).sum() / ChemPostFactor1DOF

ChemFactor1PooledSD = np.sqrt((ChemPostFactor1DOF * ChemPreFactor1Variance + ChemPostFactor1DOF * ChemPostFactor1Variance) / (ChemPostFactor1DOF + ChemPostFactor1DOF))

ChemFactor1EffectSize = (ChemPostFactor1Mean - ChemPreFactor1Mean)/ChemFactor1PooledSD

#Calculate the pre and post means for factor 2, as well as effect size
ChemPreFactor2Mean = (ChemPreFactor2Frame).mean().mean()
ChemPreFactor2DOF = (ChemPreFactor2Frame.count()-1).sum()
ChemPreFactor2Variance = ((ChemPreFactor2Frame.std() ** 2) * (ChemPreFactor2Frame.count() - 1)).sum() / ChemPreFactor2DOF

ChemPostFactor2Mean = (ChemPostFactor2Frame).mean().mean()
ChemPostFactor2DOF = (ChemPostFactor2Frame.count()-1).sum()
ChemPostFactor2Variance = ((ChemPostFactor2Frame.std() ** 2) * (ChemPostFactor2Frame.count() - 1)).sum() / ChemPostFactor2DOF

ChemFactor2PooledSD = np.sqrt((ChemPostFactor2DOF * ChemPreFactor2Variance + ChemPostFactor2DOF * ChemPostFactor2Variance) / (ChemPostFactor2DOF + ChemPostFactor2DOF))

ChemFactor2EffectSize = (ChemPostFactor2Mean - ChemPreFactor2Mean)/ChemFactor2PooledSD




Now, trying this with the bio attitudes section of the presurvey

In [14]:
BioAttPre = PreSurveyData.iloc[:,21:41][1:]
BioAttPre.dropna(inplace=True)
BioAttPre = BioAttPre.astype(str).astype(int)

#This is the first set of factors, reversing question 1
BioPreFactor1Frame = BioAttPre.iloc[:,factor2Mask1]
BioPreFactor1Frame.loc[:,'Q34_1'] = BioPreFactor1Frame.loc[:,'Q34_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
BioPreFactor2Frame = BioAttPre.iloc[:,factor2Mask2]
BioPreFactor2Frame.loc[:,'Q34_7'] = BioPreFactor2Frame.loc[:,'Q34_7'].add(-8).mul(-1)
BioPreFactor2Frame.loc[:,'Q34_11'] = BioPreFactor2Frame.loc[:,'Q34_11'].add(-8).mul(-1)
BioPreFactor2Frame.loc[:,'Q34_14'] = BioPreFactor2Frame.loc[:,'Q34_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(BioPreFactor1Frame))
print(pg.cronbach_alpha(BioPreFactor2Frame))

print(BioPreFactor1Frame.corr())
print(BioPreFactor2Frame.corr())


(0.7763168621894617, array([0.733, 0.814]))
(0.7567890787543129, array([0.709, 0.798]))
           Q34_1     Q34_4     Q34_5    Q34_10
Q34_1   1.000000  0.423984  0.484509  0.377124
Q34_4   0.423984  1.000000  0.575394  0.514878
Q34_5   0.484509  0.575394  1.000000  0.446081
Q34_10  0.377124  0.514878  0.446081  1.000000
           Q34_7    Q34_11    Q34_14    Q34_17
Q34_7   1.000000  0.725556  0.549038  0.263928
Q34_11  0.725556  1.000000  0.566703  0.293698
Q34_14  0.549038  0.566703  1.000000  0.321882
Q34_17  0.263928  0.293698  0.321882  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Now for the postsemester survey, for the Bio attitudes

In [15]:
BioAttPost = PostSurveyData.iloc[:,22:42][1:]
BioAttPost.dropna(inplace=True)
BioAttPost = BioAttPost.astype(str).astype(int)

#This is the first set of factors, reversing question 1
BioPostFactor1Frame = BioAttPost.iloc[:,factor2Mask1]
BioPostFactor1Frame.loc[:,'Q34_1'] = BioPostFactor1Frame.loc[:,'Q34_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 4
BioPostFactor2Frame = BioAttPost.iloc[:,factor2Mask2]
BioPostFactor2Frame.loc[:,'Q34_7'] = BioPostFactor2Frame.loc[:,'Q34_7'].add(-8).mul(-1)
BioPostFactor2Frame.loc[:,'Q34_11'] = BioPostFactor2Frame.loc[:,'Q34_11'].add(-8).mul(-1)
BioPostFactor2Frame.loc[:,'Q34_14'] = BioPostFactor2Frame.loc[:,'Q34_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(BioPostFactor1Frame))
print(pg.cronbach_alpha(BioPostFactor2Frame))

print(BioPostFactor1Frame.corr())
print(BioPostFactor2Frame.corr())


(0.85604896664519, array([0.807, 0.895]))
(0.8047363243753223, array([0.739, 0.857]))
           Q34_1     Q34_4     Q34_5    Q34_10
Q34_1   1.000000  0.656677  0.705115  0.508019
Q34_4   0.656677  1.000000  0.749481  0.542804
Q34_5   0.705115  0.749481  1.000000  0.404873
Q34_10  0.508019  0.542804  0.404873  1.000000
           Q34_7    Q34_11    Q34_14    Q34_17
Q34_7   1.000000  0.690726  0.598151  0.336596
Q34_11  0.690726  1.000000  0.609334  0.369320
Q34_14  0.598151  0.609334  1.000000  0.467301
Q34_17  0.336596  0.369320  0.467301  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [16]:
#Calculate the pre and post means for factor 1, as well as effect size
BioPreFactor1Mean = (BioPreFactor1Frame).mean().mean()
BioPreFactor1DOF = (BioPreFactor1Frame.count()-1).sum()
BioPreFactor1Variance = ((BioPreFactor1Frame.std() ** 2) * (BioPreFactor1Frame.count() - 1)).sum() / BioPreFactor1DOF

BioPostFactor1Mean = (BioPostFactor1Frame).mean().mean()
BioPostFactor1DOF = (BioPostFactor1Frame.count()-1).sum()
BioPostFactor1Variance = ((BioPostFactor1Frame.std() ** 2) * (BioPostFactor1Frame.count() - 1)).sum() / BioPostFactor1DOF

BioFactor1PooledSD = np.sqrt((BioPostFactor1DOF * BioPreFactor1Variance + BioPostFactor1DOF * BioPostFactor1Variance) / (BioPostFactor1DOF + BioPostFactor1DOF))

BioFactor1EffectSize = (BioPostFactor1Mean - BioPreFactor1Mean)/BioFactor1PooledSD

#Calculate the pre and post means for factor 2, as well as effect size
BioPreFactor2Mean = (BioPreFactor2Frame).mean().mean()
BioPreFactor2DOF = (BioPreFactor2Frame.count()-1).sum()
BioPreFactor2Variance = ((BioPreFactor2Frame.std() ** 2) * (BioPreFactor2Frame.count() - 1)).sum() / BioPreFactor2DOF

BioPostFactor2Mean = (BioPostFactor2Frame).mean().mean()
BioPostFactor2DOF = (BioPostFactor2Frame.count()-1).sum()
BioPostFactor2Variance = ((BioPostFactor2Frame.std() ** 2) * (BioPostFactor2Frame.count() - 1)).sum() / BioPostFactor2DOF

BioFactor2PooledSD = np.sqrt((BioPostFactor2DOF * BioPreFactor2Variance + BioPostFactor2DOF * BioPostFactor2Variance) / (BioPostFactor2DOF + BioPostFactor2DOF))

BioFactor2EffectSize = (BioPostFactor2Mean - BioPreFactor2Mean)/BioFactor2PooledSD



Finally, the math attitudes section. This is the presurvey portion

In [17]:
MathAttPre = PreSurveyData.iloc[:,41:61][1:]
MathAttPre.dropna(inplace=True)
MathAttPre = MathAttPre.astype(str).astype(int)

#This is the first set of factors, reversing question 1
MathPreFactor1Frame = MathAttPre.iloc[:,factor2Mask1]
MathPreFactor1Frame.loc[:,'Q32_1'] = MathPreFactor1Frame.loc[:,'Q32_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
MathPreFactor2Frame = MathAttPre.iloc[:,factor2Mask2]
MathPreFactor2Frame.loc[:,'Q32_7'] = MathPreFactor2Frame.loc[:,'Q32_7'].add(-8).mul(-1)
MathPreFactor2Frame.loc[:,'Q32_11'] = MathPreFactor2Frame.loc[:,'Q32_11'].add(-8).mul(-1)
MathPreFactor2Frame.loc[:,'Q32_14'] = MathPreFactor2Frame.loc[:,'Q32_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(MathPreFactor1Frame))
print(pg.cronbach_alpha(MathPreFactor2Frame))

print(MathPreFactor1Frame.corr())
print(MathPreFactor2Frame.corr())


(0.8330485976939814, array([0.8  , 0.862]))
(0.7863566488393939, array([0.744, 0.823]))
           Q32_1     Q32_4     Q32_5    Q32_10
Q32_1   1.000000  0.591239  0.580794  0.482781
Q32_4   0.591239  1.000000  0.696248  0.575879
Q32_5   0.580794  0.696248  1.000000  0.472534
Q32_10  0.482781  0.575879  0.472534  1.000000
           Q32_7    Q32_11    Q32_14    Q32_17
Q32_7   1.000000  0.723594  0.563503  0.333095
Q32_11  0.723594  1.000000  0.629296  0.374943
Q32_14  0.563503  0.629296  1.000000  0.338835
Q32_17  0.333095  0.374943  0.338835  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Now for the postsemester survey, for the math attitudes

In [18]:
MathAttPost = PostSurveyData.iloc[:,42:62][1:]
MathAttPost.dropna(inplace=True)
MathAttPost = MathAttPost.astype(str).astype(int)

#This is the first set of factors, reversing question 1
MathPostFactor1Frame = MathAttPost.iloc[:,factor2Mask1]
MathPostFactor1Frame.loc[:,'Q32_1'] = MathPostFactor1Frame.loc[:,'Q32_1'].add(-8).mul(-1)

#This is the second set of factors, reversing questions 7, 11, and 14
MathPostFactor2Frame = MathAttPost.iloc[:,factor2Mask2]
MathPostFactor2Frame.loc[:,'Q32_7'] = MathPostFactor2Frame.loc[:,'Q32_7'].add(-8).mul(-1)
MathPostFactor2Frame.loc[:,'Q32_11'] = MathPostFactor2Frame.loc[:,'Q32_11'].add(-8).mul(-1)
MathPostFactor2Frame.loc[:,'Q32_14'] = MathPostFactor2Frame.loc[:,'Q32_14'].add(-8).mul(-1)

print(pg.cronbach_alpha(MathPostFactor1Frame))
print(pg.cronbach_alpha(MathPostFactor2Frame))

print(MathPostFactor1Frame.corr())
print(MathPostFactor2Frame.corr())


(0.8416749267605015, array([0.787, 0.885]))
(0.8315787555798488, array([0.774, 0.877]))
           Q32_1     Q32_4     Q32_5    Q32_10
Q32_1   1.000000  0.551356  0.733831  0.416135
Q32_4   0.551356  1.000000  0.621750  0.702165
Q32_5   0.733831  0.621750  1.000000  0.461328
Q32_10  0.416135  0.702165  0.461328  1.000000
           Q32_7    Q32_11    Q32_14    Q32_17
Q32_7   1.000000  0.678756  0.610561  0.425589
Q32_11  0.678756  1.000000  0.741700  0.445405
Q32_14  0.610561  0.741700  1.000000  0.509998
Q32_17  0.425589  0.445405  0.509998  1.000000


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [19]:
#Calculate the pre and post means for factor 1, as well as effect size
MathPreFactor1Mean = (MathPreFactor1Frame).mean().mean()
MathPreFactor1DOF = (MathPreFactor1Frame.count()-1).sum()
MathPreFactor1Variance = ((MathPreFactor1Frame.std() ** 2) * (MathPreFactor1Frame.count() - 1)).sum() / MathPreFactor1DOF

MathPostFactor1Mean = (MathPostFactor1Frame).mean().mean()
MathPostFactor1DOF = (MathPostFactor1Frame.count()-1).sum()
MathPostFactor1Variance = ((MathPostFactor1Frame.std() ** 2) * (MathPostFactor1Frame.count() - 1)).sum() / MathPostFactor1DOF

MathFactor1PooledSD = np.sqrt((MathPostFactor1DOF * MathPreFactor1Variance + MathPostFactor1DOF * MathPostFactor1Variance) / (MathPostFactor1DOF + MathPostFactor1DOF))

MathFactor1EffectSize = (MathPostFactor1Mean - MathPreFactor1Mean)/MathFactor1PooledSD

#Calculate the pre and post means for factor 2, as well as effect size
MathPreFactor2Mean = (MathPreFactor2Frame).mean().mean()
MathPreFactor2DOF = (MathPreFactor2Frame.count()-1).sum()
MathPreFactor2Variance = ((MathPreFactor2Frame.std() ** 2) * (MathPreFactor2Frame.count() - 1)).sum() / MathPreFactor2DOF

MathPostFactor2Mean = (MathPostFactor2Frame).mean().mean()
MathPostFactor2DOF = (MathPostFactor2Frame.count()-1).sum()
MathPostFactor2Variance = ((MathPostFactor2Frame.std() ** 2) * (MathPostFactor2Frame.count() - 1)).sum() / MathPostFactor2DOF

MathFactor2PooledSD = np.sqrt((MathPostFactor2DOF * MathPreFactor2Variance + MathPostFactor2DOF * MathPostFactor2Variance) / (MathPostFactor2DOF + MathPostFactor2DOF))

MathFactor2EffectSize = (MathPostFactor2Mean - MathPreFactor2Mean)/MathFactor2PooledSD



In [20]:
#The means and effect sizes for each factor in the three surveys
AttitudeCompare = pd.DataFrame(columns = ['Subscale','Presemester', 'Postsemester', 'Effect size'])

#This is a spacer
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Factors', \
                             'Presemester': '', \
                             'Postsemester': '',
                             'Effect size': ''}]\
                          )

#This is the first factor for chemistry
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Cognition, CHEM', \
                             'Presemester': 100*ChemPreFactor1Mean/7, \
                             'Postsemester': 100*ChemPostFactor1Mean/7,
                             'Effect size': ChemFactor1EffectSize}]\
                          )

#This is the first factor for bio
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Cognition, BIO', \
                             'Presemester': 100*BioPreFactor1Mean/7, \
                             'Postsemester': 100*BioPostFactor1Mean/7,
                             'Effect size': BioFactor1EffectSize}]\
                          )

#This is the first factor for math
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Cognition, MATH', \
                             'Presemester': 100*MathPreFactor1Mean/7, \
                             'Postsemester': 100*MathPostFactor1Mean/7,
                             'Effect size': MathFactor1EffectSize}]\
                          )

#This is the second factor for chemistry
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Affect, CHEM', \
                             'Presemester': 100*ChemPreFactor2Mean/7, \
                             'Postsemester': 100*ChemPostFactor2Mean/7,
                             'Effect size': ChemFactor2EffectSize}]\
                          )

#This is the second factor for bio
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Affect, BIO', \
                             'Presemester': 100*BioPreFactor2Mean/7, \
                             'Postsemester': 100*BioPostFactor2Mean/7,
                             'Effect size': BioFactor2EffectSize}]\
                          )

#This is the second factor for math
AttitudeCompare = AttitudeCompare.append( [{'Subscale': 'Affect, MATH', \
                             'Presemester': 100*MathPreFactor2Mean/7, \
                             'Postsemester': 100*MathPostFactor2Mean/7,
                             'Effect size': MathFactor2EffectSize}]\
                          )




In [28]:
AttitudeCompare

#ExportFilename = "AttitudeCompare_Lewis.xlsx"
#AttitudeCompare.to_excel(path + ExportFilename)



Unnamed: 0,Subscale,Presemester,Postsemester,Effect size
0,Factors,,,
0,"Cognition, CHEM",41.5052,44.9107,0.183847
0,"Cognition, BIO",49.8609,46.5539,-0.157177
0,"Cognition, MATH",42.0023,41.6667,-0.0152247
0,"Affect, CHEM",63.3619,63.006,-0.0174076
0,"Affect, BIO",69.3182,62.0614,-0.355336
0,"Affect, MATH",61.9087,63.6422,0.0717851


In [22]:
#The means and standard deviations for each factor in the three surveys
AttitudeData = pd.DataFrame(columns = ['Subscale','Presemester mean', \
                                       'Presemester SD', \
                                       'Postsemester mean', \
                                       'Postsemester SD'])

#This is a spacer
AttitudeData = AttitudeData.append( [{'Subscale': 'Factors', \
                             'Presemester mean': '', \
                             'Presemester SD': '',\
                             'Postsemester mean': '',\
                             'Postsemester SD': ''}]\
                          )

#This is the first factor for chemistry
AttitudeData = AttitudeData.append( [{'Subscale': 'Cognition, CHEM', \
                             'Presemester mean': ChemPreFactor1Mean, \
                             'Presemester SD': np.sqrt(ChemPreFactor1Variance), \
                             'Postsemester mean': ChemPostFactor1Mean, \
                             'Postsemester SD': np.sqrt(ChemPostFactor1Variance)}]\
                          )

#This is the first factor for bio
AttitudeData = AttitudeData.append( [{'Subscale': 'Cognition, BIO', \
                             'Presemester mean': BioPreFactor1Mean, \
                             'Presemester SD': np.sqrt(BioPreFactor1Variance), \
                             'Postsemester mean': BioPostFactor1Mean, \
                             'Postsemester SD': np.sqrt(BioPostFactor1Variance)}]\
                          )

#This is the first factor for math
AttitudeData = AttitudeData.append( [{'Subscale': 'Cognition, MATH', \
                             'Presemester mean': MathPreFactor1Mean, \
                             'Presemester SD': np.sqrt(MathPreFactor1Variance), \
                             'Postsemester mean': MathPostFactor1Mean, \
                             'Postsemester SD': np.sqrt(MathPostFactor1Variance)}]\
                          )

#This is the second factor for chemistry
AttitudeData = AttitudeData.append( [{'Subscale': 'Affect, CHEM', \
                             'Presemester mean': ChemPreFactor2Mean, \
                             'Presemester SD': np.sqrt(ChemPreFactor2Variance), \
                             'Postsemester mean': ChemPostFactor2Mean, \
                             'Postsemester SD': np.sqrt(ChemPostFactor2Variance)}]\
                          )

#This is the second factor for bio
AttitudeData = AttitudeData.append( [{'Subscale': 'Affect, BIO', \
                             'Presemester mean': BioPreFactor2Mean, \
                             'Presemester SD': np.sqrt(BioPreFactor2Variance), \
                             'Postsemester mean': BioPostFactor2Mean, \
                             'Postsemester SD': np.sqrt(BioPostFactor2Variance)}]\
                          )

#This is the second factor for math
AttitudeData = AttitudeData.append( [{'Subscale': 'Affect, MATH', \
                             'Presemester mean': MathPreFactor2Mean, \
                             'Presemester SD': np.sqrt(MathPreFactor2Variance), \
                             'Postsemester mean': MathPostFactor2Mean, \
                             'Postsemester SD': np.sqrt(MathPostFactor2Variance)}]\
                          )



In [29]:
AttitudeData

#ExportFilename = "AttitudeData_Lewis.xlsx"
#AttitudeData.to_excel(path + ExportFilename)



In [24]:
#Calculate the presemester t-value between Chem and Bio for factor 1
ChemBioPrePooledVar1 = (ChemPreFactor1DOF * ChemPreFactor1Variance + BioPreFactor1DOF * BioPreFactor1Variance) / (ChemPreFactor1DOF + BioPreFactor1DOF)
ChemBioPreN1 = (1/ChemPreFactor1DOF) + (1/BioPreFactor1DOF)
ChemBioPreT1 = (ChemPreFactor1Mean - BioPreFactor1Mean)/np.sqrt(ChemBioPrePooledVar1*ChemBioPreN1)

#Calculate the presemester t-value between Chem and Math for factor 1
ChemMathPrePooledVar1 = (ChemPreFactor1DOF * ChemPreFactor1Variance + MathPreFactor1DOF * MathPreFactor1Variance) / (ChemPreFactor1DOF + MathPreFactor1DOF)
ChemMathPreN1 = (1/ChemPreFactor1DOF) + (1/MathPreFactor1DOF)
ChemMathPreT1 = (ChemPreFactor1Mean - MathPreFactor1Mean)/np.sqrt(ChemMathPrePooledVar1*ChemMathPreN1)

#Calculate the presemester t-value between Bio and Math for factor 1
BioMathPrePooledVar1 = (BioPreFactor1DOF * BioPreFactor1Variance + MathPreFactor1DOF * MathPreFactor1Variance) / (BioPreFactor1DOF + MathPreFactor1DOF)
BioMathPreN1 = (1/BioPreFactor1DOF) + (1/BioPreFactor1DOF)
BioMathPreT1 = (BioPreFactor1Mean - MathPreFactor1Mean)/np.sqrt(BioMathPrePooledVar1*BioMathPreN1)

#Calculate the presemester t-value between Chem and Bio for factor 2
ChemBioPrePooledVar2 = (ChemPreFactor2DOF * ChemPreFactor2Variance + BioPreFactor2DOF * BioPreFactor2Variance) / (ChemPreFactor2DOF + BioPreFactor2DOF)
ChemBioPreN2 = (1/ChemPreFactor2DOF) + (1/BioPreFactor2DOF)
ChemBioPreT2 = (ChemPreFactor2Mean - BioPreFactor2Mean)/np.sqrt(ChemBioPrePooledVar2*ChemBioPreN2)

#Calculate the presemester t-value between Chem and Math for factor 2
ChemMathPrePooledVar2 = (ChemPreFactor2DOF * ChemPreFactor2Variance + MathPreFactor2DOF * MathPreFactor2Variance) / (ChemPreFactor2DOF + MathPreFactor2DOF)
ChemMathPreN2 = (1/ChemPreFactor2DOF) + (1/MathPreFactor2DOF)
ChemMathPreT2 = (ChemPreFactor2Mean - MathPreFactor2Mean)/np.sqrt(ChemMathPrePooledVar2*ChemMathPreN2)

#Calculate the presemester t-value between Bio and Math for factor 2
BioMathPrePooledVar2 = (BioPreFactor2DOF * BioPreFactor2Variance + MathPreFactor2DOF * MathPreFactor2Variance) / (BioPreFactor2DOF + MathPreFactor2DOF)
BioMathPreN2 = (1/BioPreFactor2DOF) + (1/BioPreFactor2DOF)
BioMathPreT2 = (BioPreFactor2Mean - MathPreFactor2Mean)/np.sqrt(BioMathPrePooledVar2*BioMathPreN2)

#Calculate the postsemester t-value between Chem and Bio for factor 1
ChemBioPostPooledVar1 = (ChemPostFactor1DOF * ChemPostFactor1Variance + BioPostFactor1DOF * BioPostFactor1Variance) / (ChemPostFactor1DOF + BioPostFactor1DOF)
ChemBioPostN1 = (1/ChemPostFactor1DOF) + (1/BioPostFactor1DOF)
ChemBioPostT1 = (ChemPostFactor1Mean - BioPostFactor1Mean)/np.sqrt(ChemBioPostPooledVar1*ChemBioPostN1)

#Calculate the postsemester t-value between Chem and Math for factor 1
ChemMathPostPooledVar1 = (ChemPostFactor1DOF * ChemPostFactor1Variance + MathPostFactor1DOF * MathPostFactor1Variance) / (ChemPostFactor1DOF + MathPostFactor1DOF)
ChemMathPostN1 = (1/ChemPostFactor1DOF) + (1/MathPostFactor1DOF)
ChemMathPostT1 = (ChemPostFactor1Mean - MathPostFactor1Mean)/np.sqrt(ChemMathPostPooledVar1*ChemMathPostN1)

#Calculate the postsemester t-value between Bio and Math for factor 1
BioMathPostPooledVar1 = (BioPostFactor1DOF * BioPostFactor1Variance + MathPostFactor1DOF * MathPostFactor1Variance) / (BioPostFactor1DOF + MathPostFactor1DOF)
BioMathPostN1 = (1/BioPostFactor1DOF) + (1/BioPostFactor1DOF)
BioMathPostT1 = (BioPostFactor1Mean - MathPostFactor1Mean)/np.sqrt(BioMathPostPooledVar1*BioMathPostN1)

#Calculate the postsemester t-value between Chem and Bio for factor 2
ChemBioPostPooledVar2 = (ChemPostFactor2DOF * ChemPostFactor2Variance + BioPostFactor2DOF * BioPostFactor2Variance) / (ChemPostFactor2DOF + BioPostFactor2DOF)
ChemBioPostN2 = (1/ChemPostFactor2DOF) + (1/BioPostFactor2DOF)
ChemBioPostT2 = (ChemPostFactor2Mean - BioPostFactor2Mean)/np.sqrt(ChemBioPostPooledVar2*ChemBioPostN2)

#Calculate the postsemester t-value between Chem and Math for factor 2
ChemMathPostPooledVar2 = (ChemPostFactor2DOF * ChemPostFactor2Variance + MathPostFactor2DOF * MathPostFactor2Variance) / (ChemPostFactor2DOF + MathPostFactor2DOF)
ChemMathPostN2 = (1/ChemPostFactor2DOF) + (1/MathPostFactor2DOF)
ChemMathPostT2 = (ChemPostFactor2Mean - MathPostFactor2Mean)/np.sqrt(ChemMathPostPooledVar2*ChemMathPostN2)

#Calculate the postsemester t-value between Bio and Math for factor 2
BioMathPostPooledVar2 = (BioPostFactor2DOF * BioPostFactor2Variance + MathPostFactor2DOF * MathPostFactor2Variance) / (BioPostFactor2DOF + MathPostFactor2DOF)
BioMathPostN2 = (1/BioPostFactor2DOF) + (1/BioPostFactor2DOF)
BioMathPostT2 = (BioPostFactor2Mean - MathPostFactor2Mean)/np.sqrt(BioMathPostPooledVar2*BioMathPostN2)



In [25]:
#The means and standard deviations for each factor in the three surveys
TCompare = pd.DataFrame(columns = ['Subscale','Presemester t-value', \
                              'Presemester DOF', \
                                      'Postsemester t-value', \
                                       'Postsemester DOF'])

#This is a spacer
TCompare = TCompare.append( [{'Subscale': 'Factors', \
                             'Presemester t-value': '', \
                             'Presemester DOF': '', \
                             'Postsemester t-value': '',\
                             'Postsemester DOF': ''}]\
                          )

#This is the first factor for chemistry
TCompare = TCompare.append( [{'Subscale': 'Cognition, CHEM-BIO', \
                             'Presemester t-value': ChemBioPreT1, \
                             'Presemester DOF': ChemPreFactor1DOF + BioPreFactor1DOF, \
                             'Postsemester t-value': ChemBioPostT1, \
                             'Postsemester DOF': ChemPostFactor1DOF + BioPostFactor1DOF}]\
                          )

#This is the first factor for bio
TCompare = TCompare.append( [{'Subscale': 'Cognition, CHEM-MATH', \
                             'Presemester t-value': ChemMathPreT1, \
                             'Presemester DOF': ChemPreFactor1DOF + MathPreFactor1DOF, \
                             'Postsemester t-value': ChemMathPostT1, \
                             'Postsemester DOF': ChemPostFactor1DOF + MathPostFactor1DOF}]\
                          )

#This is the first factor for math
TCompare = TCompare.append( [{'Subscale': 'Cognition, BIO-MATH', \
                             'Presemester t-value': BioMathPreT1, \
                             'Presemester DOF': BioPreFactor1DOF + MathPreFactor1DOF, \
                             'Postsemester t-value': BioMathPostT1, \
                             'Postsemester DOF': BioPostFactor1DOF + MathPostFactor1DOF}]\
                          )

#This is the second factor for chemistry
TCompare = TCompare.append( [{'Subscale': 'Affect, CHEM-BIO', \
                             'Presemester t-value': ChemBioPreT2, \
                             'Presemester DOF': ChemPreFactor2DOF + BioPreFactor2DOF, \
                             'Postsemester t-value': ChemBioPostT2, \
                             'Postsemester DOF': ChemPostFactor2DOF + BioPostFactor2DOF}]\
                          )

#This is the second factor for bio
TCompare = TCompare.append( [{'Subscale': 'Affect, CHEM-MATH', \
                             'Presemester t-value': ChemMathPreT2, \
                             'Presemester DOF': ChemPreFactor2DOF + MathPreFactor2DOF, \
                             'Postsemester t-value': ChemMathPostT2, \
                             'Postsemester DOF': ChemPostFactor2DOF + MathPostFactor2DOF}]\
                          )

#This is the second factor for math
TCompare = TCompare.append( [{'Subscale': 'Affect, BIO-MATH', \
                             'Presemester t-value': BioMathPreT2, \
                             'Presemester DOF': BioPreFactor2DOF + MathPreFactor2DOF, \
                             'Postsemester t-value': BioMathPostT2, \
                             'Postsemester DOF': BioPostFactor2DOF + MathPostFactor2DOF}]\
                          )



In [31]:
TCompare

#ExportFilename = "TCompare_Lewis.xlsx"
#TCompare.to_excel(path + ExportFilename)



Unnamed: 0,Subscale,Presemester t-value,Presemester DOF,Postsemester t-value,Postsemester DOF
0,Factors,,,,
0,"Cognition, CHEM-BIO",-10.8055,2492.0,-1.23354,928.0
0,"Cognition, CHEM-MATH",-0.610838,2480.0,2.40811,916.0
0,"Cognition, BIO-MATH",9.01496,2444.0,3.41832,892.0
0,"Affect, CHEM-BIO",-7.32582,2492.0,0.699022,928.0
0,"Affect, CHEM-MATH",1.61292,2480.0,-0.433147,916.0
0,"Affect, BIO-MATH",8.23213,2444.0,-1.06132,892.0
