# Loading Data with Pandas

In [1]:
import pandas as pd



In [2]:
# Observed correlations between constructs, read from the CSV in the working directory
neo_corrs = pd.read_csv(filepath_or_buffer='NEO_correlations.csv')
neo_corrs

Unnamed: 0,construct_1,construct_2,correlation
0,Anxiety,Anxiety,1.000000
1,Friendliness,Anxiety,-0.319029
2,Imagination,Anxiety,0.061749
3,Trust,Anxiety,-0.303660
4,Self-Efficacy,Anxiety,-0.453877
...,...,...,...
895,Vulnerability,Cautiousness,-0.276091
896,Cheerfulness,Cautiousness,-0.172144
897,Liberalism,Cautiousness,-0.125071
898,Sympathy,Cautiousness,0.117780


In [3]:
# Load the item table and keep only the construct label and the item wording
neo_items = pd.read_csv('NEO_items.csv').loc[:, ['construct', 'text']]
neo_items

Unnamed: 0,construct,text
0,Achievement-Striving,Go straight for the goal.
1,Achievement-Striving,Plunge into tasks with all my heart.
2,Achievement-Striving,Demand quality.
3,Achievement-Striving,Set high standards for myself and others.
4,Achievement-Striving,Turn plans into actions.
...,...,...
295,Vulnerability,Remain calm under pressure.
296,Vulnerability,Am calm even in tense situations.
297,Vulnerability,Can handle complex problems.
298,Vulnerability,Readily overcome setbacks.


# Embedding Constructs with a Sentence Transformer

In [4]:
from sentence_transformers import SentenceTransformer

In [5]:
# Embedding items
# All item texts are encoded in a single batched call rather than one model call
# per row; encode() takes a list of sentences and returns one embedding per
# sentence, in input order.
model = SentenceTransformer('all-MiniLM-L6-v2')
item_embeddings = model.encode(neo_items['text'].tolist())
# Store one 1-D vector per row so the column has the same layout as before.
neo_items['embed'] = list(item_embeddings)
neo_items

Unnamed: 0,construct,text,embed
0,Achievement-Striving,Go straight for the goal.,"[0.010508136, 0.10021052, -0.07636007, 0.00171..."
1,Achievement-Striving,Plunge into tasks with all my heart.,"[-0.005888383, 0.01626125, 0.023178238, -0.011..."
2,Achievement-Striving,Demand quality.,"[-0.04246266, -0.0150403185, 0.018326316, -0.0..."
3,Achievement-Striving,Set high standards for myself and others.,"[0.0025991125, 0.051376447, -0.0050169597, -0...."
4,Achievement-Striving,Turn plans into actions.,"[-0.002227421, 0.07106716, 0.033379693, -0.022..."
...,...,...,...
295,Vulnerability,Remain calm under pressure.,"[0.008001904, 0.04168616, 0.028673416, 0.04836..."
296,Vulnerability,Am calm even in tense situations.,"[0.07779292, 0.017205391, 0.027962497, 0.01344..."
297,Vulnerability,Can handle complex problems.,"[-0.06403706, 0.10341669, 0.005948939, -0.0167..."
298,Vulnerability,Readily overcome setbacks.,"[-0.020695273, 0.061760057, 0.0056057484, 0.07..."


In [6]:
# Averaging item embeddings to get construct embeddings
# The per-item vectors are first expanded into a numeric frame (one column per
# embedding dimension, indexed by construct) so that the group mean is an ordinary
# numeric aggregation instead of an aggregation over an object column of arrays.
item_vectors = pd.DataFrame(neo_items['embed'].to_list(), index=neo_items['construct'])
construct_embeds = item_vectors.groupby(level='construct').mean()
construct_embeds

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,374,375,376,377,378,379,380,381,382,383
construct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Achievement-Striving,0.004298,0.037264,0.007913,-0.004269,-0.018353,-0.024534,0.012682,-0.018506,-0.015681,0.005676,...,0.032268,0.011097,0.004869,0.020435,-0.064954,0.050593,0.072998,-0.023511,-0.039168,0.006319
Activity-Level,0.001619,-0.000604,0.015234,0.049282,0.046148,-0.005798,0.032762,-0.027327,0.021172,0.003219,...,0.037965,0.019592,0.011885,-0.003336,-0.004621,0.0337,0.038179,-0.020652,0.00125,-0.020256
Adventurousness,0.0258,0.004956,0.044257,0.048723,-0.01105,-0.011392,0.017286,-0.025266,0.005639,0.040548,...,0.059821,0.013491,0.006589,0.022432,0.010717,0.031726,0.047142,-0.00713,0.000165,0.012197
Aesthetic Appreciation/Artistic Interests,0.048291,0.025077,0.066371,-0.001419,-0.028676,0.025041,0.031712,-0.079248,0.031333,-0.000424,...,0.012982,-0.025234,-0.010336,0.068849,-0.00973,0.05812,0.048819,-0.013709,0.017838,-0.012106
Altruism,0.01069,0.026499,0.024885,-0.013526,0.021357,-0.020612,0.057289,-0.02874,0.009422,-0.016814,...,0.04515,-0.004136,0.0009,0.06719,0.022191,0.024426,0.049599,-0.025014,-0.042768,0.000707
Anger,0.018768,0.008189,0.044884,0.049368,0.01557,-0.012693,0.064483,-0.009928,0.017258,-0.020605,...,0.00885,-0.00756,0.015956,-9.8e-05,-0.000444,0.038712,0.040251,0.014108,-0.059225,-0.01234
Anxiety,0.039732,0.002361,0.0462,0.051786,0.043904,0.007185,0.056894,-0.01585,0.017888,-0.028865,...,0.035846,0.013569,0.005886,0.059854,0.018765,0.048199,0.03775,-0.008109,-0.062439,0.016752
Assertiveness,0.026696,0.000972,-0.005672,-0.019051,0.006382,0.015712,0.027128,-0.03962,0.019409,0.006405,...,0.065377,0.003177,0.016715,0.043915,-0.033603,0.06158,0.043831,-0.017892,-0.015792,0.022099
Cautiousness,0.038005,-0.004316,0.026376,0.019126,0.01372,0.006093,0.067304,-0.004575,0.031336,-0.002564,...,0.04782,-0.003146,-0.001774,0.028741,-0.026803,0.043275,0.047394,-0.030701,-0.044365,0.008522
Cheerfulness,0.013405,0.02248,0.024024,0.024458,0.005424,-0.021783,0.072394,0.006929,0.035095,-0.022695,...,0.060934,-0.02755,-0.014636,0.033432,-0.041717,0.030006,0.062872,0.007979,0.002071,0.011426


# Comparing Predicted and Observed Correlations

In [7]:
import numpy as np

In [8]:
# construct_embeds has one row per construct; transposing puts each construct in
# its own column so .corr() yields the construct-by-construct Pearson matrix.
predicted = construct_embeds.transpose().corr(method='pearson')
predicted

construct,Achievement-Striving,Activity-Level,Adventurousness,Aesthetic Appreciation/Artistic Interests,Altruism,Anger,Anxiety,Assertiveness,Cautiousness,Cheerfulness,...,Liberalism,Modesty/Humility,Morality,Orderliness,Self-Discipline,Self-Efficacy,Self-consciousness,Sympathy,Trust,Vulnerability
construct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Achievement-Striving,1.0,0.683325,0.596813,0.43818,0.585849,0.477232,0.558175,0.705198,0.770884,0.520514,...,0.422136,0.610099,0.703681,0.656243,0.808163,0.751365,0.5454,0.53212,0.496445,0.629537
Activity-Level,0.683325,1.0,0.642606,0.394938,0.572514,0.532134,0.572553,0.657762,0.713945,0.603842,...,0.363294,0.500476,0.563783,0.676124,0.732323,0.687178,0.536045,0.43935,0.428855,0.620304
Adventurousness,0.596813,0.642606,1.0,0.564447,0.594343,0.467224,0.605937,0.596638,0.640601,0.497843,...,0.369372,0.542682,0.54033,0.701515,0.542919,0.622761,0.532884,0.474609,0.471584,0.51133
Aesthetic Appreciation/Artistic Interests,0.43818,0.394938,0.564447,1.0,0.525627,0.392826,0.489925,0.517851,0.515103,0.48299,...,0.297742,0.479476,0.398306,0.523679,0.338669,0.432742,0.479937,0.460104,0.381838,0.422574
Altruism,0.585849,0.572514,0.594343,0.525627,1.0,0.521328,0.629514,0.76262,0.592671,0.575594,...,0.446789,0.67393,0.712856,0.618876,0.504058,0.593737,0.669224,0.746703,0.730641,0.563196
Anger,0.477232,0.532134,0.467224,0.392826,0.521328,1.0,0.642388,0.468607,0.497401,0.627919,...,0.243831,0.552908,0.482393,0.571992,0.459659,0.494562,0.572122,0.504789,0.45076,0.615453
Anxiety,0.558175,0.572553,0.605937,0.489925,0.629514,0.642388,1.0,0.596294,0.677406,0.552312,...,0.290887,0.575119,0.550478,0.675768,0.587711,0.640205,0.775005,0.535626,0.495912,0.814823
Assertiveness,0.705198,0.657762,0.596638,0.517851,0.76262,0.468607,0.596294,1.0,0.74114,0.541643,...,0.491006,0.682159,0.785709,0.645074,0.639355,0.703458,0.65922,0.63696,0.669129,0.630424
Cautiousness,0.770884,0.713945,0.640601,0.515103,0.592671,0.497401,0.677406,0.74114,1.0,0.606092,...,0.421035,0.591183,0.696156,0.732107,0.729203,0.713076,0.584459,0.505587,0.533595,0.738444
Cheerfulness,0.520514,0.603842,0.497843,0.48299,0.575594,0.627919,0.552312,0.541643,0.606092,1.0,...,0.301038,0.558844,0.524568,0.570549,0.477607,0.570062,0.537817,0.473613,0.442924,0.558679


In [12]:
# Aligning rows and columns of the predicted and observed correlations
observed = neo_corrs.pivot(index='construct_1', columns='construct_2', values='correlation')
predicted, observed = predicted.align(observed)

# Blank out the diagonal (self-correlations) with a boolean mask. This avoids
# writing through `.values`, which is not guaranteed to be a writable view of the
# frame (it is read-only when pandas Copy-on-Write is enabled).
off_diagonal = ~np.eye(*predicted.shape, dtype=bool)
predicted, observed = predicted.where(off_diagonal), observed.where(off_diagonal)

# Taking the strictly lower triangle of the correlation matrices (k=-1 leaves the
# diagonal out), so each construct pair is counted once
strict_lower = np.tril(np.ones(predicted.shape, dtype=bool), k=-1)
predicted_lower = predicted.where(strict_lower).stack()
observed_lower = observed.where(strict_lower).stack()

# Pearson correlation between the predicted and observed pair values
# (a single r over all pairs, and again on their absolute values)
print(f'Mean r lower triangle: {predicted_lower.corr(observed_lower).round(2)}')
print(f'Mean r of absolute values lower triangle: {predicted_lower.abs().corr(observed_lower.abs()).round(2)}')

Mean r lower triangle: 0.24
Mean r of absolute values lower triangle: 0.5
