In [1]:
import pandas as pd
import open_ended_tools

# ---------------------------------------------------------------------------
# Run parameters (edit here; later cells read these names)
# ---------------------------------------------------------------------------

# Input location: folder plus the two CSV file names that get joined onto it.
directory = './Data_Training/'
file_answers = 'open_ended_answers.csv'
file_metrics = 'metrics.csv'

# Embedding options handed to the open_ended_tools embedding helpers.
embedding_model = 'text-embedding-ada-002'
generate_embeddings = False

# Chosen number of clusters.
n_clusters = 3

# Seed for reproducibility; use None for a truly random run.
# NOTE: the OpenAI docs samples are reproduced with 42, whereas this notebook runs with 40.
random_state = 40

In [2]:
# Load the answers table (first CSV column becomes the index) and keep only
# the listed columns, in this order.
answer_columns = [
    'Question_ID', 'Type', 'Question', 'Answer', 'Correct_answer',
    'Curiosity', 'Hunger', 'Smarts', 'Relevance',
    'Curiosity_optimum', 'Hunger_optimum', 'Smarts_optimum', 'Relevance_optimum',
]
df = pd.read_csv(directory + file_answers, index_col=0)[answer_columns]

# Load the metric definitions: one row per (Metric, Category_term) pair.
metric_columns = ['Metric', 'Category_term']
df_metrics = pd.read_csv(directory + file_metrics, index_col=0)[metric_columns]

# Metric names are taken from the metrics file (distinct values, in order of
# first appearance) rather than being hard-coded here.
metrics = df_metrics['Metric'].unique()
metrics

array(['Curiosity', 'Hunger', 'Smarts'], dtype=object)

In [3]:
#%% Create open_ended_answer object and embeddings
# For every distinct Question_ID: record the question text, wrap that
# question's rows in an OpenEndedAnswer, and load/generate its embeddings.
question = []  # question text, one entry per Question_ID
ans = []       # OpenEndedAnswer objects, parallel to `question`

# Distinct question IDs, computed once instead of on every loop iteration.
question_ids = df['Question_ID'].unique()

# Per-question embedding files are named
# '<answers file name without its 4-character ".csv" suffix>_<Question_ID>.csv'.
embeddings_stem = directory + file_answers[:-4]

for i, q_ID in enumerate(question_ids):
    answers_for_question = df[df['Question_ID'] == q_ID]

    # Positional .iloc[0] always yields the first matching row's question as
    # a scalar, even if the frame's index labels are not unique.
    question.append(answers_for_question['Question'].iloc[0])

    answer_model = open_ended_tools.OpenEndedAnswer(answers_for_question, metrics)
    ans.append(answer_model)

    answer_model.generate_answer_embeddings(f'{embeddings_stem}_{q_ID}.csv',
                                            random_state=random_state,
                                            generate_embeddings=generate_embeddings,
                                            embedding_model=embedding_model)
    print(answer_model)

Embeddings file read.
Question: 1, Describe a lightweight concept for an aircraft fuselage longitudinal stringer, with manufacturing methods that could be used. Its function is to make a stiffened skin monocoque structure. The stringer will take axial loads and prevent skin buckling. There are no cost or schedule constraints.
# of Answers in Model: 34
Metrics: ['Curiosity' 'Hunger' 'Smarts']
# of Clusters: None

Embeddings file read.
Question: 2, Identify the pattern in these numbers: 854976320
# of Answers in Model: 20
Metrics: ['Curiosity' 'Hunger' 'Smarts']
# of Clusters: None



In [4]:
#%% Create metric embeddings
# For every distinct metric: wrap that metric's rows in an OpenEndedMetric
# and generate embeddings for it.
met = []  # OpenEndedMetric objects, in order of first appearance in df_metrics

# Distinct metric names, computed once instead of on every loop iteration.
metric_names = df_metrics['Metric'].unique()

for i, met_name in enumerate(metric_names):
    metric_model = open_ended_tools.OpenEndedMetric(df_metrics[df_metrics['Metric'] == met_name])
    met.append(metric_model)

    # NOTE(review): generate_embeddings is hard-coded to True here, so the
    # `generate_embeddings` run parameter is ignored for metrics -- confirm
    # this is intentional. Every metric is also given the same path
    # (directory + file_metrics); verify how the helper uses that path.
    metric_model.generate_metric_embeddings(directory + file_metrics,
                                            generate_embeddings=True,
                                            embedding_model=embedding_model)

Embeddings created.
Embeddings created.
Embeddings created.


In [5]:
# Inspect the table held by the third metric object (index 2); in this run
# that is the 'Smarts' metric, with one embedding per category term.
met[2].df

Unnamed: 0_level_0,Metric,Category_term,embedding
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5,Smarts,Intelligence,"[-0.007757922168821096, -0.00983770564198494, ..."
6,Smarts,Correctness,"[0.014180214144289494, -0.011563150212168694, ..."
7,Smarts,Feasibility,"[-0.0001759278675308451, -0.0315532423555851, ..."


In [6]:
# Inspect the table held by the first answer object (index 0), i.e. the
# answers to the first question together with their embedding column.
ans[0].df

Unnamed: 0,ID,Question_ID,Type,Question,Answer,Correct_answer,Curiosity,Hunger,Smarts,Relevance,Curiosity_optimum,Hunger_optimum,Smarts_optimum,Relevance_optimum,embedding
0,1,1,Multiple,Describe a lightweight concept for an aircraft...,A minimum weight concept for an aircraft fusel...,,0,0,1,1,1,0,1,1,"[-0.004058697260916233, 0.03430645912885666, -..."
1,2,1,Multiple,Describe a lightweight concept for an aircraft...,One possible concept for a minimum weight airc...,,0,0,1,1,1,0,1,1,"[2.1099383957334794e-05, 0.033611960709095, -0..."
2,3,1,Multiple,Describe a lightweight concept for an aircraft...,Use a composite material such as carbon fiber ...,,0,0,1,1,1,0,1,1,"[0.007572090718895197, 0.04471009597182274, -0..."
3,4,1,Multiple,Describe a lightweight concept for an aircraft...,The stringer could be manufactured with carbon...,,0,0,1,1,1,0,1,1,"[-0.004457200411707163, 0.010434738360345364, ..."
4,5,1,Multiple,Describe a lightweight concept for an aircraft...,Rather than make a new component and add a str...,,1,0,1,1,1,0,1,1,"[-0.00869241077452898, 0.04867749661207199, 0...."
5,6,1,Multiple,Describe a lightweight concept for an aircraft...,Material: The stringer could be made of a high...,,0,0,1,1,1,0,1,1,"[0.0014477220829576254, 0.03042137622833252, -..."
6,7,1,Multiple,Describe a lightweight concept for an aircraft...,There is not enough information provided to an...,,-1,-1,-1,-1,1,0,1,1,"[0.017032714560627937, 0.0015377630479633808, ..."
7,8,1,Multiple,Describe a lightweight concept for an aircraft...,"Given enough time, I could answer this questio...",,-1,-1,-1,-1,1,0,1,1,"[-0.004106028471142054, -0.023306598886847496,..."
8,9,1,Multiple,Describe a lightweight concept for an aircraft...,Material: The stringer could be made of a ligh...,,1,0,1,1,1,0,1,1,"[0.006133295129984617, 0.026939189061522484, -..."
9,10,1,Multiple,Describe a lightweight concept for an aircraft...,Material: The stringer could be made of a ligh...,,1,0,1,1,1,0,1,1,"[-0.0046913716942071915, 0.015704255551099777,..."


In [19]:
# List the category terms of the third metric object (index 2).
print(met[2].df['Category_term'])

ID
5    Intelligence
6     Correctness
7     Feasibility
Name: Category_term, dtype: object


In [18]:
#%% Test out embeddings scoring
# Score the first question's answers (ans[0]) against the third metric (met[2]).
# The displayed result is a nested list with three values per row --
# presumably one row per answer and one value per category term of the
# metric; TODO confirm against open_ended_tools.metric_score.
open_ended_tools.metric_score(met[2],ans[0])

[[0.7264828512949829, 0.7343832462610814, 0.7677126903947951],
 [0.712909445214868, 0.7217201502103398, 0.7546464085543949],
 [0.7121182275827823, 0.7170998778973509, 0.7424764590294509],
 [0.7194626088969983, 0.720034778260918, 0.7477158563120728],
 [0.7183337634757091, 0.7190680073214087, 0.7459102567667737],
 [0.7253983836460919, 0.7205145348064912, 0.750354673023194],
 [0.7638901896673788, 0.7574911176607262, 0.7695279994446705],
 [0.7554567746216168, 0.7586756369521797, 0.7663868039646262],
 [0.7286968665987311, 0.726173007708657, 0.7523519728721024],
 [0.7266706415713265, 0.7229680712585042, 0.7478227670659249],
 [0.7336574243239065, 0.7222929834632065, 0.7526733146613283],
 [0.7232770564077325, 0.7217029578340031, 0.7512603703462669],
 [0.7164804197327235, 0.7213149813218557, 0.7529499530992018],
 [0.7267944793272451, 0.7217551454991769, 0.7421782021343672],
 [0.7329895744567164, 0.7195773209210362, 0.741058996452673],
 [0.7109063213587012, 0.7255847515842863, 0.7416922476999076