In [1]:
import pandas as pd
from txtai.embeddings import Embeddings

  from .autonotebook import tqdm as notebook_tqdm


## Combine Goals and Objectives (with titles) into one paragraph and compare them

In [2]:
# Load the goals table keyed by goal_id, order the rows by that key, then
# move goal_id back into an ordinary column so the frame gets a fresh 0..n-1 index.
go = (
    pd.read_csv("./data/processed/goals-objectives-combined.csv", index_col="goal_id")
    .sort_index()
    .reset_index()
)
go

Unnamed: 0,goal_id,goal_name,goal_objective
0,0,Innovate for Competitive Advantage,Innovation is a key element of future readines...
1,1,Optimize for Efficiencies and Improved Capability,Delivering IT capabilities with greater effici...
2,2,Evolve Cybersecurity for an Agile and Resilien...,The scope pace and sophistication of malicious...
3,3,Cultivate Talent for a Ready Digital Workforce,Competition for high quality experienced digit...


In [3]:
# Create the embeddings instance from the model named in "path" and index the
# goal_objective text. Each document id is the row position in `go`, so a
# search hit can be mapped straight back to its row.
embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})
objective_texts = go["goal_objective"].tolist()
embeddings.index([(position, objective, None) for position, objective in enumerate(objective_texts)])

In [4]:
# Sanity check: query the index with every goal title and record which goal's
# objective text comes back as the single best match.
#
# The search is run once per goal and reused for both the results table and the
# printed line (previously each query was embedded and searched twice).
matched_rows = []
for _, query_id, query, query_description in go.to_records():
    # search(query, 1) returns the top hit; its first element is the id given at
    # index time, which is the row position in `go`.
    best_position = embeddings.search(query, 1)[0][0]
    matched_goal_name = go.iloc[best_position]['goal_name']
    matched_rows.append((query_id, query, matched_goal_name, query_description))
    print(f"{query_id} | {query} | {matched_goal_name}")

# NOTE(review): "resul_goalname" looks like a typo for "result_goalname"; it is
# kept unchanged because it is the header written to the CSV below.
results = pd.DataFrame(
    matched_rows,
    columns=["query_id", "query", "resul_goalname", "query_description"],
)
results.to_csv("./results/goal-objectives-combined-results.csv", index=False)

0 | Innovate for Competitive Advantage | Innovate for Competitive Advantage
1 | Optimize for Efficiencies and Improved Capability | Optimize for Efficiencies and Improved Capability
2 | Evolve Cybersecurity for an Agile and Resilient Defense Posture | Evolve Cybersecurity for an Agile and Resilient Defense Posture
3 | Cultivate Talent for a Ready Digital Workforce | Cultivate Talent for a Ready Digital Workforce


In [5]:
# Read the survey export and keep only the fourth column from the end,
# exposed as a one-column frame named "capability_description".
# NOTE(review): the -4 offset is positional; confirm it still points at the
# capability description column if the export layout changes.
survey = pd.read_excel("./resources/JCAT Export NIPR (APR-15-2022).xlsx")
description_column = survey.columns[-4]
capabilities = survey[description_column].to_frame(name="capability_description")
capabilities.head(1)

Unnamed: 0,capability_description
0,"GDI/ACE-M is a cloud based, multi enclave (SIP..."


In [6]:
# For every capability description, find the single closest goal in the index
# and keep that goal's name and objective text next to the query.
alignment_rows = []
for _, capability in capabilities.to_records():
    # The first element of the top hit is the id used at index time, which
    # doubles as the row label in `go`.
    top_hit_id = embeddings.search(capability, 1)[0][0]
    goal_fields = go.loc[top_hit_id, ['goal_name', 'goal_objective']].values.tolist()
    alignment_rows.append((capability, *goal_fields))

results = pd.DataFrame(alignment_rows)
results.columns = ["capability_query", "goal_name", "goal_objective"]
results.head()

Unnamed: 0,capability_query,goal_name,goal_objective
0,"GDI/ACE-M is a cloud based, multi enclave (SIP...",Innovate for Competitive Advantage,Innovation is a key element of future readines...
1,​The NorthStar Information Dominance Platform ...,Innovate for Competitive Advantage,Innovation is a key element of future readines...
2,Bodhi is NRO's Global Situational Awareness pl...,Optimize for Efficiencies and Improved Capability,Delivering IT capabilities with greater effici...
3,Maven Broad Area Surveillance & Targeting (BAS...,Innovate for Competitive Advantage,Innovation is a key element of future readines...
4,​High Accuracy Detection and Exploitation Syst...,Innovate for Competitive Advantage,Innovation is a key element of future readines...


In [7]:
# Persist the capability-to-goal alignment table; the row index is not written.
results.to_csv(path_or_buf="./results/survey-alignment.csv", index=False)