# Tutorial: using construct-tracker to create lexicons with generative AI


- Author: Daniel M. Low
- License: Apache 2.0
- Date: 01/30/2024

In [1]:
import sys
import os
import litellm
sys.path.append( './../../concept-tracker/') # TODO: replace with pip install construct-tracker
from concept_tracker import lexicon
from concept_tracker import api_keys # local





# Load API keys

In [2]:

# Model identifiers used later as `source=` / `model=` arguments.
gpt4_turbo = "gpt-4-1106-preview"
cohere = "command-nightly"

# Publish the API keys (kept in the local `api_keys` module) as environment variables.
os.environ["OPENAI_API_KEY"] = api_keys.open_ai       # str; the account needs at least $5 of credit
os.environ["COHERE_API_KEY"] = api_keys.cohere_trial  # str; free trial key, 5 requests per minute

# Ignore parameters you set if they don't belong to a given model.
litellm.drop_params = True




# Create lexicon for a new construct with GenAI


In [3]:
# Initialize an empty lexicon, then request tokens for the 'Insight' construct
# using the Cohere model as the source.
l = lexicon.Lexicon()
l.add(
    'Insight',
    section='tokens',
    value='create',  # presumably asks the model to generate tokens -- confirm in Lexicon.add
    source=cohere,
    max_tokens=150,
)
print(l.constructs['Insight']['tokens'])

  from .autonotebook import tqdm as notebook_tqdm


['aha moment', 'awareness', 'becomes clear', 'clarity', 'comprehend', 'comprehension', 'connects the dots', 'dawns on', 'discern', 'discernment', 'enlightenment', 'epiphany', 'eureka moment', 'gains insight', 'grasp', 'has an insight', 'insight', 'knowledge', 'light bulb moment', 'makes sense', 'penny drops', 'perceive', 'perception', 'realization', 'recognize', 'see', 'sees the light', 'understand', 'understanding', 'vision', 'wisdom']


# Extract counts

In [4]:
# Two toy documents to run the lexicon against.
documents = [
    "Every time I speak with my cousin Bob, I have great moments of insight, clarity, and wisdom",
    "He meditates a lot, but he's not super smart",
]

# lexicon.extract returns four objects; with normalize=False the table displayed
# below holds integer counts per construct plus a word_count column.
(
    feature_vectors,
    matches_counter_d,
    matches_per_doc,
    matches_per_construct,
) = lexicon.extract(documents, l.constructs, normalize=False)
feature_vectors

extracting... 


100%|██████████| 1/1 [00:00<00:00,  1.76it/s]


Unnamed: 0,Insight,word_count
0,3,17
1,0,8


In [29]:
# Per-construct view: the output shows, for each construct, one (count, matched tokens) pair per document.
matches_per_construct

{'Insight': [(1, ['insight']), (0, [])]}

In [30]:
# Token-level counter: the output maps each construct to {matched token: number of matches}.
matches_counter_d

{'Insight': {'insight': 1}}

In [31]:
# Per-document view: the output maps each document index to {construct: (count, matched tokens)}.
matches_per_doc

{0: {'Insight': (1, ['insight'])}, 1: {'Insight': (0, [])}}

# Add definition and examples to prompt


In [33]:
construct = 'Insight'
definition = "understanding or awareness of one's mental or emotional state or condition"
examples = 'insight; realized; learn; breakthrough'

# Build a prompt that carries the domain, the definition and the seed examples.
prompt = lexicon.generate_prompt(
    construct,
    prompt_name=construct,
    domain='mental health',
    definition=definition,
    examples=examples,
)
print(prompt)

# Keep the definition, who wrote it, and the examples alongside the construct's tokens.
l.constructs[construct]['definition'] = definition
l.constructs[construct]['definition_references'] = 'DML'
l.constructs[construct]['examples'] = examples


Provide many single words and some short phrases related to Insight (in the mental health domain). Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any explanation or additional text beyond the tokens.
Here is a definition of Insight: understanding or awareness of one's mental or emotional state or condition
Here are some examples (include these in the list): insight; realized; learn; breakthrough.


# Try GPT-4 Turbo model


In [34]:
# Generate tokens with GPT-4 Turbo from the custom prompt.
# I do this 3 times with different temperatures.
l.add(
    construct,
    section='tokens',
    value='create',
    prompt=prompt,
    source=gpt4_turbo,
    temperature=0.5,
    top_p=0.9,
    seed=42,
    max_tokens=150,
)
# The new tokens are merged with the tokens already generated for this construct.
l.constructs[construct]['tokens']

['Awareness',
 'Enlightenment',
 'Grasped',
 'Grasping',
 'Graspingness',
 'Insightful',
 'Insightfully',
 'Insightfulness',
 'Intuition',
 'Perceptiveness',
 'Realization',
 'Understanding',
 'Vision',
 'aha moment',
 'apprehension',
 'awakening',
 'awareness',
 'breakthrough',
 'clarity',
 'cognizance',
 'comprehension',
 'consciousness',
 'discernment',
 'discovery',
 'enlightenment',
 'epiphany',
 'grasp',
 'illumination',
 'inner wisdom',
 'insight',
 'introspection',
 'intuitive understanding',
 'learn',
 'mental clarity',
 'mindfulness',
 'perception',
 'psychological insight',
 'realization',
 'realized',
 'recognition',
 'revelation',
 'self-analysis',
 'self-appraisal',
 'self-assessment',
 'self-awareness',
 'self-concept',
 'self-consciousness',
 'self-contemplation',
 'self-discovery',
 'self-examination',
 'self-inquiry',
 'self-interpretation',
 'self-knowledge',
 'self-observation',
 'self-perception',
 'self-realization',
 'self-recognition',
 'self-reflection',
 'self

In [14]:
# List the source keys stored in tokens_metadata so the GPT-4 Turbo run can be located.
print(list(l.constructs[construct]['tokens_metadata']))

['command-nightly, temperature-0.1, top_p-1, max_tokens-150, seed-42, 24-03-11T18-40-50', 'gpt-4-1106-preview, temperature-0.5, top_p-0.9, max_tokens-150, seed-42, 24-03-11T18-47-32']


In [15]:
# Look up the metadata recorded for the GPT-4 Turbo run.
# The metadata keys begin with the model name and end with a run timestamp, so a key
# pasted from an earlier session raises KeyError after re-running the notebook.
# Select the key by its model-name prefix instead.
gpt4_sources = [
    key for key in l.constructs[construct]['tokens_metadata']
    if key.startswith(gpt4_turbo)
]
source = gpt4_sources[-1]  # dicts keep insertion order, so the last match is the most recent run
l.constructs[construct]['tokens_metadata'][source]

{'add_or_remove': 'add',
 'tokens': ['ah-ha moment',
  'aha moment',
  'awakening',
  'awareness',
  'breakthrough',
  'clarity',
  'cognition',
  'cognitive awareness',
  'comprehension',
  'consciousness',
  'discernment',
  'discovery',
  'emotional intelligence',
  'enlightenment',
  'epiphany',
  'grasp',
  'inner wisdom',
  'insight',
  'introspection',
  'intuition',
  'learn',
  'mental clarity',
  'mental revelation',
  'mindfulness',
  'perception',
  'personal growth',
  'psychological insight',
  'realized',
  'recognition',
  'reflection',
  'revelation',
  'self-analysis',
  'self-appraisal',
  'self-assessment',
  'self-awareness',
  'self-consciousness',
  'self-contemplation',
  'self-discovery',
  'self-examination',
  'self-inquiry',
  'self-knowledge',
  'self-observation',
  'self-perception',
  'self-realization',
  'self-recognition',
  'self-reflection',
  'self-understanding',
  'therapeutic insight',
  'understanding'],
 'prompt': "Provide many single words an

# Manually add or remove

In [16]:
# Manual curation: drop one token and add another, labelling each change with its source.
l.remove(
    construct,
    source='DML manually removing',
    remove_tokens=['perception'],
)
l.add(
    construct,
    section='tokens',
    value=['recognize'],
    source="DML added manually",
)
l.constructs[construct]['tokens']

['Awareness',
 'Enlightenment',
 'Grasped',
 'Grasping',
 'Graspingness',
 'Insightful',
 'Insightfully',
 'Insightfulness',
 'Intuition',
 'Perceptiveness',
 'Realization',
 'Understanding',
 'Vision',
 'ah-ha moment',
 'aha moment',
 'awakening',
 'awareness',
 'breakthrough',
 'clarity',
 'cognition',
 'cognitive awareness',
 'comprehension',
 'consciousness',
 'discernment',
 'discovery',
 'emotional intelligence',
 'enlightenment',
 'epiphany',
 'grasp',
 'inner wisdom',
 'insight',
 'introspection',
 'intuition',
 'learn',
 'mental clarity',
 'mental revelation',
 'mindfulness',
 'personal growth',
 'psychological insight',
 'realized',
 'recognition',
 'recognize',
 'reflection',
 'revelation',
 'self-analysis',
 'self-appraisal',
 'self-assessment',
 'self-awareness',
 'self-consciousness',
 'self-contemplation',
 'self-discovery',
 'self-examination',
 'self-inquiry',
 'self-knowledge',
 'self-observation',
 'self-perception',
 'self-realization',
 'self-recognition',
 'self-r

# Add another construct and generate definition with GPT-4 Turbo

In [35]:
# Switch to a second construct; its definition is requested from GPT-4 Turbo.
construct = 'Mindfulness'
examples = 'mindfulness; meditation; Buddhism'
definition = lexicon.api_request(
    f'Very brief definition of {construct}',
    model=gpt4_turbo,
)
print(definition)

Mindfulness is the practice of being fully present and engaged in the moment, aware of one's thoughts and feelings without judgment or distraction.


In [36]:
# Generate the prompt from the construct name, the generated definition and the seed examples.
prompt = lexicon.generate_prompt(
    construct,
    definition=definition,
    examples=examples,
)
print(prompt)

Provide many single words and some short phrases related to mindfulness. Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any explanation or additional text beyond the tokens.
Here is a definition of mindfulness: mindfulness is the practice of being fully present and engaged in the moment, aware of one's thoughts and feelings without judgment or distraction.
Here are some examples (include these in the list): mindfulness; meditation; Buddhism.


In [37]:
# Create tokens for the current construct with GPT-4 Turbo.
l.add(
    construct,
    section='tokens',
    value='create',
    prompt=prompt,
    source=gpt4_turbo,
    temperature=0.5,
    top_p=0.9,
    seed=42,
    max_tokens=150,
)
print(l.constructs[construct]['tokens'])




['Buddhism', 'acceptance', 'attention', 'awareness', 'balance', 'body scan', 'breath', 'breathe', 'calm', 'centering', 'clarity', 'compassion', 'consciousness', 'dharma', 'emotional regulation', 'equanimity', 'focus', 'gentle', 'grounding', 'harmony', 'heartfulness', 'impermanence', 'insight', 'intention', 'letting go', 'loving-kindness', 'meditation', 'metta', 'mindful eating', 'mindful movement', 'mindfulness', 'moment', 'non-attachment', 'non-judgmental', 'non-reactivity', 'observation', 'observe', 'openness', 'patience', 'pause', 'peace', 'practice', 'presence', 'present', 'reflection', 'sati', 'self-awareness', 'serenity', 'silence', 'simplicity', 'spaciousness', 'stillness', 'stress reduction', 'tranquility', 'vipassana', 'zazen', 'zen']


# Add important missing tokens


In [38]:
# Add a token by hand; the source label records that it was a manual addition.
l.add(
    construct,
    section='tokens',
    value=['meditate'],
    source="DML added manually",
)

# Extract

In [39]:

# Re-run extraction now that the lexicon holds two constructs.
# The matching options are passed through unchanged; see lexicon.extract for their exact semantics.
(
    feature_vectors,
    matches_counter_d,
    matches_per_doc,
    matches_per_construct,
) = lexicon.extract(
    documents,
    l.constructs,
    normalize=False,
    return_matches=True,
    add_lemmatized_lexicon=True,
    lemmatize_docs=False,
    exact_match_n=4,
)
feature_vectors

extracting... 


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.10it/s]


Unnamed: 0,Insight,Mindfulness,word_count
0,2,3,17
1,0,1,8


In [41]:
# Inspect which tokens matched in each document, broken down by construct.
matches_per_doc

{0: {'Insight': (2, ['clarity', 'insight']),
  'Mindfulness': (3, ['clarity', 'insight', 'moment'])},
 1: {'Insight': (0, []), 'Mindfulness': (1, ['meditate'])}}

In [None]:
# Store the generated definition on the construct entry.
# `definition` is a plain string, and str has no .copy() method, so the earlier
# `definition.copy()` raised AttributeError. Strings are immutable, so direct
# assignment cannot alias mutable state.
l.add(construct, section = 'definition', value = definition)
l.constructs[construct]['definition'] = definition


In [44]:
# Check the definition currently stored for this construct.
l.constructs[construct]['definition'] 

"Mindfulness is the practice of being fully present and engaged in the moment, aware of one's thoughts and feelings without judgment or distraction."

In [43]:
# Display the lexicon's name.
# NOTE(review): reading top to bottom, `l.name` is only assigned in a later cell (the
# execution counts here are out of order) -- confirm this shows the intended value on Restart & Run All.
l.name

'Insight lexicon'

In [45]:
# Keep the generated definition on the construct entry. This cell previously assigned the
# placeholder string 'banana', which would then be the definition present when the lexicon is saved.
l.constructs[construct]['definition'] = definition

In [42]:
# Describe the lexicon as a whole...
l.name = 'Insight lexicon'
l.description = 'Insight lexicon with constructs inspired by items of the Emotional Insight Scale'

# ...and record, for the current construct, which model the definition is attributed to and the seed examples.
l.constructs[construct]['definition_references'] = gpt4_turbo
l.constructs[construct]['examples'] = examples

In [46]:

# Save the lexicon under a path relative to the notebook's working directory.
l.save('./../data/lexicons/insight_lexicon')