# Tutorial: using construct-tracker to create lexicons with generative AI


- Author: Daniel M. Low
- License: Apache 2.0
- Date: 01/30/2024

In [22]:
import sys
import os
import litellm
sys.path.append( './../../concept-tracker/') # TODO: replace with pip install construct-tracker
from concept_tracker import lexicon
from concept_tracker import api_keys # local

# Load API keys

In [23]:

# Store the provider API keys (kept in the local `api_keys` module) in environment variables.
os.environ["OPENAI_API_KEY"] = api_keys.open_ai       # str; the OpenAI account needs at least $5 of credit
os.environ["COHERE_API_KEY"] = api_keys.cohere_trial  # str; free trial key, limited to 5 requests per minute

# Model identifiers reused by the cells below.
gpt4_turbo = "gpt-4-1106-preview"
cohere = "command-nightly"

# litellm will ignore parameters you set if they don't belong to a model.
litellm.drop_params = True




# Create lexicon for a new construct with GenAI


In [24]:
# Initialize an empty lexicon, then have the Cohere model generate tokens for "Insight".
l = lexicon.Lexicon()
l.add(
    "Insight",
    section="tokens",
    value="create",  # 'create' requests generated tokens from `source` (see printed list below)
    source=cohere,
    max_tokens=150,
)
print(l.constructs["Insight"]["tokens"])



['acuity', 'and', 'articulation', 'astuteness', 'awareness', 'characterization', 'clarity', 'comprehension', 'conceptualization', 'creativity', 'cunning', 'description', 'discernment', 'eloquence', 'emotional realization', 'end realization', 'enlightenment', 'epiphany', 'expression', 'foresight', 'hindsight', 'identification', 'imagination', 'insight', 'intellectual realization', 'introspection', 'intuition', 'lucidity', 'observation', 'penetration', 'perception', 'realization', 'realization of consequence', 'realization of potential', 'realization of purpose', 'realization of truth', 'recognition', 'result realization', 'sagacity', 'savvy', 'self-awareness', 'sharpness', 'sixth sense', 'spiritual realization', 'street smarts', 'three hundred sixty degree perspective', 'understanding', 'verbalization', 'vision', 'wisdom', 'wit']


# Extract counts

In [25]:
# Two toy documents in which to count lexicon matches.
documents = [
    "Every time I speak with my cousin Bob, I have great moments of insight, clarity, and wisdom",
    "He meditates a lot, but he's not super smart",
]

# extract() returns four objects; only the per-document feature table is displayed here.
(
    feature_vectors,
    matches_counter_d,
    matches_per_doc,
    matches_per_construct,
) = lexicon.extract(documents, l.constructs, normalize=False)
feature_vectors

100%|█████████████████████████████████████████████| 1/1 [00:01<00:00,  1.14s/it]


Unnamed: 0,Insight,word_count
0,4,17
1,0,8


# Add definition and examples to prompt


In [26]:
construct = "Insight"
definition = "understanding or awareness of one's mental or emotional state or condition"
examples = "insight; realized; learn; breakthrough"

# Build a prompt that includes the domain, the definition, and the seed examples.
prompt = lexicon.generate_prompt(
    construct,
    prompt_name=construct,
    domain="mental health",
    definition=definition,
    examples=examples,
)
print(prompt)

# Save definition and examples in lexicon
l.constructs[construct]["definition"] = definition
l.constructs[construct]["definition_references"] = "DML"
l.constructs[construct]["examples"] = examples


Provide many single words and some short phrases related to Insight (in the mental health domain). Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any explanation or additional text beyond the tokens.
Here is a definition of Insight: understanding or awareness of one's mental or emotional state or condition
Here are some examples (include these in the list): insight; realized; learn; breakthrough.


# Try GPT-4 Turbo model


In [27]:
# Generate tokens with GPT-4 Turbo using the prompt built above.
# I do this 3 times with different temperatures
l.add(
    construct,
    section="tokens",
    value="create",
    prompt=prompt,
    source=gpt4_turbo,
    temperature=0.5,
    top_p=0.9,
    seed=42,
    max_tokens=150,
)
l.constructs[construct]["tokens"]  # Will merge with other tokens already generated

['acuity',
 'aha moment',
 'and',
 'articulation',
 'astuteness',
 'awakening',
 'awareness',
 'breakthrough',
 'characterization',
 'clarity',
 'cognizance',
 'comprehension',
 'conceptualization',
 'consciousness',
 'creativity',
 'cunning',
 'description',
 'discernment',
 'discovery',
 'eloquence',
 'emotional realization',
 'end realization',
 'enlightenment',
 'epiphany',
 'expression',
 'foresight',
 'grasp',
 'hindsight',
 'identification',
 'illumination',
 'imagination',
 'insight',
 'intellectual realization',
 'introspection',
 'intuition',
 'learn',
 'lucidity',
 'mental',
 'mental clarity',
 'mindfulness',
 'observation',
 'penetration',
 'perception',
 'realization',
 'realization of consequence',
 'realization of potential',
 'realization of purpose',
 'realization of truth',
 'realized',
 'recognition',
 'reflection',
 'result realization',
 'revelation',
 'sagacity',
 'savvy',
 'self-analysis',
 'self-appraisal',
 'self-assessment',
 'self-awareness',
 'self-conscious

In [30]:
# List the sources recorded for this construct, to find which tokens came from
# GPT-4 Turbo. Each key names the model, its sampling parameters, and a timestamp.
print([*l.constructs[construct]["tokens_metadata"].keys()])

['command-nightly, temperature-0.1, top_p-1, max_tokens-150, seed-42, 24-01-30T18-51-45', 'gpt-4-1106-preview, temperature-0.5, top_p-0.9, max_tokens-150, seed-42, 24-01-30T18-55-52']


In [31]:
# The metadata keys end with the timestamp of the API call, so a hard-coded key
# raises KeyError as soon as the notebook is re-run. Look the key up by its
# model-name prefix instead.
tokens_metadata = l.constructs[construct]['tokens_metadata']
gpt4_sources = [key for key in tokens_metadata if key.startswith(gpt4_turbo)]
if not gpt4_sources:
    raise KeyError(f'No tokens_metadata entry starts with {gpt4_turbo!r}; run the GPT-4 Turbo cell first.')
source = gpt4_sources[-1]  # last matching key; with a single GPT-4 Turbo run this is that run
tokens_metadata[source]

{'add_or_remove': 'add',
 'tokens': ['aha moment',
  'awakening',
  'awareness',
  'breakthrough',
  'clarity',
  'cognizance',
  'comprehension',
  'consciousness',
  'discernment',
  'discovery',
  'enlightenment',
  'epiphany',
  'grasp',
  'illumination',
  'introspection',
  'learn',
  'mental',
  'mental clarity',
  'mindfulness',
  'perception',
  'realization',
  'realized',
  'recognition',
  'reflection',
  'revelation',
  'self-analysis',
  'self-appraisal',
  'self-assessment',
  'self-awareness',
  'self-consciousness',
  'self-consideration',
  'self-contemplation',
  'self-discovery',
  'self-evaluation',
  'self-examination',
  'self-exploration',
  'self-inquiry',
  'self-knowledge',
  'self-observation',
  'self-perception',
  'self-questioning',
  'self-realization',
  'self-recognition',
  'self-reflection',
  'self-scrutiny',
  'self-understanding',
  'understanding'],
 'prompt': "Provide many single words and some short phrases related to Insight (in the mental he

# Manually add or remove

In [32]:
# Curate the generated list by hand: drop one token, add one, each with a free-text source label.
l.remove(
    construct,
    source="DML manually removing",
    remove_tokens=["perception"],
)
l.add(
    construct,
    section="tokens",
    value=["recognize"],
    source="DML added manually",
)
# NOTE(review): the generated list still contains the stopword 'and', which is
# counted as an Insight match in the extraction output further down; consider
# removing it here as well.
l.constructs[construct]["tokens"]

['acuity',
 'aha moment',
 'and',
 'articulation',
 'astuteness',
 'awakening',
 'awareness',
 'breakthrough',
 'characterization',
 'clarity',
 'cognizance',
 'comprehension',
 'conceptualization',
 'consciousness',
 'creativity',
 'cunning',
 'description',
 'discernment',
 'discovery',
 'eloquence',
 'emotional realization',
 'end realization',
 'enlightenment',
 'epiphany',
 'expression',
 'foresight',
 'grasp',
 'hindsight',
 'identification',
 'illumination',
 'imagination',
 'insight',
 'intellectual realization',
 'introspection',
 'intuition',
 'learn',
 'lucidity',
 'mental',
 'mental clarity',
 'mindfulness',
 'observation',
 'penetration',
 'realization',
 'realization of consequence',
 'realization of potential',
 'realization of purpose',
 'realization of truth',
 'realized',
 'recognition',
 'recognize',
 'reflection',
 'result realization',
 'revelation',
 'sagacity',
 'savvy',
 'self-analysis',
 'self-appraisal',
 'self-assessment',
 'self-awareness',
 'self-consciousn

# Add another construct and generate definition with GPT-4 Turbo

In [33]:
construct = "Mindfulness"
examples = "mindfulness; meditation; Buddhism"

# Ask GPT-4 Turbo to write the definition instead of supplying one by hand.
definition = lexicon.api_request(
    f"Very brief definition of {construct}",
    model=gpt4_turbo,
)
print(definition)

Mindfulness is the practice of being fully present and engaged in the moment, aware of one's thoughts and feelings without judgment or distraction.


In [34]:
# Generate the prompt from the construct name, the generated definition, and the seed examples.
prompt = lexicon.generate_prompt(
    construct,
    definition=definition,
    examples=examples,
)
print(prompt)

Provide many single words and some short phrases related to mindfulness. Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any explanation or additional text beyond the tokens.
Here is a definition of mindfulness: mindfulness is the practice of being fully present and engaged in the moment, aware of one's thoughts and feelings without judgment or distraction.
Here are some examples (include these in the list): mindfulness; meditation; Buddhism.


In [35]:
# Create tokens for the new construct with GPT-4 Turbo, then print the resulting list.
l.add(
    construct,
    section="tokens",
    value="create",
    prompt=prompt,
    source=gpt4_turbo,
    temperature=0.5,
    top_p=0.9,
    seed=42,
    max_tokens=150,
)
print(l.constructs[construct]["tokens"])




['Buddhism', 'acceptance', 'attention', 'awareness', 'balance', 'breath', 'breathe', 'calm', 'centeredness', 'clarity', 'compassion', 'connection', 'consciousness', 'contemplation', 'detachment', 'discernment', 'ease', 'empathy', 'equanimity', 'flow', 'focus', 'grounding', 'harmony', 'impermanence', 'insight', 'intention', 'intuition', 'journey', 'kindness', 'letting go', 'loving-kindness', 'meditation', 'mindfulness', 'moment', 'non-attachment', 'non-judgmental', 'observe', 'openness', 'patience', 'pause', 'peace', 'perception', 'practice', 'presence', 'present', 'quietude', 'reflection', 'relaxation', 'serenity', 'silence', 'simplicity', 'solitude', 'stability', 'stillness', 'thoughtfulness', 'tranquility', 'understanding', 'vigilance', 'wakefulness', 'yin', 'zazen', 'zen']


# Add important missing tokens


In [36]:
# Add a token the model did not generate; `construct` is still 'Mindfulness' here.
l.add(construct, section ='tokens',value = ['meditate'], source="DML added manually")

# Extract

In [37]:

# Re-run extraction now that the lexicon holds both constructs.
# NOTE(review): the meaning of add_lemmatized_lexicon / lemmatize_docs /
# exact_match_n is defined in lexicon.extract — confirm there before changing them.
(
    feature_vectors,
    matches_counter_d,
    matches_per_doc,
    matches_per_construct,
) = lexicon.extract(
    documents,
    l.constructs,
    normalize=False,
    return_matches=True,
    add_lemmatized_lexicon=True,
    lemmatize_docs=False,
    exact_match_n=4,
)
feature_vectors

100%|█████████████████████████████████████████████| 2/2 [00:01<00:00,  1.48it/s]


Unnamed: 0,Insight,Mindfulness,word_count
0,4,3,17
1,0,1,8


In [38]:
# Show, for each document index, every construct's (match count, matched tokens) pair.
matches_per_doc

{0: {'Insight': (4, ['and', 'clarity', 'insight', 'wisdom']),
  'Mindfulness': (3, ['clarity', 'insight', 'moment'])},
 1: {'Insight': (0, []), 'Mindfulness': (1, ['meditate'])}}

In [39]:
# Save to lexicon: store the definition, its origin (the GPT-4 Turbo model name),
# and the seed examples on the current construct ('Mindfulness' at this point).
l.constructs[construct]['definition'] = definition
l.constructs[construct]['definition_references'] = gpt4_turbo
l.constructs[construct]['examples'] = examples

In [40]:
# Give the lexicon a name and description, then save it under the given path.
# NOTE(review): the output file format and whether the target directory is
# created automatically depend on Lexicon.save — confirm before relying on it.
l.name = 'Insight lexicon'
l.description = 'Insight lexicon with constructs inspired by items of the Emotional Insight Scale'
l.save('./../data/lexicons/insight_lexicon')