# Tutorial: using construct-tracker to create lexicons with generative AI


- Author: Daniel M. Low
- License: Apache 2.0
- Date: 01/30/2024

In [10]:
import sys
import os
import litellm
sys.path.append( './../src/') # TODO: replace with pip install construct-tracker
from construct_tracker import lexicon


In [11]:
from construct_tracker import api_keys # local

# Load API keys

In [19]:

# Model identifiers used as `source` / `model` in the cells below.
gpt4o = "gpt-4o-2024-05-13"
cohere = 'command-nightly'

# Expose the API keys through environment variables.
# OpenAI: str, API key; put at least $5 in the account.
os.environ["OPENAI_API_KEY"] = api_keys.open_ai
# Cohere: str, free trial API key, limited to 5 requests per minute.
os.environ["COHERE_API_KEY"] = api_keys.cohere_trial

# Ignore parameters you set if they don't belong to a given model.
litellm.drop_params = True





# Create lexicon for a new construct with GenAI


In [13]:
# Initialize an empty lexicon, then have the Cohere model create tokens
# for the 'Insight' construct.
l = lexicon.Lexicon()
l.add(
    'Insight',
    section='tokens',
    value='create',
    source=cohere,
    max_tokens=150,
)
print(l.constructs['Insight']['tokens'])

  from .autonotebook import tqdm as notebook_tqdm


['ah-ha moment', 'awareness', 'breakthrough', 'comprehend', 'comprehension', 'dawning', 'discern', 'discernment', 'discovery', 'enlightenment', 'epiphanic', 'epiphany', 'eureka moment', 'foresight', 'grasp', 'insight', 'intuition', 'know', 'knowledge', 'light bulb moment', 'penny dropping', 'perceive', 'perception', 'realization', 'recognize', 'revelation', 'see', 'understand', 'understanding', 'vision', 'wisdom']


# Extract counts

In [14]:
# Two toy documents to run the lexicon over.
documents = [
    "Every time I speak with my cousin Bob, I have great moments of insight, clarity, and wisdom",
    "He meditates a lot, but he's not super smart",
]

# Extract matches; the four returned objects are inspected one by one in
# the cells that follow.
feature_vectors, matches_counter_d, matches_per_doc, matches_per_construct = lexicon.extract(
    documents,
    l.constructs,
    normalize=False,
)
feature_vectors

extracting... 


100%|██████████| 1/1 [00:00<00:00,  1.35it/s]


Unnamed: 0,Insight,word_count
0,2,17
1,0,8


In [15]:
# Per construct: a list holding one (count, matched tokens) pair for each document.
matches_per_construct

{'Insight': [(2, ['insight', 'wisdom']), (0, [])]}

In [16]:
# Per construct: a token -> count mapping (appears to be totalled over all documents).
matches_counter_d

{'Insight': {'insight': 1, 'wisdom': 1}}

In [17]:
# Per document index: a construct -> (count, matched tokens) mapping.
matches_per_doc

{0: {'Insight': (2, ['insight', 'wisdom'])}, 1: {'Insight': (0, [])}}

# Add definition and examples to prompt


In [18]:
# Describe the construct: its name, a definition, and semicolon-separated examples.
construct = 'Insight'
definition = "understanding or awareness of one's mental or emotional state or condition"
examples = 'insight; realized; learn; breakthrough'

# Build a prompt that includes the domain, the definition and the examples.
prompt = lexicon.generate_prompt(
    construct,
    prompt_name=construct,
    domain='mental health',
    definition=definition,
    examples=examples,
)
print(prompt)

# Keep the definition, its reference and the examples on the lexicon entry.
l.constructs[construct]['definition'] = definition
l.constructs[construct]['definition_references'] = 'DML'
l.constructs[construct]['examples'] = examples


Provide many single words and some short phrases related to Insight (in the mental health domain). Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any explanation or additional text beyond the tokens.
Here is a definition of Insight: understanding or awareness of one's mental or emotional state or condition
Here are some examples (include these in the list): insight; realized; learn; breakthrough.


# Try GPT-4o model


In [20]:
# Create tokens with GPT-4o using the custom prompt. The result is merged
# with the tokens already generated for this construct.
# I do this 3 times with different temperatures.
l.add(
    construct,
    section='tokens',
    value='create',
    prompt=prompt,
    source=gpt4o,
    temperature=0.5,
    top_p=0.9,
    seed=42,
    max_tokens=150,
)
l.constructs[construct]['tokens']

['acknowledgment',
 'acuity',
 'ah-ha moment',
 'awareness',
 'awareness shift',
 'breakthrough',
 'clarity',
 'cognition',
 'comprehend',
 'comprehension',
 'consciousness',
 'dawning',
 'discern',
 'discernment',
 'discovery',
 'emotional intelligence',
 'enlightenment',
 'epiphanic',
 'epiphany',
 'eureka moment',
 'foresight',
 'grasp',
 'inner peace',
 'insight',
 'introspection',
 'intuition',
 'know',
 'knowledge',
 'learn',
 'light bulb moment',
 'lucidity',
 'mental clarity',
 'mindfulness',
 'observation',
 'penny dropping',
 'perceive',
 'perception',
 'perspective',
 'realization',
 'realized',
 'recognition',
 'recognize',
 'revelation',
 'see',
 'self-awareness',
 'self-discovery',
 'self-knowledge',
 'self-perception',
 'self-recognition',
 'self-reflection',
 'understand',
 'understanding',
 'vision',
 'wisdom']

In [21]:
# List the name of every metadata entry recorded for this construct, so that
# the tokens contributed by one specific source (e.g. the GPT-4o run) can be
# looked up afterwards.
print(list(l.constructs[construct]['tokens_metadata']))

['command-nightly, temperature-0.1, top_p-1, max_tokens-150, seed-42, 24-07-24T17-31-06', 'gpt-4o-2024-05-13, temperature-0.5, top_p-0.9, max_tokens-150, seed-42, 24-07-24T17-32-57']


In [23]:
# Look up the metadata entry for the GPT-4o run.
# Entry names embed the timestamp of the run (see the list printed by the
# previous cell), so a hard-coded name stops matching as soon as the notebook
# is re-run. Select the entry by its model-name prefix instead.
gpt4o_sources = [
    name
    for name in l.constructs[construct]['tokens_metadata']
    if name.startswith(gpt4o)
]
# Last match = most recently added entry (assumes tokens_metadata is a plain
# dict, which preserves insertion order). To inspect another entry, assign
# its full name to `source` instead.
source = gpt4o_sources[-1]
l.constructs[construct]['tokens_metadata'][source]

{'add_or_remove': 'add',
 'tokens': ['acknowledgment',
  'acuity',
  'awareness',
  'awareness shift',
  'breakthrough',
  'clarity',
  'cognition',
  'comprehension',
  'consciousness',
  'discernment',
  'discovery',
  'emotional intelligence',
  'enlightenment',
  'epiphany',
  'foresight',
  'grasp',
  'inner peace',
  'insight',
  'introspection',
  'intuition',
  'knowledge',
  'learn',
  'lucidity',
  'mental clarity',
  'mindfulness',
  'observation',
  'perception',
  'perspective',
  'realization',
  'realized',
  'recognition',
  'revelation',
  'self-awareness',
  'self-discovery',
  'self-knowledge',
  'self-perception',
  'self-recognition',
  'self-reflection',
  'understanding',
  'wisdom'],
 'prompt': "Provide many single words and some short phrases related to Insight (in the mental health domain). Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any explanation or additional text beyond the tokens.\nHere is a definition of

# Manually add or remove tokens

In [24]:
# Hand-edit the token list, tagging each change with a `source` label.
l.remove(
    construct,
    source='DML manually removing',
    remove_tokens=['perception'],
)
l.add(
    construct,
    section='tokens',
    value=['recognize'],
    source="DML added manually",
)
l.constructs[construct]['tokens']

['acknowledgment',
 'acuity',
 'ah-ha moment',
 'awareness',
 'awareness shift',
 'breakthrough',
 'clarity',
 'cognition',
 'comprehend',
 'comprehension',
 'consciousness',
 'dawning',
 'discern',
 'discernment',
 'discovery',
 'emotional intelligence',
 'enlightenment',
 'epiphanic',
 'epiphany',
 'eureka moment',
 'foresight',
 'grasp',
 'inner peace',
 'insight',
 'introspection',
 'intuition',
 'know',
 'knowledge',
 'learn',
 'light bulb moment',
 'lucidity',
 'mental clarity',
 'mindfulness',
 'observation',
 'penny dropping',
 'perceive',
 'perspective',
 'realization',
 'realized',
 'recognition',
 'recognize',
 'revelation',
 'see',
 'self-awareness',
 'self-discovery',
 'self-knowledge',
 'self-perception',
 'self-recognition',
 'self-reflection',
 'understand',
 'understanding',
 'vision',
 'wisdom']

# Add another construct and generate definition with GPT-4o

In [25]:
# Switch to a second construct and ask the GPT-4o model for its definition.
construct = 'Mindfulness'
examples = 'mindfulness; meditation; Buddhism'
definition = lexicon.api_request(
    f'Very brief definition of {construct}',
    model=gpt4o,
)
print(definition)

Mindfulness is the practice of being fully present and engaged in the current moment, aware of your thoughts, feelings, and surroundings without judgment.


In [26]:
# Build the token-generation prompt from the definition and examples.
prompt = lexicon.generate_prompt(
    construct,
    definition=definition,
    examples=examples,
)
print(prompt)

Provide many single words and some short phrases related to mindfulness. Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any explanation or additional text beyond the tokens.
Here is a definition of mindfulness: mindfulness is the practice of being fully present and engaged in the current moment, aware of your thoughts, feelings, and surroundings without judgment.
Here are some examples (include these in the list): mindfulness; meditation; Buddhism.


In [27]:
# Create the tokens for the current construct with GPT-4o, then show them.
l.add(
    construct,
    section='tokens',
    value='create',
    prompt=prompt,
    source=gpt4o,
    temperature=0.5,
    top_p=0.9,
    seed=42,
    max_tokens=150,
)
print(l.constructs[construct]['tokens'])




['Buddhism', 'acceptance', 'attention', 'awareness', 'awareness of breath', 'balance', 'being', 'body-scan', 'breath', 'calm', 'clarity', 'compassion', 'concentration', 'consciousness', 'dharma', 'equanimity', 'flow', 'focus', 'gentle', 'gratitude', 'grounding', 'harmony', 'inner-peace', 'insight', 'intention', 'kindness', 'loving-kindness', 'mantra', 'meditation', 'metta', 'mindful-breathing', 'mindful-eating', 'mindful-list', 'mindful-walking', 'mindfulness', 'moment', 'nonjudgment', 'now', 'observation', 'openness', 'patience', 'peace', 'perspective', 'presence', 'quiet', 'reflection', 'relaxation', 'samadhi', 'self-awareness', 'self-compassion', 'serenity', 'shavasana', 'silence', 'simplicity', 'stillness', 'tranquility', 'understanding', 'vipassana', 'zen']


# Add important missing tokens


In [28]:
# Add a token by hand that the generated list above does not contain.
l.add(
    construct,
    section='tokens',
    value=['meditate'],
    source="DML added manually",
)

# Extract

In [30]:
# Reminder of the documents the lexicon is applied to.
documents

['Every time I speak with my cousin Bob, I have great moments of insight, clarity, and wisdom',
 "He meditates a lot, but he's not super smart"]

In [29]:

# Extract again now that the lexicon holds two constructs, this time
# spelling out the matching options.
feature_vectors, matches_counter_d, matches_per_doc, matches_per_construct = lexicon.extract(
    documents,
    l.constructs,
    normalize=False,
    return_matches=True,
    add_lemmatized_lexicon=True,
    lemmatize_docs=False,
    exact_match_n=4,
)
feature_vectors

extracting... 


100%|██████████| 2/2 [00:01<00:00,  1.50it/s]


Unnamed: 0,Insight,Mindfulness,word_count
0,3,3,17
1,0,1,8


In [31]:
# Per document index: a construct -> (count, matched tokens) mapping.
matches_per_doc

{0: {'Insight': (3, ['clarity', 'insight', 'wisdom']),
  'Mindfulness': (3, ['clarity', 'insight', 'moment'])},
 1: {'Insight': (0, []), 'Mindfulness': (1, ['meditate'])}}

In [33]:
# Store the definition on the construct: once through `add`, once by
# assigning the entry directly.
l.add(construct, section='definition', value=definition)
l.constructs[construct]['definition'] = definition


In [34]:
# Show the definition currently stored for the construct.
l.constructs[construct]['definition']

'Mindfulness is the practice of being fully present and engaged in the current moment, aware of your thoughts, feelings, and surroundings without judgment.'

In [37]:
# A definition can be overwritten by assigning to the entry directly.
# Assign the real definition here rather than a throwaway placeholder value:
# the lexicon is saved to disk a few cells below, and whatever is stored at
# that point is what gets written out.
l.constructs[construct]['definition'] = definition

In [39]:
# Record where the definition came from and which examples were used.
l.constructs[construct]['definition_references'] = gpt4o
l.constructs[construct]['examples'] = examples

# Name and describe the lexicon as a whole.
l.name = 'Insight lexicon'
l.description = 'Insight lexicon with constructs inspired by items of the Emotional Insight Scale'

In [40]:

# Save the lexicon under the package's data/lexicons directory (relative path).
l.save(
    './../src/construct_tracker/data/lexicons/insight_lexicon'
)