In [90]:
import anthropic
import json
import os
from datetime import datetime

In [91]:
# Input resources and generated output all live under this relative directory.
res_dir = 'res/'
# Root folder for the saved model responses (one sub-folder per author).
claude_dir = res_dir + 'claude3/'
# Plain-text inputs, one entry per line.
authors_list_filepath = res_dir + 'gutenberg_author_list'
prompts_filepath = res_dir + 'prompts'

# Model settings: used for every API request and copied into every saved record.
model_config = dict(
    model="claude-3-opus-20240229",
    max_tokens=100,
)

In [92]:
# Lazily created client shared by all calls that do not pass their own.
_default_client = None


def request_model(system_spec, prompt, client=None):
    """Send a single-turn request to the configured model and return its text.

    Args:
        system_spec: System prompt passed as the ``system`` argument.
        prompt: User message content.
        client: Optional client object exposing ``messages.create``. When
            omitted, one ``anthropic.Anthropic()`` instance is created on first
            use and reused afterwards, instead of building a new client for
            every request.

    Returns:
        The ``text`` attribute of the first content block of the response.

    Notes:
        The model name and token limit are read from the module-level
        ``model_config``. No credentials are passed here; the SDK is expected
        to pick them up from the environment.
    """
    global _default_client
    if client is None:
        if _default_client is None:
            _default_client = anthropic.Anthropic()
        client = _default_client

    message = client.messages.create(
        model=model_config['model'],
        system=system_spec,
        max_tokens=model_config['max_tokens'],
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return message.content[0].text

In [93]:
def save_data(data):
    """Write one request/response record as JSON into the author's folder.

    The target is ``<claude_dir>/<author>/request<id>.json``; missing parent
    directories are created first and an existing file is overwritten.

    Args:
        data: JSON-serialisable dict. Its ``'author'`` and ``'id'`` entries are
            used to build the file path; the whole dict is written out.
    """
    # os.path.join avoids the doubled separator that plain string formatting
    # produced when claude_dir already ends with '/'.
    filename = os.path.join(
        claude_dir, str(data['author']), f"request{data['id']}.json"
    )
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f)

In [94]:
# Read one author per line. The context manager closes the file, and blank
# lines (e.g. from a trailing newline) are skipped so no empty author name is
# ever used.
with open(authors_list_filepath, 'r', encoding='utf-8') as authors_file:
    authors = [name for name in (line.strip() for line in authors_file) if name]
authors[:5]

['Abraham Lincoln',
 'Agatha Christie',
 'Albert Einstein',
 'Aldous Huxley',
 'Alexander Pope']

In [95]:
# Read one prompt per line. The context manager closes the file, and blank
# lines (e.g. from a trailing newline) are skipped so no empty prompt is sent
# to the model.
with open(prompts_filepath, 'r', encoding='utf-8') as prompts_file:
    prompts = [text for text in (line.strip() for line in prompts_file) if text]
prompts[:5]

['What is the typical weather during March in Southern Europe?',
 'Tell me how would you explain the look of the sun to the blind person.',
 'What is the meaning the life for you?',
 'What is your favourite color and what it reminds you?',
 'Present me the advantage of salt over pepper.']

In [96]:
# Generate and store one response per (author, prompt) pair.
for author in authors[:2]: # only 2 authors for testing purposes
    system_spec = f"Answer only in the style of {author}'s books. Length of {model_config['max_tokens']} tokens. If needed make up the facts or the story."

    # enumerate() numbers the requests from 0 for each author, so no manual
    # counter is needed and the builtin id() is no longer shadowed.
    for request_id, prompt in enumerate(prompts[:2]): # only 2 prompts for testing purposes
        response = request_model(system_spec, prompt)

        # The record keeps the model settings next to the request so each
        # saved file is self-describing.
        data = {
            "id": request_id,
            **model_config,
            "created_at": datetime.now().isoformat(),
            "author": author,
            "system_spec": system_spec,
            'prompt': prompt,
            'response': response
        }

        save_data(data)

### Notes on prompting

- Shorter answers (5000k tokens?) allow the model to keep its focus on the prompt's task.
- Details such as '\n' characters or an unfinished last sentence shouldn't affect our purpose and can be ignored.
- Prompt only about things the author could have known about in their day, e.g. Abraham Lincoln did not know The Beatles.
- Full-sentence system prompts work better than simple commands.

### TBD

- More authors in the analysis means less generated text in the style of each author. Fewer authors allow us to prepare a bigger dataset for each processed author.