In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from langchain.llms import OpenAI
# from langchain.chat_models import ChatOpenAI
import os
# initialize the models

# Candidate OpenAI model names; `model` selects the one used by this notebook.
models = ['gpt-3.5-turbo-16k', 'gpt-4', 'gpt-4o', 'gpt-4-1106-preview', "text-davinci-003"]
model = models[2]

# Build the LLM client by calling the constructor directly with keyword
# arguments. The API key is not passed here; presumably it is picked up from
# the OPENAI_API_KEY environment variable (see the commented-out argument).
openai = OpenAI(
    model_name=model,
    # openai_api_key=os.environ["OPENAI_API_KEY"],
    temperature=0.2,
)



In [3]:
# Root of the documentation tree, given relative to the notebook's working directory.
BASE_DOCS_DIR = '../../docs/'
# file_name = dosc

In [4]:
from genai.tools.dir_utils import get_tree_structure
# Build a text rendering of the docs directory tree and show it.
tree_structure = get_tree_structure(path_base = BASE_DOCS_DIR)
print(tree_structure)

docs/
├── .pages
├── blog/
│   ├── .authors.yml
│   ├── index.md
│   └── posts/
│       ├── .pages
│       └── Launch.md
├── index.md
├── javascripts/
│   ├── chatbase.js
│   ├── copy-link.js
│   └── mathjax.js
├── Managenai/
│   ├── .pages
│   ├── brainstorming.md
│   ├── build_plan.md
│   ├── code_of_conduct.md
│   ├── contributing.md
│   ├── explorations_blog.md
│   ├── index.md
│   ├── project_requirements.md
│   ├── site_graph.md
│   └── strategy.md
├── shared/
├── Understanding/
│   ├── .pages
│   ├── agents/
│   │   ├── .pages
│   │   ├── actions_and_tools.md
│   │   ├── applications.md
│   │   ├── cognitive_architecture.md
│   │   ├── commercial.md
│   │   ├── environments.md
│   │   ├── evaluating_and_comparing.md
│   │   ├── examples.md
│   │   ├── index.md
│   │   ├── memory.md
│   │   ├── rag.md
│   │   └── systems.md
│   ├── architectures/
│   │   ├── .pages
│   │   ├── embedding.md
│   │   ├── evaluating_and_comparing.md
│   │   ├── frameworks.md
│   │   ├── generation.md

In [5]:
def get_file_name(file_path, base_dir=BASE_DOCS_DIR):
    """Return `file_path` joined onto `base_dir`.

    Args:
        file_path: Path of a file, relative to `base_dir`.
        base_dir: Directory to prepend; defaults to `BASE_DOCS_DIR`.

    Returns:
        The combined path as produced by `os.path.join`.
    """
    full_path = os.path.join(base_dir, file_path)
    return full_path

def get_structure_pattern(file_class=None):
    """Return the heading/subheading outline template for a class of file.

    Args:
        file_class: Key of the wanted pattern. `None` selects 'index.md'.

    Returns:
        The pattern text registered under the selected key.

    Raises:
        KeyError: If no pattern is registered for `file_class`.
    """
    pattern_key = 'index.md' if file_class is None else file_class
    patterns = {
        'index.md': """
NOTE: '-' is used to denote a general topic, sentence, or consideration but not considered a 'list' item.
## Executive Summary (TL;DR)
- Concise summary highlighting the essence of the topic and its significance.
- Designed for readability by a non-technical or executive-level audience.
- Utilize emojis, images, and visual elements effectively to emphasize key points.
- Include Mermaid diagrams where appropriate, or describe necessary images as `IMAGE: <image description>`.

## Practical Application and Usage
- Focus on providing immediately actionable guidance and high-priority examples.
- Extract and condense key usage instructions from earlier content into concise, actionable steps.
- Offer 'How-to' guides, quick-start tips, and links for direct application.

## Introduction and Relevance
- Thorough introduction to the topic, highlighting its relevance and importance.
- Discuss core components and their interplay within the broader context of Generative AI.

## Core Content and Results
- Detailed exploration of specific aspects under clear subheadings.
- Provide illustrations or diagrams (Mermaid or `IMAGE:<image description>`) for complex concepts.
- For extensive topics, include brief summaries and links to dedicated markdown files. If markdown files are already created, link to them here. If markdown files are needed, suggest them. 

## Technological Aspects
- Explore relevant tools, technologies, and methodologies.
- Highlight current trends and future directions in technology related to the topic.

## Background or Theoretical Foundation (if necessary)
- Delve into historical context and foundational theories.
- Clarify essential theoretical concepts and terminologies for comprehensive understanding.

## Ethical Considerations and Challenges
- Address ethical dilemmas, challenges, and potential risks.
- Discuss strategies for ethical practice and risk mitigation.

## Extended Examples (if applicable)
- Link to practical examples, simulations, or code snippets for hands-on understanding.
- Direct readers to external resources, tools, or demonstrations for further exploration.

## Advanced Topics and Further Exploration (if applicable)
- Present open challenges and future research directions.
- Deep dive into complex aspects with links to advanced readings and resources.

## FAQs and Common Queries
- Tackle frequently asked questions and common queries related to the section.

## Summary and Key Takeaways
- Recap the main points and emphasize the key messages from the section.

## References and Additional Reading
- List citations and provide links to source materials and further reading.

----


    """,
    }
    return patterns[pattern_key]


def get_markdown_text(markdown_file):
    """Read a markdown file and return its full contents as a string.

    Args:
        markdown_file: Path of the file to read.

    Returns:
        The text of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so non-ASCII content (emoji, accented
    # characters) reads the same on every platform instead of depending on
    # the locale's default encoding.
    with open(markdown_file, 'r', encoding='utf-8') as f:
        return f.read()
# Could potentially do this in few-shot prompt templates
# These should be generated

In [140]:
class SmartPromptTemplate:
    """Assemble a prompt from optional, required, and fixed template pieces.

    The final template is built as: the optional fragments selected by the
    keyword arguments (joined with newlines), followed by the required
    template, followed by the variable-independent template. The whole string
    is then filled in with `str.format`, so literal braces in any piece must
    be doubled (`{{` / `}}`).
    """

    def __init__(self, template_required=None, template_optional_dict=None, template_variable_independent=None):
        """Store the template pieces, substituting empty values for `None`.

        Args:
            template_required: Template text that is always included.
            template_optional_dict: Mapping from keyword-argument name to the
                template fragment included when that argument is supplied.
            template_variable_independent: Text appended to every prompt.
        """
        self.template_required = template_required if template_required is not None else ""
        self.template_optional_dict = template_optional_dict if template_optional_dict is not None else {}
        self.template_variable_independent = template_variable_independent if template_variable_independent is not None else ""

    def get_prompt(self, **kwargs):
        """Build the prompt for the given variables.

        Optional fragments appear in the order their keyword arguments are
        passed. A fragment is included only when its argument is supplied with
        a value other than `None`, so passing `role=None` switches the role
        sentence off instead of rendering the text "None". Keyword arguments
        with no matching fragment are still available to `str.format` but
        add no fragment of their own.

        Returns:
            The formatted prompt string.

        Raises:
            KeyError: If an included template references a variable that was
                not passed.
        """
        optional_parts = [
            self.template_optional_dict[name]
            for name, value in kwargs.items()
            if name in self.template_optional_dict and value is not None
        ]
        template = '\n'.join(optional_parts) + self.template_required + self.template_variable_independent
        return template.format(**kwargs)

# Optional prompt fragments keyed by variable name. A fragment can only be
# included in a prompt when a keyword argument of the same name is passed to
# `SmartPromptTemplate.get_prompt`; the `{name}` placeholder is then filled
# with that argument's value. Commented-out entries are currently disabled.
template_optional_dict = {

    'role': "You are a {role}",
    # 'project_name': "You are working on a project called: {project_name}\n",
    # 'project_goals': "You are part of a team working to: {project_goals}\n",
    'present_task_description': "You are helping to: {present_task_description}\n",
    'file_name': "You are helping to rewrite and expand a file called {file_name}\n",
    'structure_pattern': "Please use a heading/subheading structure that follows the general pattern : {structure_pattern}\n",
    'tree_structure': "The present tree-structure:\n {tree_structure}\n ",
    'markdown_text': "Markdown input \n>>>\n{markdown_text}"
}


# Fixed instruction block appended to the end of every prompt, independent of
# which optional fragments are selected.
# NOTE: this text is passed through `str.format` together with the rest of the
# template, so any literal `{` or `}` added here must be doubled.
template_variable_independent = \
"""
Things to keep in mind:
* present ALL html links without changing the link's text.
* Preserve any urls or relative links without changing them. 
* Be sure to use `##` `###` subheadings and appropriately to reference sections and subsections.
* keep ALL images `<img ...></img>` that are referenced in any manner.  
* Keep all code blocks that are referenced in any manner.
* Please be sure to keep any admonitions like `!!!` and `???`.
* Be as honest and as accurate as possible. 
* Be succinct in your responses. 
* Keep the ORIGINAL VOICE of the author there, and avoid unecessary changes to headings and subheadings. 
* If text is sparse or missing create a reasonable outline and follow it. 
* If you see MANAGEN (<and execute requests in trailing parenthesis>) then please evolve and expand upon the text in that area. 
* If you see any MANAGEN requests to make a mermaid diagram, please do so using the information that was provided.
* PRESERVE ALL STRUCTURED ADMONITIONS and following (that start with e.g. `!!!` and `???`) and DO NOT CHANGE THEM INTO BULLETS. Those need to be preserved.
* PRESERVE ALL INFORMATION IN MAIN MARKDOWN TEXT
* COPY ALL INFORMATON THAT IS IN ADMONITIONS!
* After you have written improvements, please write any commentary you see that would best describe the way a human could improve it. 
* We'll get $1000 if we do this right, so let's do our best!
* Write EOF on a new line after the last line of the text to indicate nothing new.

Here's the content.
"""
# Please, do follow these instructions closely for it if we don't get this right, we might lose our job. 
# * reformat any bulleted lists of links where github links have `!!! abstract`, arxiv's have `!!! tip` and others have `!!! information`. 
# * Please be sure to keep any amonitions like `!!!` and `???`.
# Template text that is always included; at present it holds only a newline.
template_required = \
"""
"""

# Prompt builder combining the three template pieces defined in this cell.
spt = SmartPromptTemplate(template_required=template_required, 
template_optional_dict=template_optional_dict, 
template_variable_independent=template_variable_independent)


In [170]:
# Variables for the prompt. Only names that have an active entry in
# `template_optional_dict` add a fragment to the prompt; `project_name` and
# `project_goals` are passed below but their fragments are commented out.
role = "expert AI technology creator, communicator, and markdown / mkdocs expert"

project_name = "Managing Generative AI"
# NOTE: the last string fragment ends with a backslash, which continues the
# statement onto the whitespace-only line that follows it. Keep that line
# free of code and comments.
project_goals = "Create an automated repository that is able to explain Generative AI "\
        "and how to improve upon it in plain-English and how to enable it from idea to product, as well as new and interesting research. "\
                
present_task_description="Improve the markdown based on best understandings."
                         

# Target file, relative to BASE_DOCS_DIR. Alternatives are kept commented out.
# file_from_base_dir = 'Using/examples/by_field/science/biology/proteins.md'
# file_from_base_dir = 'Understanding/prompting/index.md'
# file_from_base_dir = 'Understanding/architectures/models/transformers.md'
file_from_base_dir = 'Using/ethically/index.md'
file_name=get_file_name(file_from_base_dir)
tree_structure=get_tree_structure(path_base=BASE_DOCS_DIR)
markdown_text=get_markdown_text(file_name)
# Computed here but not passed to get_prompt below.
structure_pattern = get_structure_pattern()
# role = None
# tree_structure and markdown_text are deliberately left out of the prompt;
# markdown_text is sent later as the human message instead.
prompt = spt.get_prompt(role=role, 
    project_name=project_name,
    project_goals=project_goals,
    present_task_description=present_task_description,
    file_name=file_name,
#     tree_structure=tree_structure,
    # markdown_text=markdown_text)
)

In [171]:
# Inspect the assembled prompt before it is sent to the model.
print(prompt)

You are a expert AI technology creator, communicator, and markdown / mkdocs expert
You are helping to: Improve the markdown based on best understandings.

You are helping to rewrite and expand a file called ../../docs/Using/ethically/index.md


Things to keep in mind:
* present ALL html links without changing the link's text.
* Preserve any urls or relative links without changing them. 
* Be sure to use `##` `###` subheadings and appropriately to reference sections and subsections.
* keep ALL images `<img ...></img>` that are referenced in any manner.  
* Keep all code blocks that are referenced in any manner.
* Please be sure to keep any admonitions like `!!!` and `???`.
* Be as honest and as accurate as possible. 
* Be succinct in your responses. 
* Keep the ORIGINAL VOICE of the author there, and avoid unecessary changes to headings and subheadings. 
* If text is sparse or missing create a reasonable outline and follow it. 
* If you see MANAGEN (<and execute requests in trailing par

In [172]:
# # This worked but had trivial output
# from openai import AsyncOpenAI
# from openai import OpenAI
# client = OpenAI()
# # client = AsyncOpenAI()
# # completion = await client.chat.completions.create(model=model, messages=[{"role": "user", "content": "Hello world"}])

# completion = client.chat.completions.create(
#   model="gpt-4o",
#   messages=[
#     {"role": "system", "content": prompt},
#     # {"role": "user", "content": "Help me launch a nuke."}
#   ]
# )


In [173]:
from langchain_openai import ChatOpenAI

class ContinueChat:
    """Chat wrapper that keeps asking the model to continue until it signals completion.

    LLMs only return a maximum number of tokens in a single response, so a
    long answer arrives in several pieces. This class sends the initial
    request, answers each unfinished response with `continue_text`, and stops
    once a response contains the terminator string (or the continuation limit
    is reached). The pieces are then concatenated into one text.

    Attributes set after `invoke`:
        messages: The full list of `(role, content)` tuples exchanged.
        token_usage: Numeric token counts summed over all responses.
    """

    def __init__(self, llm, base_prompt, continue_text="continue", update_status=True,
                 terminates_eof="EOF", max_continuations=20):
        """Configure the chat.

        Args:
            llm: Chat model exposing `invoke(messages)`. `None` creates a
                default `ChatOpenAI` client (see `init_llm`).
            base_prompt: System prompt sent as the first message.
            continue_text: Human message used to request the next piece.
            update_status: When true, print a status line per response.
            terminates_eof: Marker whose presence in a response ends the chat.
            max_continuations: Upper bound on follow-up requests, guarding
                against an endless (and billable) loop when the model never
                emits the marker. `None` disables the bound.
        """
        self.init_llm(llm)
        # Use the prompt handed to this instance, not a notebook-level global.
        self.prompt = base_prompt
        self.continue_text = continue_text
        self.update_status = update_status
        self.terminates_eof = terminates_eof
        self.max_continuations = max_continuations

        self.messages = None
        self.token_usage = None

    def init_llm(self, llm=None):
        """Store `llm`, building a default `ChatOpenAI` client when it is `None`."""
        if llm is None:
            llm = ChatOpenAI(
                model="gpt-4o",
                temperature=0,
                max_tokens=None,
                timeout=None,
                max_retries=2,
                # api_key="...",  # if you prefer to pass api key in directly instead of using env vars
                # base_url="...",
                # organization="...",
                # other params...
            )
        self.llm = llm

    def last_message_terminates(self, last_message_content):
        """Return True when the terminator marker occurs anywhere in the content."""
        return self.terminates_eof in last_message_content

    def _report(self, index, usage):
        """Print one status line for response number `index` when enabled."""
        if self.update_status:
            print(f"Response {index} recieved with token usage: {usage.get('completion_tokens')} used.")

    def continue_messages(self, text):
        """Run the chat for `text` until the model emits the terminator.

        Args:
            text: Human message that starts the conversation.

        Returns:
            A `(messages, token_usage)` tuple: the full list of
            `(role, content)` tuples, and a dict with the numeric token
            counts summed over every response received.
        """
        messages = [
            ("system", self.prompt),
            ("human", f"{text}"),
        ]
        token_usage_list = []
        i = 0
        while True:
            ai_msg = self.llm.invoke(messages)
            # Record usage right after each call so every response is counted
            # exactly once.
            usage = ai_msg.response_metadata.get('token_usage', {})
            token_usage_list.append(usage)
            self._report(i, usage)
            # Model output is stored under the "ai" role so it is replayed to
            # the model as its own earlier answer, not as a system instruction.
            messages.append(("ai", ai_msg.content))

            if self.last_message_terminates(ai_msg.content):
                break
            if self.max_continuations is not None and i >= self.max_continuations:
                if self.update_status:
                    print(f"Stopping after {i} continuations without seeing {self.terminates_eof!r}.")
                break

            messages.append(("human", self.continue_text))
            i += 1

        # Sum numeric counters only; usage payloads may also carry nested
        # detail dicts, which cannot be added together.
        token_usage = {}
        for usage in token_usage_list:
            for key, value in usage.items():
                if isinstance(value, (int, float)) and not isinstance(value, bool):
                    token_usage[key] = token_usage.get(key, 0) + value

        return messages, token_usage

    def invoke(self, text):
        """Run the chat and return the concatenated model output.

        Stores the conversation in `self.messages` and the summed usage in
        `self.token_usage`.

        Returns:
            All model responses joined together, with every occurrence of the
            terminator marker removed.
        """
        messages, token_usage = self.continue_messages(text)
        self.messages = messages
        self.token_usage = token_usage
        combined = ''.join(content for role_name, content in messages if role_name == "ai")
        # remove eof
        return combined.replace(self.terminates_eof, "")
    


In [174]:
# llm=None lets ContinueChat build its own ChatOpenAI client; the markdown
# text is sent as the human message.
CC = ContinueChat(llm=None, base_prompt=prompt)
# messages, token_usage = CC.continued(markdown_text)
response_content = CC.invoke(markdown_text)

Response 0 recieved with token usage: 1031 used.
Response 0 recieved with token usage: 1031 used.


In [175]:

# print(response_content)

In [176]:
#write the file to disk with a _temp suffix and then open it with a system call a command line comparer to visualize the two
# files side by side.
import os
import subprocess
import tempfile
import webbrowser

def write_to_file(file_name, text):
    """Write `text` to `file_name`, replacing any existing content.

    Args:
        file_name: Path of the file to create or overwrite.
        text: String to write.

    Returns:
        `file_name`, unchanged.
    """
    # Encode explicitly as UTF-8 so non-ASCII characters are written the same
    # way on every platform instead of depending on the locale default.
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write(text)
    return file_name

# Please be sure to run `brew install tkdiff` or otherwise install tkdiff on your computer
# ! brew install tkdiff
# ! brew install meld # this is better because it allows management
def open_with_comparer(file_name1, file_name2, comparer='meld'):
    """Launch an external diff tool on two files and wait for it to exit.

    Args:
        file_name1: Path of the first file.
        file_name2: Path of the second file.
        comparer: Executable to run; must be 'meld' or 'tkdiff'.

    Raises:
        AssertionError: If `comparer` is not one of the supported tools.
    """
    supported_tools = ['meld', 'tkdiff']
    assert comparer in supported_tools
    command = [comparer, file_name1, file_name2]
    subprocess.run(command)

def make_name(file_name):
    """Return the first `<base>_temp<N><ext>` path that does not exist yet.

    N starts at 0 and increases by one until an unused path is found.

    Args:
        file_name: Path whose base name and extension seed the temp name.

    Returns:
        A path next to `file_name` that does not currently exist.
    """
    stem, suffix = os.path.splitext(file_name)
    index = 0
    while True:
        candidate = f'{stem}_temp{index}{suffix}'
        if not os.path.exists(candidate):
            return candidate
        index += 1
# Pick an unused temp path next to the original file and write the model
# output there, leaving the original untouched for comparison.
temp_name = make_name(file_name)
write_to_file(temp_name, response_content)


'../../docs/Using/ethically/index_temp0.md'

In [177]:
# Show which two files are about to be compared.
print(f"file name: {file_name}")
print(f"temp name: {temp_name}")

file name: ../../docs/Using/ethically/index.md
temp name: ../../docs/Using/ethically/index_temp0.md


In [178]:

# Open the original and the rewritten file in the diff tool; this call waits
# until the tool is closed.
open_with_comparer(file_name, temp_name)



	Using the fallback 'C' locale.


Couldn't set the locale: unsupported locale setting; falling back to 'C' locale


In [179]:
# Ask whether to accept the rewritten file. The answer is normalised so that
# "Y", "yes" or " y " are all treated as acceptance.
input_answer = input("Is the output correct Yes/no/deletefile? (y/n/d)").strip().lower()
if input_answer in ('y', 'yes'):
    # Accept: move the temp file over the original. os.replace overwrites an
    # existing destination on every platform, which os.rename does not.
    os.replace(temp_name, file_name)
    print(f"File {file_name} has been updated.")
else:
    print(f"File {file_name} has not been updated.")
    # "n" keeps the temp file for later inspection; "d" deletes it.
    if input_answer in ('d', 'delete', 'deletefile'):
        os.remove(temp_name)
        print(f"File {temp_name} has been deleted.")


File ../../docs/Using/ethically/index.md has been updated.


BOT THAT LOOKS AT DIFFERENCES CHUNK BY CHUNK AND AMENDS THEM. 
CREATE DIFF, ITERATE ON DIFF AND UPDATE MODIFIED DOCUMENT