In [1]:
from langchain.llms import OpenAI
import os
# initialize the models

# Candidate OpenAI model names; the entry at index 1 ('gpt-4') is the one selected below.
models = ['gpt-3.5-turbo-16k', 'gpt-4','gpt-4-32k',"text-davinci-003"]
model = models[1]

# LangChain LLM wrapper that a later cell invokes as `openai(prompt)`.
# The API key is read from the environment, so a missing OPENAI_API_KEY raises KeyError here.
# NOTE(review): 'gpt-4' is presumably a chat-completion model while `OpenAI` comes from
# `langchain.llms` -- confirm whether a chat-model wrapper was intended.
openai = OpenAI(
    model_name=model,
    openai_api_key= os.environ["OPENAI_API_KEY"]
)



In [2]:
# Root of the documentation tree, relative to the notebook's working directory.
# Used as the default for get_tree_structure(path_base=...) and get_file_name(base_dir=...).
BASE_DOCS_DIR = '../../docs/'
# file_name = dosc

In [3]:
from pathlib import Path

class DisplayablePath(object):
    """One node of a directory tree that can render itself as a line of a tree listing.

    Nodes are produced by :meth:`make_tree`, which walks a directory depth-first and
    yields the root followed by every accepted descendant. Each node keeps a reference
    to its parent node so :meth:`displayable` can rebuild the indentation prefix.
    """

    # Connector placed directly in front of an entry's own name.
    display_filename_prefix_middle = '├──'
    display_filename_prefix_last = '└──'
    # Indentation contributed by each ancestor. Note the naming is counter-intuitive:
    # `..._middle` (blank) is used when the ancestor WAS the last child of its parent,
    # `..._last` (vertical bar) when the ancestor still has siblings below it.
    display_parent_prefix_middle = '    '
    display_parent_prefix_last = '│   '

    def __init__(self, path, parent_path, is_last):
        """Create a node.

        Args:
            path: Filesystem path of this entry (anything accepted by ``str()``).
            parent_path: The parent ``DisplayablePath`` node, or ``None`` for the root.
            is_last: True when this entry is the last child of its parent.
        """
        self.path = Path(str(path))
        self.parent = parent_path
        self.is_last = is_last
        # Depth is 0 for the root and grows by one per level.
        self.depth = self.parent.depth + 1 if self.parent else 0

    @property
    def displayname(self):
        """Entry name, with a trailing ``/`` appended for directories."""
        if self.path.is_dir():
            return self.path.name + '/'
        return self.path.name

    @classmethod
    def make_tree(cls, root, parent=None, is_last=False, criteria=None):
        """Yield a node for ``root`` and then, depth-first, for every accepted descendant.

        Args:
            root: Directory to walk.
            parent: Parent node of ``root``; ``None`` for the top-level call.
            is_last: Whether ``root`` is the last child of ``parent``.
            criteria: Callable taking a ``Path`` and returning truthy to keep the entry.
                Defaults to accepting everything. A rejected directory is not descended into.

        Yields:
            ``DisplayablePath`` nodes in display order (children sorted by their
            lower-cased path string).
        """
        root = Path(str(root))
        criteria = criteria or cls._default_criteria

        displayable_root = cls(root, parent, is_last)
        yield displayable_root

        children = sorted((path for path in root.iterdir() if criteria(path)),
                          key=lambda s: str(s).lower())
        last_index = len(children) - 1
        for index, path in enumerate(children):
            is_last = index == last_index
            if path.is_dir():
                yield from cls.make_tree(path,
                                         parent=displayable_root,
                                         is_last=is_last,
                                         criteria=criteria)
            else:
                yield cls(path, displayable_root, is_last)

    @classmethod
    def _default_criteria(cls, path):
        """Default filter: accept every path."""
        return True

    def displayable(self):
        """Return this node's line of the tree listing (prefix, connector and name).

        The root node is rendered as its bare display name.
        """
        if self.parent is None:
            return self.displayname

        _filename_prefix = (self.display_filename_prefix_last
                            if self.is_last
                            else self.display_filename_prefix_middle)

        parts = ['{!s} {!s}'.format(_filename_prefix,
                                    self.displayname)]

        # Walk up the ancestors (excluding the root, which contributes no indentation),
        # collecting one indentation segment per level; segments are gathered
        # innermost-first and reversed at the end.
        parent = self.parent
        while parent and parent.parent is not None:
            parts.append(self.display_parent_prefix_middle
                         if parent.is_last
                         else self.display_parent_prefix_last)
            parent = parent.parent

        return ''.join(reversed(parts))

# With a criteria (skip hidden files)
def is_not_hidden(path):
    """Tree filter: keep visible entries and anything whose name contains '.pages'.

    Entries whose name contains 'Icon' are always rejected; other names are rejected
    only when they start with a dot and do not contain '.pages'.
    """
    entry_name = path.name
    if 'Icon' in entry_name:
        return False
    if '.pages' in entry_name:
        return True
    return not entry_name.startswith(".")
    
# paths = DisplayablePath.make_tree(
#     Path(base_docs_dir),
#     criteria=is_not_hidden
# )
# for path in paths:
#     print(path.displayable())



# paths = DisplayablePath.make_tree(Path(base_docs_dir), criteria=is_not_hidden)
# for path in paths:
#     print(path.displayable())

def get_tree_structure(path_base=BASE_DOCS_DIR):
    """Render the directory under ``path_base`` as a tree listing.

    Entries are filtered with ``is_not_hidden``; the result is a single string with
    one ``DisplayablePath.displayable()`` line per entry, joined by newlines.
    """
    tree_nodes = DisplayablePath.make_tree(Path(path_base), criteria=is_not_hidden)
    return '\n'.join(node.displayable() for node in tree_nodes)
    
# Build the listing for the default docs directory once and show it; the same
# `tree_structure` string is later interpolated into the rewrite prompt.
tree_structure = get_tree_structure()
print(tree_structure)

docs/
├── .pages
├── assets/
│   ├── genai_logo_edited.svg
│   └── genai_logo_v1.png
├── CNAME
├── Engineering/
│   ├── .pages
│   ├── actions_and_tools.md
│   ├── commercial_products.md
│   ├── computation.md
│   ├── deployment.md
│   ├── examples.md
│   ├── frameworks_and_tools.md
│   ├── index.md
│   ├── marking_and_detecting.md
│   ├── models.md
│   ├── observability.md
│   ├── regulation.md
│   └── web_plugins.md
├── index.md
├── Managing/
│   ├── .pages
│   ├── brainstorming.md
│   ├── build_plan.md
│   ├── contributing.md
│   └── managing.md
├── stylesheets/
│   └── extra.css
└── Understanding/
    ├── .pages
    ├── agents/
    │   ├── .pages
    │   ├── chains.md
    │   ├── environments.md
    │   ├── evaluation.md
    │   ├── examples.md
    │   ├── index.md
    │   ├── interpreters.md
    │   ├── memory.md
    │   └── systems.md
    ├── data/
    │   ├── .pages
    │   ├── augmentation.md
    │   ├── data.md
    │   ├── embedding.md
    │   ├── preprocessing.md
    │   ├── 

In [4]:
def get_file_name(file_path, base_dir=BASE_DOCS_DIR):
    """Return ``file_path`` joined onto ``base_dir`` using ``os.path.join``.

    Despite the name, the result is a full path string that includes the base
    directory, not a bare file name.
    """
    full_path = os.path.join(base_dir, file_path)
    return full_path

def get_structure_pattern():
    """Return the heading/subheading skeleton that rewritten pages are asked to follow.

    The text is returned verbatim (including its leading newline and the four-space
    indentation of every line) for interpolation into the prompt template.
    """
    return """
    <<intro>>
    ## <<First topic>>
    ### <<topic sub component>>
    ### <<topic sub component>>
    ### ...
    ## <<Second topic>>
    ### <<topic sub component>>
    ### ...
    ## ...
    ## Essential References
    << List with '-' of references with each reference providing written as [link_title](link_address) and a thoughtful but succinct output>>
    """


def get_markdown_text(markdown_file):
    """Read a markdown file and return its full contents as a string.

    Args:
        markdown_file: Path of the file to read (anything accepted by ``open``).

    Returns:
        The decoded text of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # locale encoding (the docs contain non-ASCII characters).
    with open(markdown_file, 'r', encoding='utf-8') as f:
        return f.read()
# Could potentially do this in few-shot prompt templates
# These should be generated

In [10]:


# Project-level context interpolated into the prompt template.
project_name = "Managing Generative AI"
project_goals = "Overall: Create an automated repository that is able to explain in plain-English and in code, "\
                "Generative AI and how to improve upon it. "
# Adjacent string literals are concatenated verbatim, so the first fragment must end
# with a space; without it the prompt read "...understandings.Be as honest...".
present_task_description="Improve the markdown based on best understandings. "\
                         "Be as honest and as accurate as possible. Be succinct in your responses. Preserve any URLS."

from langchain import PromptTemplate

# Prompt asking the model to rewrite/expand one docs page. Placeholders in braces are
# filled by prompt_template.format(...). Each instruction line ends with a literal "\n"
# escape followed by a real line break, so the rendered prompt has a blank line between
# those instructions.
# NOTE(review): the sentence "After the markdown When the text is presented, ..." and the
# trailing "Markdown Response: {markdown_text}" label read as if the input markdown were
# the model's answer -- confirm the intended wording.
template = \
""" You are working on a project called: {project_name}\n
You are part of a team working to: {project_goals}\n
You are helping to: {present_task_description}\n
You are helping to rewrite and expand a file called {file_name} in the present tree-structure:\n {tree_structure}\n 
Please use a heading/subheading structure that follows the general pattern : {structure_pattern}\n
Please present html links without changing the link's text. 
After the markdown When the text is presented, please improve upon it. If no text is present create a reasonable outline following the pattern above and fill it in.
Please preserve any urls or relative links without changing them. 
Please be sure to use `#` appropriately to reference sections and subsections.
Markdown Response: {markdown_text}"""

# Declares the seven placeholders used in the template above.
prompt_template = PromptTemplate(
    input_variables=["project_name", "project_goals", "present_task_description", "file_name", "tree_structure", "markdown_text", "structure_pattern"],
    template=template
)
# Pick the page to rewrite and gather every value the template needs.
file_from_base_dir = 'Understanding/overview/index.md'
file_name = get_file_name(file_from_base_dir)
tree_structure = get_tree_structure()
markdown_text = get_markdown_text(file_name)
structure_pattern = get_structure_pattern()

# Render the final prompt string.
prompt = prompt_template.format(
    project_name=project_name,
    project_goals=project_goals,
    present_task_description=present_task_description,
    file_name=file_name,
    tree_structure=tree_structure,
    structure_pattern=structure_pattern,
    markdown_text=markdown_text,
)

In [20]:


# Project-level context interpolated into the admonition prompt template.
project_name = "Managing Generative AI"
project_goals = "Overall: Create an automated repository that is able to explain in plain-English and in code, "\
                "Generative AI and how to improve upon it. "
# Adjacent string literals are concatenated verbatim, so the first fragment must end
# with a space; without it the prompt read "...understandings.Be as honest...".
present_task_description="Improve the markdown based on best understandings. "\
                         "Be as honest and as accurate as possible. Be succinct in your responses. Preserve any URLS."

from langchain import PromptTemplate

# Prompt asking the model to convert link-style reference entries into admonition
# blocks. It shows one "before" example and one "after" example, each wrapped in a pair
# of ''' delimiters, followed by the general instructions and the page's markdown.
# The closing ''' after the "after" example was missing, which left the remaining
# instructions inside the example; it is restored here.
# NOTE(review): "following the pattern above" refers to a structure pattern that this
# template does not include -- confirm whether that sentence should stay.
template = \
""" You are working on a project called: {project_name}\n
You are part of a team working to: {project_goals}\n
You are helping to: {present_task_description}\n
You are helping to rewrite and expand a file called {file_name}\n
Primarily you are tasked with adding admonitions to the markdown document to make it more nice to read. 
for instance, you will see formats like this:
'''
- [Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/pdf/2304.03442.pdf) A simulation of different agents of different personalities with a time-evolving environment that could be manipulated by the agents.   In it they discuss several challenges and solutions:

    **Remembering**
    
    _Observation Memory_ A memory stream maintaining a record of experience: memory objects with a description in natural language, and timestamping.
    Uses, _recency_, _importance_ and relevance_ to add weight to information that is more recent, how the memory is compared in relation to other memories, and how the information pertains to the present situation. 
'''
This needs to be reformatted in the following manner:
'''
<div class="result" markdown>
!!! tip "[Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/pdf/2304.03442.pdf)"
    A simulation of different agents of different personalities with a time-evolving environment that could be manipulated by the agents.   In it they discuss several challenges and solutions:

??? example 
     **Remembering**
    
    _Observation Memory_ A memory stream maintaining a record of experience: memory objects with a description in natural language, and timestamping.
    Uses, _recency_, _importance_ and relevance_ to add weight to information that is more recent, how the memory is compared in relation to other memories, and how the information pertains to the present situation. 
</div> 
'''

You should make this modification for EVERY link that is presented that doesn't have admonitions already there. 
After the markdown When the text is presented, please improve upon it. If no text is present create a reasonable outline following the pattern above and fill it in.
Please preserve any urls or relative links without changing them. 
Please be sure to use `#` appropriately to reference sections and subsections.
Please be sure to use appropriate spacing to make admonitions work.

Here is the markdown text:
{markdown_text}
"""

# Declares the five placeholders used in the template above.
prompt_template = PromptTemplate(
    input_variables=["project_name", "project_goals", "present_task_description", "file_name", "markdown_text"],
    template=template
)
# Page that receives the admonition pass, resolved against the docs root.
file_from_base_dir = 'Understanding/agents/systems.md'
file_name = get_file_name(file_from_base_dir)

# Load the page and render the final prompt string.
markdown_text = get_markdown_text(file_name)
prompt = prompt_template.format(
    project_name=project_name,
    project_goals=project_goals,
    present_task_description=present_task_description,
    file_name=file_name,
    markdown_text=markdown_text,
)

In [21]:
# Show the fully rendered prompt for a visual check before it is sent to the model.
print(prompt)

 You are working on a project called: Managing Generative AI

You are part of a team working to: Overall: Create an automated repository that is able to explain in plain-English and in code, Generative AI and how to improve upon it. 

You are helping to: Improve the markdown based on best understandings.Be as honest and as accurate as possible. Be succinct in your responses. Preserve any URLS.

You are helping to rewrite and expand a file called ../../docs/Understanding/agents/systems.md

Primarily you are tasked with adding admonitions to the markdown document to make it more nice to read. 
for instance, you will see formats like this:
'''
- [Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/pdf/2304.03442.pdf) A simulation of different agents of different personalities with a time-evolving environment that could be manipulated by the agents.   In it they discuss several challenges and solutions:

    **Remembering**
    
    _Observation Memory_ A memory s

In [22]:
# Send the rendered prompt to the `openai` LLM wrapper created in the first cell and
# keep its response (printed in the next cell).
ans = openai(prompt)

In [23]:
# Display the model's response as plain text.
print(ans)

When an agent (or model) engages in an interaction with another agent, the result is an agent system. This is achieved by implementing and equipping various agents, and then setting them up so that the output of one is used as the input of the other. Although one may argue that an agent's input can be perceived as another 'tool' where the different agent prompts the action, this argument isn't entirely valid. The reason is that, in most cases, the same considerations apply to all agents but not to all tools. Therefore, we deal with it separately.

<div class="result" markdown>

!!! example "Binary system (asymmetric calling)"
    In this system, ChatGPT initiates communication with DallE using a prompt. DallE responds by delivering an image. This image is then used in the final response of ChatGPT or returned as-is.

<div class="result" markdown>
        
!!! example "Multi-body system (bidirectional calling)"
    This system consists of multiple agents, and they engage in ongoing disc

In [34]:
import markdown
# Convert the loaded markdown with the 'toc' extension enabled.
# NOTE(review): `md` is not used by any later cell visible here -- confirm it is still needed.
md = markdown.markdown(markdown_text, extensions=['toc'])
# from markdown.extensions.toc import TocExtension
# html = markdown.markdown(markdown_text, extensions=[TocExtension(baselevel=1)])
# print(html)


In [2]:
import re
import json

def extract_toc(md_text, level_key="l", text_key='t'):
    """Extract the ATX (``#``-style) headings of a markdown document as a flat list.

    Args:
        md_text: Markdown source text.
        level_key: Dictionary key under which the heading level is stored.
        text_key: Dictionary key under which the heading text is stored.

    Returns:
        A list of ``{level_key: <number of '#'>, text_key: <heading text>}`` dicts in
        document order. Lines inside fenced code blocks (``` or ~~~) are ignored, and
        whitespace surrounding the heading text is stripped.
    """
    # A heading starts at column 0 with one or more '#', then whitespace, then text.
    heading_re = re.compile(r'^(?P<hashes>#+)[ \t]+(?P<text>\S.*?)[ \t]*$')
    # A fence is up to three spaces of indent followed by at least three ` or ~.
    fence_re = re.compile(r'^ {0,3}(?P<fence>`{3,}|~{3,})')

    toc_structure = []
    open_fence = None  # fence marker of the code block we are currently inside, if any
    for line in md_text.strip().splitlines():
        fence = fence_re.match(line)
        if open_fence is None:
            if fence:
                open_fence = fence.group('fence')
                continue
        else:
            # Only a bare fence of the same character and at least the same length
            # closes the block; everything else inside it is code, not a heading.
            if (fence
                    and fence.group('fence')[0] == open_fence[0]
                    and len(fence.group('fence')) >= len(open_fence)
                    and not line[fence.end():].strip()):
                open_fence = None
            continue

        heading = heading_re.match(line)
        if heading:
            toc_structure.append({level_key: len(heading.group('hashes')),
                                  text_key: heading.group('text')})
    return toc_structure

def serialize_toc(toc_structure):
    """Return ``toc_structure`` encoded as JSON, pretty-printed with 4-space indentation."""
    serialized = json.dumps(toc_structure, indent=4)
    return serialized

def test_extract_toc():
    """Check extract_toc + serialize_toc against a small document with three heading levels.

    Prints the extracted structure, then asserts that its JSON form matches the
    expected text once surrounding whitespace is stripped from both sides.
    """
    # Expected JSON rendering of the headings in the sample document below.
    expected_output = """
[
    {
        "l": 1,
        "t": "Title"
    },
    {
        "l": 2,
        "t": "Section 1"
    },
    {
        "l": 3,
        "t": "Subsection 1.1"
    },
    {
        "l": 2,
        "t": "Section 2"
    },
    {
        "l": 3,
        "t": "Subsection 2.1"
    }
]
    """

    # Sample document: headings at column 0 interleaved with body text.
    md_text = """
# Title

## Section 1

Content here

### Subsection 1.1

More content here

## Section 2

### Subsection 2.1

Yet more content
    """

    toc_structure = extract_toc(md_text)
    print(toc_structure)
    serialized_toc = serialize_toc(toc_structure)

    got = serialized_toc.strip()
    wanted = expected_output.strip()
    assert got == wanted, f"Expected:\n{expected_output}\nGot:\n{serialized_toc}"

# Run the inline check; it prints the extracted structure and raises AssertionError on mismatch.
test_extract_toc()


[{'l': 1, 't': 'Title'}, {'l': 2, 't': 'Section 1'}, {'l': 3, 't': 'Subsection 1.1'}, {'l': 2, 't': 'Section 2'}, {'l': 3, 't': 'Subsection 2.1'}]


In [3]:
# NOTE(review): bare literal with no effect beyond echoing its value -- looks like a
# leftover scratch cell; confirm it can be removed.
3

3