In [1]:
from langchain.llms import OpenAI
# from langchain.chat_models import ChatOpenAI
import os
# initialize the models

models = ['gpt-3.5-turbo-16k', 'gpt-4','gpt-4-32k',"text-davinci-003"]
model = models[1]

openai = OpenAI(
    model_name=model,
    openai_api_key= os.environ["OPENAI_API_KEY"]
)



In [2]:
BASE_DOCS_DIR = '../../docs/'
# file_name = dosc

In [3]:
from pathlib import Path

class DisplayablePath(object):
    display_filename_prefix_middle = '├──'
    display_filename_prefix_last = '└──'
    display_parent_prefix_middle = '    '
    display_parent_prefix_last = '│   '

    def __init__(self, path, parent_path, is_last):
        self.path = Path(str(path))
        self.parent = parent_path
        self.is_last = is_last
        if self.parent:
            self.depth = self.parent.depth + 1
        else:
            self.depth = 0

    @property
    def displayname(self):
        if self.path.is_dir():
            return self.path.name + '/'
        return self.path.name

    @classmethod
    def make_tree(cls, root, parent=None, is_last=False, criteria=None):
        root = Path(str(root))
        criteria = criteria or cls._default_criteria

        displayable_root = cls(root, parent, is_last)
        yield displayable_root

        children = sorted(list(path
                               for path in root.iterdir()
                               if criteria(path)),
                          key=lambda s: str(s).lower())
        count = 1
        for path in children:
            is_last = count == len(children)
            if path.is_dir():
                yield from cls.make_tree(path,
                                         parent=displayable_root,
                                         is_last=is_last,
                                         criteria=criteria)
            else:
                yield cls(path, displayable_root, is_last)
            count += 1

    @classmethod
    def _default_criteria(cls, path):
        return True

    @property
    def displayname(self):
        if self.path.is_dir():
            return self.path.name + '/'
        return self.path.name

    def displayable(self):
        if self.parent is None:
            return self.displayname

        _filename_prefix = (self.display_filename_prefix_last
                            if self.is_last
                            else self.display_filename_prefix_middle)

        parts = ['{!s} {!s}'.format(_filename_prefix,
                                    self.displayname)]

        parent = self.parent
        while parent and parent.parent is not None:
            parts.append(self.display_parent_prefix_middle
                         if parent.is_last
                         else self.display_parent_prefix_last)
            parent = parent.parent

        return ''.join(reversed(parts))

# With a criteria (skip hidden files)
def is_not_hidden(path):
    return  not ( 'Icon' in path.name or '.DS_Store' in path.name or 'stylesheets'  in path.name or \
        'CNAME' in path.name or 'assets' in path.name or '.svg' in path.name or '.pages' in path.name)
    
# paths = DisplayablePath.make_tree(
#     Path(base_docs_dir),
#     criteria=is_not_hidden
# )
# for path in paths:
#     print(path.displayable())



# paths = DisplayablePath.make_tree(Path(base_docs_dir), criteria=is_not_hidden)
# for path in paths:
#     print(path.displayable())

def get_tree_structure(path_base=BASE_DOCS_DIR):
    
    paths = DisplayablePath.make_tree(Path(path_base), criteria=is_not_hidden)
    path_str = [p.displayable() for p in paths]
    # for path in paths:
    #     print(path.displayable())
    # return ''.join([p for p in path.displayable()])
    return '\n'.join(path_str)
    
tree_structure = get_tree_structure()
print(tree_structure)

docs/
├── index.md
├── Managen.ai/
│   ├── brainstorming.md
│   ├── build_plan.md
│   ├── contributing.md
│   ├── index.md
│   ├── managing.md
│   └── requirements.md
├── Understanding/
│   ├── agents/
│   │   ├── actions_and_tools.md
│   │   ├── agents.md
│   │   ├── chains.md
│   │   ├── environments.md
│   │   ├── evaluating_and_comparing.md
│   │   ├── evaluation.md
│   │   ├── examples.md
│   │   ├── index.md
│   │   ├── interpreters.md
│   │   ├── memory.md
│   │   ├── rag.md
│   │   └── systems.md
│   ├── architectures/
│   │   ├── alignment.md
│   │   ├── embedding.md
│   │   ├── evaluating_and_comparing.md
│   │   ├── finetuning.md
│   │   ├── index.md
│   │   ├── models/
│   │   │   ├── components.md
│   │   │   ├── developing_architectures.md
│   │   │   ├── diffusers.md
│   │   │   ├── gans.md
│   │   │   ├── hybrid_models.md
│   │   │   ├── index.md
│   │   │   ├── reinforcement_learning.md
│   │   │   └── transformers.md
│   │   ├── optimization.md
│   │   ├── optimizing_

In [4]:
def get_file_name(file_path, base_dir=BASE_DOCS_DIR):
    # iterator for getting filenames
    return os.path.join(base_dir, file_path)

def get_structure_pattern():
    pattern = \
    """
    <<intro>>
    ## <<First topic>>
    ### <<topic sub component>>
    ### <<topic sub component>>
    ### ...
    ## <<Second topic>>
    ### <<topic sub component>>
    ### ...
    ## ...
    ## Essential References
    << List with '-' of references with each reference providing written as [link_title](link_address) and a thoughtful but succinct output>>
    """
    return pattern


def get_markdown_text(markdown_file):
    with open(markdown_file, 'r') as f:
        markdown_text = f.read()
    # print(markdown_text)
    return markdown_text
# Could potentially do this is in few-shot prompt templates
# These should be generated

In [12]:
role = "expert AI technology creator and communicator"

project_name = "Managing Generative AI"
project_goals = "Overall: Create an automated repository that is able to explain in plain-English and in code, "\
                "Generative AI and how to improve upon it. "
present_task_description="Improve the markdown based on best understandings."\
                         "Be as honest and as accurate as possible. Be succinct in your responses. Preserve any URLS."

from langchain import PromptTemplate
# Idea
# Select between prompt patterns
# Chain select them more effectively. 
# The present tree-structure:\n {tree_structure}\n 
# Please use a heading/subheading structure that follows the general pattern : {structure_pattern}\n
template = \
""" 
You are a {role}
You are working on a project called: {project_name}\n
You are part of a team working to: {project_goals}\n
You are helping to: {present_task_description}\n
You are helping to rewrite and expand a file called {file_name} 
Here are some things we'd like you to be sure to do:
* Please present ALL html links without changing the link's text. 
* Preserve any urls or relative links without changing them. 
* Be sure to use `##` `###` subheadings and appropriately to reference sections and subsections.
* Please be sure to keep any amonitions like `!!!` and `???`.
* Please reformat any bulleted lists of links where github links have `!!! code`, arxiv's have `!!! tip` and others have `!!! information`. 

After the markdown When the text is presented (after >>>), please improve upon it. 
If text is sparse or missing create a reasonable outline following the pattern above and fill it in.

Markdown Response: 
>>>\n
{markdown_text}"""

prompt_template = PromptTemplate(
    # input_variables=["role", "project_name", "project_goals", "present_task_description", "file_name", "tree_structure", "structure_pattern", "markdown_text", ],
        input_variables=["role", "project_name", "project_goals", "present_task_description", "file_name",  "markdown_text", ],
    template=template
)
file_from_base_dir = 'Using/deploying/index.md'
file_name=get_file_name(file_from_base_dir)
tree_structure=get_tree_structure()
markdown_text=get_markdown_text(file_name)
structure_pattern = get_structure_pattern()
prompt=prompt_template.format(role=role,
                              project_name=project_name,
                       project_goals=project_goals,
                       present_task_description=present_task_description,
                       file_name=file_name,
                       # tree_structure=tree_structure,
                       #  structure_pattern=structure_pattern,
                       markdown_text=markdown_text,)

In [13]:


# project_name = "Managing Generative AI"
# project_goals = "Overall: Create an automated repository that is able to explain in plain-English and in code, "\
#                 "Generative AI and how to improve upon it. "
# present_task_description="Improve the markdown based on best understandings."\
#                          "Be as honest and as accurate as possible. Be succinct in your responses. Preserve any URLS."

# from langchain import PromptTemplate

# template = \
# """ You are working on a project called: {project_name}\n
# You are part of a team working to: {project_goals}\n
# You are helping to: {present_task_description}\n
# You are helping to rewrite and expand a file called {file_name}\n
# Primarily you are tasked with adding admonitions to the markdown document to make it more nice to read. 
# for instance, you will see formats like this:
# '''
# - [Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/pdf/2304.03442.pdf) A simulation of different agents of different personalities with a time-evolving environment that could be manipulated by the agents.   In it they discuss several challenges and solutions:

#     **Remembering**
    
#     _Observation Memory_ A memory stream maintaining a record of experience: memory objects with a description in natural language, and timestamping.
#     Uses, _recency_, _importance_ and relevance_ to add weight to information that is more recent, how the memory is compared in relation to other memories, and how the information pertains to the present situation. 
# '''
# This needs to be reformatted in the following manner:
# '''
# <div class="result" markdown>
# !!! tip "[Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/pdf/2304.03442.pdf)"
#     A simulation of different agents of different personalities with a time-evolving environment that could be manipulated by the agents.   In it they discuss several challenges and solutions:

# ??? example 
#      **Remembering**
    
#     _Observation Memory_ A memory stream maintaining a record of experience: memory objects with a description in natural language, and timestamping.
#     Uses, _recency_, _importance_ and relevance_ to add weight to information that is more recent, how the memory is compared in relation to other memories, and how the information pertains to the present situation. 
# </div> 

# You should make this modification for EVERY link that is presented that doesn't have admonitions already there. 
# After the markdown When the text is presented, please improve upon it. If no text is present create a reasonable outline following the pattern above and fill it in.
# Please preserve any urls or relative links without changing them. 
# Please be sure to use `#` appropriately to reference sections and subsections.
# Please be sure to use appropriate spacing to make admonitions work.

# Here is the markdown text:
# {markdown_text}
# """

# prompt_template = PromptTemplate(
#     input_variables=["project_name", "project_goals", "present_task_description", "file_name", "markdown_text"],
#     template=template
# )
# file_from_base_dir = 'Understanding/agents/systems.md'
# file_name=get_file_name(file_from_base_dir)

# markdown_text=get_markdown_text(file_name)
# prompt=prompt_template.format(project_name=project_name,
#                        project_goals=project_goals,
#                        present_task_description=present_task_description,
#                        file_name=file_name,
#                        markdown_text=markdown_text)

In [14]:
print(prompt)

 
You are a expert AI technology creator and communicator
You are working on a project called: Managing Generative AI

You are part of a team working to: Overall: Create an automated repository that is able to explain in plain-English and in code, Generative AI and how to improve upon it. 

You are helping to: Improve the markdown based on best understandings.Be as honest and as accurate as possible. Be succinct in your responses. Preserve any URLS.

You are helping to rewrite and expand a file called ../../docs/Using/deploying/index.md 
Here are some things we'd like you to be sure to do:
* Please present ALL html links without changing the link's text. 
* Preserve any urls or relative links without changing them. 
* Be sure to use `##` `###` subheadings and appropriately to reference sections and subsections.
* Please be sure to keep any amonitions like `!!!` and `???`.
* Please reformat any bulleted lists of links where github links have `!!! code`, arxiv's have `!!! tip` and others

In [15]:
ans = openai(prompt)

In [17]:
print(ans)

Deploying Generative AI models involves making these concepts accessible for users or other software applications. Although initially, the deployment may only involve 'making a model available for use,' there are potential restrictions to consider. Therefore, it can be necessary to separate the model's deployment from the project encapsulating the model, even though they interrelate directly. 

This process will involve various components, especially for customers with more stringent requirements. 

Important steps typically include storing models of desired specifications in a file and then loading them for user interaction. User inputs are directed to the served model – usually batched to enhance the average request latency. The output is then redirected to users in an appropriate manner. 

As with other AI-enabling products, it is essential to consider the following aspects:

### 1. [Caller Needs](#caller-needs) (Customer Requirements)

The requirements of your target audience great

In [18]:
role = "expert AI technology creator and communicator"

project_name = "Managing Generative AI"
project_goals = "Overall: Create an automated repository that is able to explain in plain-English and in code, "\
                "Generative AI and how to improve upon it. "
present_task_description="Improve the markdown based on best understandings."\
                         "Be as honest and as accurate as possible. Be succinct in your responses. Preserve any URLS."

from langchain import PromptTemplate
# Idea
# Select between prompt patterns
# Chain select them more effectively. 
# The present tree-structure:\n {tree_structure}\n 
# Please use a heading/subheading structure that follows the general pattern : {structure_pattern}\n
template = \
""" 
You are a {role}
You are working on a project called: {project_name}\n
You are part of a team working to: {project_goals}\n
You are helping to: {present_task_description}\n
Please help to organize this effectively as possible. 
"""
prompt_template = PromptTemplate(
    # input_variables=["role", "project_name", "project_goals", "present_task_description", "file_name", "tree_structure", "structure_pattern", "markdown_text", ],
        input_variables=["role", "project_name", "project_goals", "present_task_description", "file_name",  "markdown_text", ],
    template=template
)
file_from_base_dir = 'Using/deploying/index.md'
file_name=get_file_name(file_from_base_dir)
tree_structure=get_tree_structure()
markdown_text=get_markdown_text(file_name)
structure_pattern = get_structure_pattern()
prompt=prompt_template.format(role=role,
                              project_name=project_name,
                       project_goals=project_goals,
                       present_task_description=present_task_description,
                       file_name=file_name,
                       # tree_structure=tree_structure,
                       #  structure_pattern=structure_pattern,
                       markdown_text=markdown_text,)

ValidationError: 1 validation error for PromptTemplate
__root__
  Invalid prompt schema; check for mismatched or missing input parameters. {'markdown_text', 'file_name'} (type=value_error)

In [34]:
import markdown
md = markdown.markdown(markdown_text, extensions=['toc'])
# from markdown.extensions.toc import TocExtension
# html = markdown.markdown(markdown_text, extensions=[TocExtension(baselevel=1)])
# print(html)


In [2]:
import re
import json

def extract_toc(md_text, level_key="l", text_key='t'):
    # Regex to match markdown headings, capturing the level based on hash count and the heading text
    pattern = re.compile(r'^(?P<hashes>#+) (?P<text>.+)$', re.MULTILINE)
    matches = pattern.findall(md_text.strip())

    toc_structure = [{level_key: len(hashes), text_key: text} for hashes, text in matches]
    return toc_structure

def serialize_toc(toc_structure):
    return json.dumps(toc_structure, indent=4)

def test_extract_toc():
    md_text = """
# Title

## Section 1

Content here

### Subsection 1.1

More content here

## Section 2

### Subsection 2.1

Yet more content
    """

    toc_structure = extract_toc(md_text)
    print(toc_structure)
    serialized_toc = serialize_toc(toc_structure)

    expected_output = """
[
    {
        "l": 1,
        "t": "Title"
    },
    {
        "l": 2,
        "t": "Section 1"
    },
    {
        "l": 3,
        "t": "Subsection 1.1"
    },
    {
        "l": 2,
        "t": "Section 2"
    },
    {
        "l": 3,
        "t": "Subsection 2.1"
    }
]
    """

    assert serialized_toc.strip() == expected_output.strip(), f"Expected:\n{expected_output}\nGot:\n{serialized_toc}"

test_extract_toc()


[{'l': 1, 't': 'Title'}, {'l': 2, 't': 'Section 1'}, {'l': 3, 't': 'Subsection 1.1'}, {'l': 2, 't': 'Section 2'}, {'l': 3, 't': 'Subsection 2.1'}]


In [3]:
3

3