In [5]:
# import openai
import re 
import difflib
from difflib import ndiff
from termcolor import colored
import dotenv
from datetime import datetime
from IPython.display import HTML, Image, Markdown, display

from langchain_openai import ChatOpenAI
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_core.prompts import (ChatPromptTemplate, MessagesPlaceholder, PromptTemplate,
                                    SystemMessagePromptTemplate, HumanMessagePromptTemplate)
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser

# from langchain.agents import create_tool_calling_agent, AgentExecutor
# from langchain.memory import ChatMessageHistory
# from langgraph.checkpoint.sqlite import SqliteSaver
# from langgraph.checkpoint.aiosqlite import AsyncSqliteSaver
# from langchain.tools import BaseTool, StructuredTool, tool
# from langchain_community.chat_models import ChatOllama
# from langchain_community.tools.tavily_search import TavilySearchResults
# from langchain_core.pydantic_v1 import BaseModel, Field
# from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
# from langchain_core.outputs import LLMResult

In [6]:
# Pull settings from a local .env file into the process environment
# (presumably where the OpenAI API key lives -- confirm for your setup).
dotenv.load_dotenv()

# Name of the chat model handed to ChatOpenAI further down.
MODEL = "gpt-4o"

In [25]:
# Markdown text to be copy-edited. It is passed to the chain as the `input`
# template variable and later used as the "before" side of the HTML diff.
INPUT = """## Level 3 - Templates and Single-Prompt Apps

- Make your own [Custom GPT](https://help.openai.com/en/articles/8554397-creating-a-gpt), with a single prompt, and some data. Upload a document or knowledge base and set up your own retrieval-augmented generation to make a question-answering chatbot and talk to your knowledge base.
- Make an app that applies a prompt using a template using Jupyter or a Python script. For instance, here I ask ChatGPT to [copy-edit a blog post](https://github.com/druce/AInewsbot/blob/main/CopyEdit.ipynb) and show markup of the before and after. 
- Use [LangChain](https://python.langchain.com/v0.1/docs/use_cases/) for different workflows. Try different LLM models like Google Gemini and Claude Sonnet via the API.
- Create a one-turn agent: [use tools](https://python.langchain.com/v0.1/docs/use_cases/tool_use/) or have an agent write some SQL or code and run it (use caution). 
- If you have been using an app daily for a while for a simple task like sentiment analysis or other classification, you can [fine-tune a model](https://platform.openai.com/docs/guides/fine-tuning) on your own data for better performance.
- Try local models with e.g. [Ollama](https://ollama.com/) or [LM Studio](https://lmstudio.ai/), if you have a recent Nvidia GPU, Apple Silicon, and plenty of VRAM.

## Level 4 - Multi-turn Pipelines, Tools,  APIs, and Simple Agents
- An example pipeline chaining multiple tools:
	- Tell ChatGPT to write a poem about a current news event
	- Tell Udio to take that poem and make a sad girl piano song
	- Tell RunwayML to make some video clips from pictures of that news event
	- Combine them all to get this video above.
    - If you have multiple APIs that integrate with e.g. [Zapier](https://zapier.com/) or the similar enterprise [Workato](https://www.workato.com/), or if you can code with Python or ask Github Copilot to do it for you, you can write a program with the initial input description of the current event, and run the entire pipeline end-to-end.
- Or [here is a workflow](https://github.com/druce/AInewsbot/blob/main/AInewsbot_langgraph.ipynb) that will:
	- Grab front pages of tech news.
	- Use ChatGPT to take the headlines and categorize them as about AI or not.
	- Use ChatGPT to extract topics from the headlines.
	- Find the most popular topics today using a tool.
	- Use ChatGPT to summarize the individual stories in the most popular topics.
	- Use ChatGPT to take the summaries and write a newsletter about today's news in AI.
- More complex multi-turn agents: Use tools to make a financial [market question answering](https://www.youtube.com/watch?v=zOOP7DBiwzs) chatbot with or a [ReAct](https://til.simonwillison.net/llms/python-react-pattern) workflow.

You can of course, use coding tools like GitHub Copilot to build level 4 tools. Now we are inching toward level 5

## Level 5 - Complex Agents
- Try [AutoGPT](https://news.agpt.co/), a more complex ReAct workflow
- Try [multi-agent workflows](https://www.microsoft.com/en-us/research/publication/autogen-enabling-next-gen-llm-applications-via-multi-agent-conversation-framework/)
- Create more complex ReAct agents that write and execute code
- Make an AI that creates a level 4 agent, writes code, and improves itself
- Make your own [Devin](https://github.com/OpenDevin/OpenDevin), or [Samantha](https://www.youtube.com/watch?v=vgYi3Wr7v_g), or [Astra](https://deepmind.google/technologies/gemini/project-astra/) pie-in-the-sky AI


"""


In [26]:
# Chat model client used as the middle stage of the chain built below;
# the model name comes from the MODEL constant.
model = ChatOpenAI(model=MODEL)

# System message: sets the copy-editor persona. It contains no template
# variables; the text to edit is supplied through the user message.
system_template = """Act like a seasoned copy editor with 20 years of experience in enhancing 
manuscripts for publication. You have a proven track record of working with authors across 
various genres, helping them refine their writing to meet high editorial standards. Your 
meticulous attention to detail and comprehensive approach to editing ensure that every piece 
you work on is polished and ready for publication."""

# User message: the editing checklist followed by the text to edit.
# `{input}` is the only template variable and is filled in when the chain runs.
# The checklist is numbered contiguously (1-25); the earlier version skipped
# from 19 straight to 21.
user_template = """Objective: Perform an in-depth copy edit on the provided text, focusing on 
improving its overall quality and readability. Your goal is to identify and correct any issues, 
ensuring the text is clear, concise, and engaging. Address the following specific tasks in a 
step-by-step manner, returning the edited output text:

1. **Spelling Errors:** Identify and correct any spelling errors.
2. **Passive Voice:** Highlight instances of passive voice and suggest active voice alternatives.
3. **Punctuation:** Check for proper use of punctuation, including commas, semicolons, and dashes.
4. **Descriptive Language:** Suggest stronger and more descriptive adjectives and adverbs.
5. **Verb Tense Consistency:** Ensure consistency in verb tense throughout the document.
6. **Sentence Simplification:** Suggest shorter or simpler alternatives for complex sentences.
7. **Subject-Verb Agreement:** Check for subject-verb agreement errors.
8. **Conciseness:** Identify and remove any unnecessary words or phrases.
9. **Transitions:** Suggest better transitions between paragraphs or sections.
10. **Clarity:** Highlight any instances of unclear or confusing language.
11. **Sentence Structure:** Suggest alternative sentence structures to vary the flow of the writing.
12. **Capitalization and Formatting:** Check for proper capitalization and formatting of titles.
13. **Repetitive Language:** Highlight any instances of repetitive language or ideas.
14. **Engagement:** Suggest more engaging openings or conclusions.
15. **Apostrophes and Contractions:** Check for proper use of apostrophes and contractions.
16. **Homophones:** Identify and correct any misused homophones (e.g., their/there, its/it’s).
17. **Hyphens and Dashes:** Check for consistency in the use of hyphens and dashes.
18. **Descriptive Nouns and Verbs:** Suggest more descriptive nouns and verbs to replace adjectives and adverbs.
19. **Word Usage:** Highlight any instances of incorrect word usage (e.g., affect/effect).
20. **Strong Verbs:** Suggest stronger and more specific verbs to replace weaker ones.
21. **Quotation Marks:** Check for proper use of quotation marks and attribution.
22. **Repetition:** Suggest alternative word choices to avoid repetition.
23. **Formatting Consistency:** Highlight any instances of inconsistent formatting or spacing.
24. **Possessive Apostrophes:** Check for proper use of possessive apostrophes.
25. **Clarity Improvement:** Suggest alternative sentence structures to improve clarity.

Respond with the final output text only, without comment.

Take a deep breath and work on this problem step-by-step.

Text:
{input}
"""


# Build the pipeline: prompt -> chat model -> string output parser.
messages = [
    ("system", system_template),
    ("user", user_template),
]
prompt_template = ChatPromptTemplate.from_messages(messages)
parser = StrOutputParser()
chain = prompt_template | model | parser

# Stream the edited text, echoing each chunk as it arrives while also
# accumulating the full reply in `response` for the diff further down.
response = ""
start_time = datetime.now()
for chunk in chain.stream({"input": INPUT}):
    response += chunk
    print(chunk, end="")
end_time = datetime.now()

# Report how long the streamed call took.
total_seconds = (end_time - start_time).total_seconds()
print(f"\n\nElapsed seconds: {total_seconds:.6f}")


## Level 3 - Templates and Single-Prompt Apps

- Create your own [Custom GPT](https://help.openai.com/en/articles/8554397-creating-a-gpt) with a single prompt and some data. Upload a document or knowledge base and set up your own retrieval-augmented generation to develop a question-answering chatbot that interacts with your knowledge base.
- Develop an app that applies a prompt using a template via Jupyter or a Python script. For example, you can ask ChatGPT to [copy-edit a blog post](https://github.com/druce/AInewsbot/blob/main/CopyEdit.ipynb) and display the markup of the before and after versions.
- Utilize [LangChain](https://python.langchain.com/v0.1/docs/use_cases/) for various workflows. Experiment with different LLM models like Google Gemini and Claude Sonnet through the API.
- Create a one-turn agent: [use tools](https://python.langchain.com/v0.1/docs/use_cases/tool_use/) or have an agent write some SQL or code and execute it (use caution).
- If you've been using an app daily 

In [27]:
def clean_markdown(text):
    """Flatten markdown into a single line of text suitable for diffing.

    Steps, in order: drop non-breaking spaces, replace image markup with a
    space, replace links with their visible text plus a space, remove
    anything that looks like an HTML tag, trim the ends, turn newlines into
    ``<br>`` and collapse every whitespace run to one space.

    Args:
        text: Markdown source string.

    Returns:
        The cleaned string.
    """
    # Non-breaking spaces are deleted outright (mapped to None).
    text = text.translate({ord('\xa0'): None})

    # (pattern, replacement) pairs; order matters because the image pattern
    # must run before the link pattern, which would otherwise match the
    # "[alt](url)" part of an image.
    substitutions = (
        (r'!\[.*?\]\(.*?\)', ' '),          # ![alt text](url)
        (r'\[([^\]]+)\]\(.*?\)', r'\1 '),   # [visible text](url)
        (r'<.*?>', ''),                     # leftover HTML tags
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # <br> is inserted only after tag stripping so it survives.
    text = text.strip().replace('\n', '<br>')
    return re.sub(r'(\s+)', ' ', text)

def compare_texts(original, modified, verbose=False):
    """Return an HTML diff of ``original`` against ``modified``.

    The strings are compared character by character with
    ``difflib.SequenceMatcher``. Unchanged text is black, text that exists
    only in ``modified`` is green, and text that exists only in ``original``
    is red inside square brackets. Doesn't work that great with markdown.

    Args:
        original: The "before" string.
        modified: The "after" string.
        verbose: When true, print both inputs and every opcode.

    Returns:
        A string of concatenated ``<span>`` elements. The text is inserted
        as-is, without HTML escaping.
    """
    if verbose:
        print(original)
        print(modified)

    # Build the matcher locally. Previously this line was commented out, so
    # `diff` resolved to a notebook global (or nothing) and the call failed.
    diff = difflib.SequenceMatcher(None, original, modified)

    result = []
    for opcode, a0, a1, b0, b1 in diff.get_opcodes():
        if verbose:
            print(opcode, a0, a1, b0, b1)
        if opcode == 'equal':
            result.append(f'<span style="color: black;">{original[a0:a1]}</span>')
        elif opcode == 'insert':
            result.append(f'<span style="color: green;">{modified[b0:b1]}</span>')
        elif opcode == 'delete':
            # Mark removed text the same way the 'replace' branch does;
            # black made deletions look like unchanged text.
            result.append(f'<span style="color: red;">[{original[a0:a1]}]</span>')
        elif opcode == 'replace':
            result.append(f'<span style="color: green;">{modified[b0:b1]}</span>')
            result.append(f'<span style="color: red;">[{original[a0:a1]}]</span>')
    return ''.join(result)


In [28]:
def compare_texts2(text1, text2):
    """Return an HTML string marking how ``text2`` differs from ``text1``.

    Both inputs are first flattened with ``clean_markdown`` and then diffed
    character by character. ``text1`` is treated as the original and
    ``text2`` as the revision:

    * text present in both is emitted unchanged;
    * text only in ``text1`` (removed) is red, struck through, in ``[...]``;
    * text only in ``text2`` (added) is green, in ``(...)``.

    Changed runs are stripped of surrounding whitespace, and a run that is
    whitespace-only is omitted.

    Args:
        text1: Original markdown text.
        text2: Revised markdown text.

    Returns:
        The marked-up HTML string.
    """
    text1 = clean_markdown(text1)
    text2 = clean_markdown(text2)
    matcher = difflib.SequenceMatcher(a=text1, b=text2)

    # get_opcodes() covers both strings end to end, so no separate
    # "leftover" handling is needed after the loop. The previous
    # hand-rolled tail was unreachable and sliced the wrong string.
    pieces = []
    for tag, a0, a1, b0, b1 in matcher.get_opcodes():
        if tag == 'equal':
            pieces.append(text1[a0:a1])
            continue

        # 'replace' has both sides, 'delete' only text1, 'insert' only text2;
        # the side that is absent is an empty slice and is skipped below.
        removed = text1[a0:a1].strip()
        added = text2[b0:b1].strip()
        if removed:
            pieces.append(
                f'<text style="color:red; text-decoration: line-through">[{removed}]</text>'
            )
        if added:
            pieces.append(f"<text style=color:green>({added})</text>")

    return "".join(pieces)


In [29]:
# Diff the original INPUT against the model's copy-edited response and
# render the resulting markup inline in the notebook.
diff = compare_texts2(text1=INPUT, text2=response)

display(HTML(diff))