<a href="https://colab.research.google.com/github/aknip/Streamlit-Gradio/blob/main/myGPTlab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# myGPTlab

This app helps to process texts (longform content) with ChatGPT via API.

## Start:
**Run all cells - that's it.**
- The notebook automatically checks whether an initial setup (pip installs etc.) is necessary. The setup status is saved in the file 'installation.done'.
- You can force the setup by deleting this file or by going to the "Settings" cell and checking "initial_setup_mode". Afterwards, uncheck "initial_setup_mode" again.

## Working with myGPTlab
Lorem ipsum...


# Code

In [None]:
# @title Settings
# NOTE: this looks like a Colab form cell - the "# @title", "# @markdown" and "# @param"
# annotations are form markup, so keep them intact when editing.

# @markdown Default model
default_model = 'GPT-3.5' # @param ["GPT-3.5", "GPT-4"]

# The app cell only builds and launches the Gradio UI when this is True and ios_mode is False.
# @markdown Start Gradio webapp.
start_gradio_webapp = False # @param {type:"boolean"}

# A later cell switches this to True automatically when the file 'installation.done' is missing.
# @markdown Initial Setup Mode for pip install, fetch credentials etc.
initial_setup_mode = False # @param {type:"boolean"}

# Passed to demo.launch(debug=...) in the app cell.
# @markdown Debug Mode for extensive logging.
debug_mode = True # @param {type:"boolean"}

# When True, the Gradio installation and the Gradio app cell are skipped.
# @markdown iOS Mode to develop helper functions, no Gradio.
# @markdown Useful for development on iOS, eg. with Carnets App
ios_mode = False # @param {type:"boolean"}

In [None]:
# Logical folder name -> relative sub-path.
# The app cell joins these onto a project directory (proj_name + '/' + folders[...]);
# only 'text-input' is used by the code visible in this notebook section.
# NOTE(review): the other entries are presumably used by audio/transcript steps elsewhere - confirm.
folders = {
    'audio': 'audio',
    'audio-chunks': 'audio/chunks',
    'transcript':'audio-transcript',
    'transcript-chunks': 'audio-transcript/chunks',
    'text-input': 'text-input',
    'text-input-backup': 'text-input-backup',
    'text-output': 'text-output',
}

In [None]:
from ipywidgets import widgets
from IPython.display import Javascript, display, clear_output
# Output widget that receives the JavaScript emitted by popup(); it is displayed once
# here so later popup() calls have a place to render into.
notify_output = widgets.Output()
display(notify_output)
@notify_output.capture()
def popup(text):
    """Show `text` in a browser alert() dialog.

    The message is encoded with json.dumps so that quotes, backslashes and newlines
    inside `text` end up as a valid JavaScript string literal instead of breaking
    (or injecting code into) the generated script.
    """
    import json  # local import: this cell must not rely on another cell importing json

    # Drop the previous alert script so re-running does not replay old popups.
    clear_output()
    display(Javascript("alert({})".format(json.dumps(str(text)))))
#popup('Hello World!')

In [None]:
import os
# Decide whether the initial setup has to run:
# - explicitly requested in the Settings cell  -> run it
# - marker file 'installation.done' exists     -> skip it
# - marker file missing                        -> run it automatically
if initial_setup_mode == True:
  print('Starting setup.\n\nEnter API Keys as JSON (in next notebook cell).')
  popup('Starting setup. Enter API Keys as JSON (in next notebook cell).')
elif os.path.exists('installation.done'):
  initial_setup_mode = False
  print('No initial setup - forced by existing file "installation.done"')
else:
  initial_setup_mode = True
  print('Starting automatic setup - forced by missing file "installation.done".\n\nEnter API Keys as JSON (in next notebook cell).')
  popup('Starting automatic setup. Enter API Keys as JSON (in next notebook cell).')

In [None]:
# Initial setup, step 1: fetch the external setup script and run it inside this kernel.
if initial_setup_mode == True:
  #popup('Enter API Keys as JSON:')
  # wget stores the download under the name 'aknip-colab-setup', which %run then executes.
  # NOTE(review): presumably the script asks for the API keys and exports them as the
  # CREDS environment variable that the next cell reads - confirm against the script.
  # NOTE(review): this executes unpinned remote code fetched through a URL shortener;
  # consider pinning the script to a reviewed revision.
  !wget -q bit.ly/aknip-colab-setup
  %run aknip-colab-setup
else:
  print('No initial setup.')

In [None]:
# Load the API credentials from the CREDS environment variable (a JSON document).
# json/os are imported here so the cell also works on a fresh kernel when the setup
# cells were skipped and nothing else has imported them yet.
import json
import os

_creds_json = os.getenv('CREDS')
if _creds_json is None:
  # Fail with an actionable message instead of the opaque TypeError from json.loads(None).
  raise RuntimeError(
      'Environment variable CREDS is not set. Enable "initial_setup_mode" in the '
      'Settings cell (or delete the file "installation.done") and run the notebook again.'
  )
creds = json.loads(_creds_json)
# Example access (do not print the key - cell outputs are saved with the notebook):
# openAI_key = creds['OpenAI']['v2']['credential']

In [None]:
# Report which platform branch of the notebook is active.
print('Mac' if ios_mode == False else 'iOS')

In [None]:
if initial_setup_mode == True:
  !pip install openai==0.27.7 yt-dlp==2023.7.6 librosa==0.10.0.post2 pickle-mixin==1.0.2 langchain==0.0.225 PyPDF2==3.0.1 PyMuPDF==1.22.5 -q
else:
  print('No initial setup.')


In [None]:
if (initial_setup_mode == True) and (ios_mode == False) :
  !pip install gradio -q
else:
  print('No initial setup / iOS.')

In [None]:
# Initial setup, step 4: load Gradio's IPython extension (skipped in iOS mode).
# NOTE(review): presumably this registers Gradio's notebook magics; no visible cell uses
# them, and the extension is only loaded during setup runs - confirm it is still needed.
if (initial_setup_mode == True) and (ios_mode == False) :
  %load_ext gradio
else:
  print('No initial setup / iOS.')

In [None]:
# Initial setup, final step: leave the marker file behind so that later runs
# skip the setup automatically.
if initial_setup_mode != True:
  print('No initial setup.')
else:
  # Create (or truncate) the empty marker file 'installation.done'.
  with open('installation.done', 'w+') as f:
    pass
  print('Initial setup done. Application starting.')
  popup('Initial setup done. Application starting.')

## Helper Functions

- **create_file_directory**: Creates a new directory if it does not exist yet. The always_delete flag forces deletion and re-creation even if it already exists.

In [None]:
# v3 - 08.08.2023
import shutil
import os
import textwrap

def create_file_directory(directory, always_delete=False):
  """Make sure `directory` exists, optionally starting from an empty one.

  Missing parent directories are created as well, so nested paths such as
  'audio/chunks' work even when 'audio' does not exist yet.

  Args:
    directory: Path of the directory to create.
    always_delete: If True, an existing directory is removed recursively
      (including its content) before it is re-created.

  Examples:
    create_file_directory('texts', False)  # creates 'texts' only if it is missing
    create_file_directory('texts', True)   # always ends up with an empty 'texts'
  """
  if always_delete and os.path.exists(directory):
    # delete the directory recursively
    shutil.rmtree(directory)
  if not os.path.exists(directory):
    # exist_ok guards against another process creating it between check and call
    os.makedirs(directory, exist_ok=True)


def find_files(path, extensions=[".txt"], recursive=False):
    """Collect the files below `path` whose names end with one of `extensions`.

    Args:
        path: Directory to search. A missing directory yields an empty list.
        extensions: Accepted file name endings. An empty list (or None) matches
            every file. The default list is never modified.
        recursive: If True, sub-directories are searched as well; otherwise only
            the files directly inside `path` are considered.

    Returns:
        A list of paths (each joined with its directory). Every file appears at
        most once, even when several extensions match it. Entries are sorted by
        name within each directory so the result is reproducible between runs.
    """
    suffixes = tuple(extensions or ())
    my_files = []
    for root, dirs, files in os.walk(path):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for f in sorted(files):
            # str.endswith accepts a tuple, so a file matching two endings is added once.
            if not suffixes or f.endswith(suffixes):
                my_files.append(os.path.join(root, f))
        # no recursion / don't look inside any subdirectory
        if not recursive:
            break
    return my_files


def merge_textfiles(path, extensions=[".txt"], recursive=False, new_filename='merged.txt'):
    """Concatenate all matching text files below `path` into one file.

    The files are selected with find_files(path, extensions, recursive). Each
    file's content is followed by three newlines, and the result is written to
    `new_filename` (created or overwritten).

    Args:
        path: Directory to search.
        extensions: File name endings to include (see find_files).
        recursive: Whether sub-directories are searched as well.
        new_filename: Path of the merged output file.
    """
    parts = []
    for filename in find_files(path, extensions, recursive):
        # `with` closes the handle even when reading fails (e.g. on a decoding error).
        with open(filename, 'r') as source_file:
            parts.append(source_file.read() + '\n\n\n')

    # Join once at the end instead of growing a string inside the loop.
    with open(new_filename, 'w+') as target_file:
        target_file.write(''.join(parts))

## The App

In [None]:
# Build and launch the Gradio web UI. Skipped in iOS mode or when the web app is
# disabled in the Settings cell.
if (ios_mode == False) and (start_gradio_webapp == True):
  import gradio as gr

  def _require_project_name(proj_name):
    # The project name becomes a directory name, and the app is launched with a public
    # share link, so the value is untrusted: reject empty names (an empty name would make
    # "Download" zip the whole working directory) and anything containing path separators
    # or '.'/'..' (path traversal).
    name = proj_name if isinstance(proj_name, str) else ''
    if name.strip() == '' or name in ('.', '..') or '/' in name or '\\' in name:
      raise gr.Error('Please enter a valid project name (no path separators).')
    return name

  # Theming
  theme = gr.themes.Default(
      primary_hue="slate" # , radius_size=gr.themes.Size(radius_sm="3px", radius_xs="2px", radius_xxs="1px")
  )
  # Styling: Change max width
  css = """
    .gradio-container {max-width: 700px!important}
    .vspacer1 {margin-top: 50px}
  """

  with gr.Blocks(theme=theme, css=css) as demo:

      gr.Markdown("# ChatGPTLab 2.0", elem_classes="vspacer1")
      gr.Markdown("### Optimizing your work with LLMs.")

      project_name = gr.Textbox(label="Project name")

      #
      # 1. Input Text
      #
      with gr.Tab("Input Text "):
        gr.Markdown("Please enter text")

        # Input text via UI
        gr.Markdown("### Input your text:")
        text_input = gr.Textbox(label="Enter text", placeholder="Your text here...", lines=10)
        text_output = gr.Textbox(label="Result")

        def text_save(text, proj_name):
          # Store the entered text as <project>/<text-input>/input_text.txt.
          proj_name = _require_project_name(proj_name)
          input_dir = proj_name + '/' +  folders['text-input']
          create_file_directory(proj_name, False)
          create_file_directory(input_dir, False)
          with open(input_dir + '/input_text.txt','w+') as f:
            f.write(text)
          return "Text saved."
        text_button = gr.Button("Save text")
        text_button.click(text_save, [text_input, project_name], text_output)

        gr.Markdown("")
        gr.Markdown("")

        # Input text via upload
        gr.Markdown("### Or upload your text:")
        upload_button = gr.UploadButton("Click to Upload a File", file_types=[".txt",".md"], file_count="single")
        file_output = gr.Textbox(label="Result")

        def upload_file(my_file, proj_name):
          # Copy the uploaded file into <project>/<text-input>/ and report the result.
          proj_name = _require_project_name(proj_name)
          create_file_directory(proj_name, False)
          create_file_directory(proj_name + '/' +  folders['text-input'], False)
          # copy to project directory
          # Depending on the Gradio version the upload arrives as a temp-file object
          # (with .name) or as a plain path string; accept both.
          full_upload_path = getattr(my_file, 'name', my_file)
          just_the_filename = os.path.basename(full_upload_path)
          full_text_path = "./" + proj_name + '/' + folders['text-input'] + '/' + just_the_filename
          shutil.copyfile(full_upload_path, full_text_path)
          # check if file is empty (by size, so the content does not have to be decoded)
          log_text = just_the_filename + "\n"
          if os.path.getsize(full_text_path) == 0:
            log_text = log_text + "Error: Upload file lengt 0 bytes"
          else:
            log_text = log_text + "Upload successful"
          return log_text
        upload_button.upload(upload_file, [upload_button, project_name], file_output)

      #
      # 2. Download full project
      #
      with gr.Tab("Download"):
        gr.Markdown("Download full project as ZIP file.")
        download_button = gr.Button("Download project")
        download_output = gr.File()

        def download_do(proj_name):
          # Zip the project directory into 'archive.zip' and hand it to the File component.
          proj_name = _require_project_name(proj_name)
          full_text_path = "./" + proj_name
          if not os.path.isdir(full_text_path):
            raise gr.Error('Project "' + proj_name + '" does not exist yet.')
          shutil.make_archive('archive', 'zip', full_text_path)
          return "archive.zip"
        download_button.click(download_do, project_name, download_output)

      #
      # 3. xxx
      #
      with gr.Tab("Step 2"):
        gr.Markdown("Please select the optimization:")
        radio = gr.Radio(
          ["by headline", "by paragraph", "by §§§"], label="Text split method"
        )
        name = gr.Textbox(label="Name", placeholder="Enter text...")
        output = gr.Textbox(label="Output Box")
        greet_btn = gr.Button("Start", scale=0)
        def greet(name):
          # Placeholder handler for the not yet implemented optimization step.
          result = "HALLO " + name + "!!!"
          return result
        greet_btn.click(fn=greet, inputs=name, outputs=output, api_name="greet")

  # share=True requests a public link, which is why the handlers above validate the project name.
  demo.launch(quiet=True, share=True, debug=debug_mode)

elif ios_mode != False:
  print('iOS Mode - Nothing to do.')
else:
  # Not iOS mode: the web app was simply not requested in the Settings cell.
  print('Gradio webapp disabled (start_gradio_webapp is off) - Nothing to do.')

## ChatGPT

In [None]:
# Promptlib
# Todo: promptlib not as global,

import pickle

# myprompt = "The given text is delimited by triple backticks. Summarize the current text to succint and clear bullet points of its contents.
#          The length of the summary must be 200 words maximum."
# myprompt = "The given text is delimited by triple backticks.
#           Summarize the current text to a maximum of 15 succint and clear bullet points of its contents."
# myprompt = myprompt + "The maximum number of words should be 300 words in total. "
# myprompt = myprompt + "Write everything in German language. " # this is optional !
# myprompt = myprompt + "```" + input_text + "```"



# Prompt library: prompt id -> entry.
# Each entry holds 'description' and 'category' plus one sub-dict per version; the version
# keys are numeric strings ('1', '2', ...), which get_prompt_from_lib compares as integers.
# Each version provides:
#   'note'            free-text remark
#   'prompt'          template with {language_text}, {length_text} and {input_text} placeholders
#   'lang-*'          alternative texts for {language_text}
#   'length-max-fix'  fixed text for {length_text}
#   'length-max-dyn'  template for {length_text}; the doubled braces survive the first
#                     str.format() pass in build_prompt_from_template as a literal
#                     '{length_calc(...)}' call, which that function then resolves.
promptlib = {
        'summary-bullets': {
            'description': 'Max word length: 1100 for EN, 900 for DE. approx. 1 min processing time Example: Full book: 113 parts á 1000 words takes 15 min.',
            'category': 'summarize',
            '1': {
                'note': 'summarize prompt v1',
                'prompt': 'The given text is delimited by triple backticks. Summarize the current text to succint and clear bullet points of its contents. {language_text} {length_text} Text:```{input_text}```',
                'lang-de': 'Write everything in German language.',
                'lang-en': 'write everything in EN.',
                'lang-same': 'write in language of original text.',
                'length-max-fix': 'The length of the summary must be 200 words maximum.',
                'length-max-dyn': 'max length is {{length_calc("{input_text}", {max_len})}} words.'
            },
            '2': {
                'note': 'summarize prompt v2',
                'prompt': 'The given text is delimited by triple backticks. Summarize the current text to a maximum of 15 succint and clear bullet points of its contents. {language_text} {length_text} Text:```{input_text}```',
                'lang-de': 'Write everything in German language.',
                'lang-en': 'write everything in EN.',
                'lang-same': 'write in language of original text.',
                'length-max-fix': 'The maximum number of words should be 300 words in total.',
                'length-max-dyn': 'max length is {{length_calc("{input_text}", {max_len})}} words.'
            }
        },
        'summary-sentence': {
            'description': '',
            'category': 'summarize',
            '1': {
                'note': 'summarize in a few sentences',
                'prompt': 'The given text is delimited by triple backticks. Summarize the text into three sentences. {language_text} {length_text} Text:```{input_text}```',
                'lang-de': 'Write everything in German language.',
                'lang-en': 'write everything in EN.',
                'lang-same': 'write in language of original text.',
                'length-max-fix': 'The length of the summary must be 200 words maximum.',
                'length-max-dyn': 'max length is {{length_calc("{input_text}", {max_len})}} words.'
            }
        },
        'para': {
            'description': 'paraphrase prompt',
            'category': 'paraphrase',
            '1': {
                'note': 'summarize prompt',
                'prompt': 'hello, this is prompttext para v1. {language_text} {length_text} Text:```{input_text}```',
                'lang-de': 'write evertyhing in DE.',
                'lang-en': 'write everything in EN.',
                'lang-same': 'write in language of original text.',
                'length-max-fix': 'max length is 100 words.',
                'length-max-dyn': 'max length is {{length_calc("{input_text}", {max_len})}} words.'
            }
        }
    }

def write_prompt_to_lib(prompt):
    """Add a prompt to the global `promptlib`, or update an existing one.

    Args:
        prompt: Dict with the required keys 'id', 'version', 'description',
            'category', 'note' and 'prompt'. The optional keys 'lang-de',
            'lang-en', 'lang-same', 'length-max-fix' and 'length-max-dyn' are
            stored as well when present, so a dict returned by
            get_prompt_from_lib can be written back without losing data.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    prompt_id = prompt['id']
    # Version keys are strings in the library (get_prompt_from_lib looks up
    # str(version)), so normalise e.g. the integer 2 to '2'.
    version = str(prompt['version'])
    entry = promptlib.setdefault(prompt_id, {})
    version_entry = entry.setdefault(version, {})
    entry['description'] = prompt['description']
    entry['category'] = prompt['category']
    version_entry['note'] = prompt['note']
    version_entry['prompt'] = prompt['prompt']
    for key in ('lang-de', 'lang-en', 'lang-same', 'length-max-fix', 'length-max-dyn'):
        if key in prompt:
            version_entry[key] = prompt[key]


def get_prompt_from_lib(id=None, version=None):
    """Look up a prompt in the global `promptlib`.

    Args:
        id: Prompt id. With None the function returns None.
        version: Version to fetch (int or numeric string). With None the
            highest numeric version key of the entry is used.

    Returns:
        None if `id` is None, otherwise a flat dict with the keys 'id',
        'description', 'category', 'version', 'note', 'prompt', 'lang-de',
        'lang-en', 'lang-same', 'length-max-fix' and 'length-max-dyn'.
        Language/length texts that the stored version does not define are
        returned as '' (e.g. for prompts that were added with only a note and
        a prompt text).

    Raises:
        KeyError: If the id or the requested version does not exist.
    """
    if id is None:
        return None
    prompt = promptlib[id]
    if version is not None:
        # version given
        version_highest = int(version)
    else:
        # look for highest available version; non-numeric keys such as
        # 'description' or 'category' are not versions and are skipped
        version_highest = 0
        for key in prompt:
            try:
                candidate = int(key)
            except ValueError:
                continue
            if candidate > version_highest:
                version_highest = candidate
    prompt_version = prompt[str(version_highest)]
    prompt_found = {
        'id': id,
        'description': prompt['description'],
        'category': prompt['category'],
        'version': str(version_highest),
        'note': prompt_version['note'],
        'prompt': prompt_version['prompt'],
    }
    for key in ('lang-de', 'lang-en', 'lang-same', 'length-max-fix', 'length-max-dyn'):
        prompt_found[key] = prompt_version.get(key, '')
    return prompt_found

def build_prompt_from_template(input_txt, prompt_obj, lang, length='', length_max=1):
  """Fill a prompt template from the prompt library with concrete text.

  Args:
    input_txt: The text to be processed; inserted for {input_text}.
    prompt_obj: Prompt dict as returned by get_prompt_from_lib.
    lang: Key of the language instruction to use, e.g. 'lang-de'.
    length: Key of the length instruction ('length-max-fix' or
      'length-max-dyn'), or '' for no length instruction.
    length_max: Value inserted for {max_len} in a dynamic length template.

  Returns:
    The final prompt string.

  The dynamic length template contains '{length_calc("<text>", <factor>)}' after
  its first formatting pass. This call is resolved with a regular expression and
  a direct call to length_calc; the input text itself is never evaluated as code,
  so quotes, braces or newlines in the text cannot break or hijack the prompt.
  Any other expression inside braces is left in the text unchanged.
  """
  import re  # local import: keeps the function independent of the cell's import order

  prompt_text =  prompt_obj['prompt']
  language_text = prompt_obj[lang]
  # calculate max. output length dynamically
  if length != '':
    length_template = prompt_obj[length]
    # Fill in {max_len} only; the input text is deliberately kept out of the template.
    resolved = length_template.format(input_text='', max_len=length_max)

    def _resolve_length_call(match):
      try:
        factor = float(match.group(1))
      except ValueError:
        # not a plain number: leave the expression untouched
        return match.group(0)
      return str(length_calc(input_txt, factor))

    length_text = re.sub(r'\{length_calc\(\s*""\s*,\s*([^(){}]*?)\s*\)\}', _resolve_length_call, resolved)
  else:
    length_text = ''
  final_prompt = prompt_text.format(language_text=language_text, length_text=length_text, input_text=input_txt)
  return final_prompt

def length_calc(my_text, maxWords):
    """Return the maximum output length in words for `my_text`.

    Args:
        my_text: The input text.
        maxWords: Fraction (0.0 - 1.0) of the input's word count that the
            output may use.

    Returns:
        The number of words in `my_text` (split on whitespace) multiplied by
        `maxWords`, truncated to an int. The result is inserted into prompts as
        a word limit, so words are counted, not characters.
    """
    return int(len(my_text.split()) * maxWords)

def save_promptlib(promptlib):
  # Serialise the given prompt library with pickle and write it to the
  # file 'promptlib.dictionary' in the current working directory.
  serialized = pickle.dumps(promptlib)
  with open('promptlib.dictionary', 'wb') as target:
      target.write(serialized)

def load_promptlib():
  # Read the pickled prompt library back from the file 'promptlib.dictionary'
  # in the current working directory and return it.
  # NOTE(review): unpickling can execute arbitrary code - only load a file that
  # this notebook wrote itself.
  with open('promptlib.dictionary', 'rb') as source:
      restored = pickle.loads(source.read())
  return restored

def text_stats(input_text):
    # Build a short statistics text: total words, number of lines ("paragraphs",
    # split at "\n") and number of "§§§"-separated segments. When the text has
    # more than one segment, the per-segment word counts and their maximum are
    # appended.
    segments = input_text.split("§§§")
    paragraph_total = len(input_text.split("\n"))
    word_total = wordcount(input_text)
    segment_total = len(segments)
    summary = f'Number of words / paragraphs / §§§ segements: {word_total} / {paragraph_total} / {segment_total}'
    if segment_total <= 1:
        return summary
    per_segment = [wordcount(segment) for segment in segments]
    listing = ' / '.join(str(count) for count in per_segment)
    return summary + '\n\nMax words in §§§ segment: ' + str(max(per_segment)) + ' (' + listing + ')'

def wordcount(input_text):
    # Count the whitespace-separated words of input_text.
    words = input_text.split()
    return len(words)

import random
def split_text_by_separator(text, separator, joiner, fixer, min_words=1000, max_words=0):
    """Split `text` into sections of limited word count.

    The text is cut at `separator`; the resulting paragraphs are then glued back
    together (each followed by `joiner`) into sections. A section is closed as
    soon as adding the next paragraph would exceed the word limit. A single
    paragraph that is longer than the limit becomes a section of its own. No
    paragraph is ever dropped.

    Args:
        text: The text to split.
        separator: String at which the text is cut into paragraphs.
        joiner: String appended to every paragraph inside a section.
        fixer: How the separator, which str.split removes, is restored:
            'fix-nothing' - not at all
            'fix-end'     - appended to every paragraph except the last
            'fix-start'   - prepended to every paragraph except the first
        min_words: Word limit per section (lower bound of the random limit
            when `max_words` is set).
        max_words: If non-zero, each section gets its own random limit drawn
            from range(min_words, max_words); 0 means a fixed limit of `min_words`.

    Returns:
        The list of sections. Closed sections are stripped of surrounding
        whitespace; the final section is returned as accumulated (it keeps its
        trailing `joiner`).

    Raises:
        ValueError: For an unknown `fixer`, or if `max_words` is non-zero and
            smaller than `min_words`.

    Examples:
        split_text_by_separator(input_text, '\\n', '\\n', 'fix-nothing', 2000)   # by paragraphs
        split_text_by_separator(input_text, '. ', ' ', 'fix-end', 2000)         # by sentences
        split_text_by_separator(input_text, '\\n#', '\\n\\n', 'fix-start', 2000)  # by markdown headlines
    """
    if max_words == 0:
        max_words = min_words
    if max_words < min_words:
        raise ValueError('max_words must be 0 or not smaller than min_words')

    # Split text, separated by separator
    pieces = text.split(separator)
    # fix paragraphs (if wanted) by re-adding the separator at end or start
    if fixer == 'fix-nothing':
        paragraphs = pieces
    elif fixer == 'fix-end':
        paragraphs = [piece + separator for piece in pieces[:-1]] + pieces[-1:]
    elif fixer == 'fix-start':
        paragraphs = pieces[:1] + [separator + piece for piece in pieces[1:]]
    else:
        # An unknown fixer used to produce an empty result, silently dropping the text.
        raise ValueError("fixer must be 'fix-nothing', 'fix-end' or 'fix-start'")

    def draw_limit():
        # Word limit for the next section: fixed, or random within [min_words, max_words).
        if min_words == max_words:
            return min_words
        return random.randrange(min_words, max_words)

    # Loop through paragraphs and aggregate up to maximum word count
    sections = []
    section = ''
    limit = draw_limit()
    for paragraph in paragraphs:
        candidate = section + paragraph + joiner
        # Only close a section that has content; an oversized first paragraph
        # must not produce an empty section.
        if section.strip() and wordcount(candidate) > limit:
            sections.append(section.strip())
            # Start the next section WITH the joiner so the following paragraph
            # is not glued directly onto this one.
            section = paragraph + joiner
            limit = draw_limit()
        else:
            section = candidate
    # Whatever is still accumulated is the last section - including a final
    # paragraph that has just opened a new section.
    sections.append(section)
    return sections

In [None]:
test_text1 = 'Die Photosynthese ist ein physiologischer Prozess zur Erzeugung energiereicher Biomoleküle aus energieärmeren Stoffen mit Hilfe\
  von Lichtenergie. Sie wird von Pflanzen, Algen und manchen Bakterien betrieben. Bei diesem biochemischen Vorgang wird Lichtenergie mit Hilfe\
  von lichtabsorbierenden Farbstoffen wie Chlorophyll in chemische Energie umgewandelt. Diese wird dann genutzt, um aus energiearmen\
  anorganischen Stoffen (vor allem Kohlenstoffdioxid (CO2) und Wasser (H2O)) energiereiche organische Verbindungen (vor allem Kohlenhydrate)\
  aufzubauen. Der genutzte Anteil der eingestrahlten Energie, nämlich der zum Aufbau der Assimilate verwendete Anteil, wird photosynthetische\
  Effizienz genannt. Soweit die energiereichen organischen Stoffe zu Bestandteilen des Lebewesens werden, bezeichnet man deren Synthese als\
  Assimilation. Man unterscheidet zwischen oxygener und anoxygener Photosynthese. Bei der oxygenen Photosynthese wird molekularer\
  Sauerstoff (O2) freigesetzt. Bei der anoxygenen Photosynthese, die nur von Bakterien betrieben wird, entstehen statt Sauerstoff andere\
  anorganische Stoffe, beispielsweise elementarer Schwefel (S). Die Photosynthese ist der einzige biochemische Prozess, bei dem\
  Lichtenergie, meistens Sonnenlicht, in chemisch gebundene Energie umgewandelt wird (Phototrophie). Indirekt sind auch fast alle\
  heterotrophen (nicht zur Photosynthese fähigen) Lebewesen von ihr abhängig, da sie der Photosynthese letztlich ihre Nahrung und auch\
  den zur Energiegewinnung mittels aerober Atmung benötigten Sauerstoff verdanken. Aus dem Sauerstoff entsteht außerdem die schützende\
  Ozonschicht der Erdatmosphäre.'
test_text2 = f'''
# The Potential of AI in Education

So, anyone who's been paying attention for the last few months has been seeing headlines like this, especially in education. The thesis has been students are going to be using chat GPT and other forms of AI to cheat, do their assignments, they're not going to learn, and it's going to completely undermine education as we know it.

Now what I'm going to argue today is not only are there ways to mitigate all of that, if we put the right guardrails, we do the right things, we can mitigate it, but I think we're at the cusp of using AI for probably the biggest positive transformation that education has ever seen. And the way we're going to do that is by giving every student on the planet an artificially intelligent but amazing personal tutor, and we're going to give every teacher on the planet an amazing, artificially intelligent teaching assistant.  And just to appreciate how big of a deal it would be to give everyone a personal tutor, I show you this clip from Benjamin Bloom's 1984 two-sigma study, or he called it the two-sigma problem.

The two-sigma comes from two standard deviations, sigma the symbol for standard deviation, and he had good data that showed that, look, a normal distribution, that's the one that you see in the traditional bell curve right in the middle, that's how the world kind of sorts itself out, that if you were to give personal one-to-one tutoring for students, that you could actually get a distribution that looks like that right, it says tutorial one-to-one with the asterisks, like that right distribution, a two standard deviation improvement. Just to put that in plain language, that could take your average student and turn them into an exceptional student, it can take your below average student and turn them into an above average student.

# The Challenge of Scaling Personalized Instruction

Now, the reason why he framed it as a problem was he said, well, this is all good, but how do you actually scale group instruction this way? How do you actually give it to everyone in an economic way? What I'm about to show you is, I think, the first moves towards doing that. Obviously, we've been trying to approximate it in some way at Khan Academy for over a decade now, but I think we're at the cusp of accelerating it dramatically.  I'm going to show you the early stages of what RAI, which we call Khan Migo, what it can now do, and maybe a little bit of where it is actually going.  This right over here is a traditional exercise that you or many of your children might have seen on Khan Academy, but what's new is that little bot thing at the right, and we'll start by seeing one of the very important safeguards, which is the conversation is recorded and viewable by your teacher.  It's moderated, actually, by a second AI, and also, it does not tell you the answer.  It is not a cheating tool.  Notice, when the student says, tell me the answer, it says, I'm your tutor.

What do you think is the next step for solving the problem? Now, if the student makes a mistake, and this will surprise people who think large language models are not good at mathematics, notice not only does it notice the mistake, it asks the student to explain their reasoning, but it's actually doing what I would say not just even an average tutor would do, but an excellent tutor would do. It's actually able to divine what is probably the misconception in that student's mind, that they probably didn't use the distributive properly.  Remember, we need to distribute the negative two to both the nine and the 2M inside of the parentheses.  This to me is a very, very, very big deal, and it's not just in math.  This is a computer programming exercise on Khan Academy where the student needs to make the clouds part, and so we can see the student starts defining a variable, left X minus minus.

# AI as a Super Tutor

It only made the left cloud part, but then they can ask a con amigo, what's going on? Why is only the left cloud moving? And it understands the code. It knows all the context of what the student is doing, and it understands that those ellipses are there to draw clouds, which I think is kind of mind-blowing, and it says, to make the right cloud move as well, try adding a line of code inside the draw function that increments the right X variable by one pixel in each frame.  Now, this one is maybe even more amazing, because we have a lot of math teachers.  We've all been trying to teach the world to code, but there aren't a lot of computing teachers out there, and what you just saw, even when I'm tutoring my kids when they're learning to code, I can't help them this well, this fast.  This is really going to be a super tutor.  And it's not just exercises.  It understands what you're watching.

It understands the context of your video. It can answer the age-old question, why do I need to learn this? And it asks, socratically, well, what do you care about? And let's say the student says, I want to be a professional athlete, and it says, well, learning about the size of cells, which is what this video is about, that could be really useful for understanding nutrition and how your body works, et cetera.  It can answer questions.  It can quiz you.  It can connect it to other ideas.  You can now ask as many questions of a video as you could ever dream of.  So another big shortage out there, I remember the high school I went to, the student-to-guidance counselor ratio was about 200 or 300 to 1.  A lot of the country, it's worse than that.  We can use Conmigo to give every student a guidance counselor, academic coach, career coach, life coach, which is exactly what you see right over here.  And this is all, you know, we launched this with the GPT-4 launch.  We have a few thousand people on this.  This isn't a fake demo.

§§§

# Conversations with Historical Figures and AI Debates

This is really in action. And then there's, you know, things that I think it would have been even harder, it would have been a little science fiction to do with even a traditional tutor.  There's a student, we run an online high school with Arizona State University called Khan World School.  And we have a student who attends that online school based in India, her name is Sanvi, and she was doing a report on the Great Gatsby.  And when she was reading the Great Gatsby, Jay Gatsby keeps looking at the green light off into the distance.  And she's like, well, why does he do that? She did some web searches, and people have obviously studied this and commented about the symbolism of that, but none of it was really resonating with her.  And then she realized that she had Conmigo and that she could talk to Jay Gatsby himself.

# Unlocking Learning Literature and AI Debates

And so, ah, splendid choice, old sport, I am now Jay Gatsby, the enigmatic millionaire from Escott Jarrell's classic. And so why do you keep staring at the green light? Ah, the green light, old sport.  It's a symbol of my dreams and desires, you see.  It's situated at the end of Daisy Buchanan's dock, across the bay from my mansion.  I gaze at it longingly as it represents my yearning for the past and my hope to reunite with Daisy, the love of my life.  And what was cool is Sanvi had said, I had this long conversation, she called him Mr.  Gatsby.  And at the end, she actually apologized for taking his time, which I thought was very polite of her.  But you can imagine, this unlocks learning literature, learning, you can talk to historical figures, we're even probably going to add an activity, you can talk to, like, the Mississippi River.  It brings things to life in ways that really were science fiction even six months or a year ago.  Students can get into debates with the AI.  And we've got, here's the students debating whether we should cancel student debt.  The student is against canceling student debt.  And we've gotten very clear feedback.  We've started running it at Con World School and our lab school that we have, Con Lab School.  The students, the high school students especially, they're saying, this is amazing to be able to fine tune my arguments without fearing judgment.

# Enhancing Language Arts with AI Writing Collaboration

It makes me that much more confident to kind of go into the classroom and really participate. And we all know that Socratic dialogue debate is a great way to learn.  But frankly, it's not out there for most students.  But now it can be accessible to hopefully everyone.  A lot of the narrative, we saw that in the headlines, has been it's going to do the writing for kids.  Kids are not going to learn to write.  But we are showing that there's ways that the AI doesn't write for you.  It writes with you.  So this is a little thing.  And my eight-year-old is addicted to this.  And he's not a kid that really liked writing before.  But where, you know, you could say, I want to write a horror story.  And it says, ooh, a horror story.  How spine-tingling and thrilling.  Let's dive into the world of eerie shadows and chilling mysteries.

And this is an activity where the student will write two sentences. And then the AI will write two sentences.  And so they collaborate together on a story.  The students write, Beatrice was a misunderstood ghost.  She wanted to make friends but kept scaring them by accident.  And the AI says, poor Beatrice, a lonely spirit yearning for companionship.  One day she stumbled upon an old abandoned mansion, et cetera, et cetera.  I encourage you all to, you know, hopefully one day try this.  This is surprisingly fun.  Now to even more directly hit this use case.  And what I'm about to show you, everything I showed you so far is actually already part of Calmigo.  What I'm about to show you, we haven't shown to anyone yet.

# Enhancing Reading Comprehension and Writing Skills with AI

This is a prototype. We hope to be able to launch it in the next few months.  But this is to directly use AI, use generative AI, to not undermine English and language arts but to actually enhance it in ways that we couldn't have even conceived of even a year ago.  This is reading comprehension.  This is the students reading Steve Jobs' famous speech at Stanford.  And then as they get to certain points, they can click on that little question.  And the AI will then, Socratically, almost like an oral exam, ask the student about things.  And the AI can highlight parts of the passage.  Why did the author use that word? What was their intent? Does it back up their argument? They can start to do stuff that, once again, we never had the capability to give everyone a tutor, everyone a writing coach, to actually dig into reading at this level.  And you could go on the other side of it.  We have a whole workflow that helps them write, helps them be a writing coach, draw an outline.  But once a student actually constructs a draft, and this is where they're constructing a draft, they can ask for feedback, once again, as you would expect from a good writing coach.  In this case, the student we'll say, let's say, does my evidence support my claim? And then the AI not only is able to give feedback, but it's able to highlight certain parts of the passage and says, you know, on this passage, this doesn't quite support your claim, but once again, Socratically says, can you tell us why? So it's pulling the student, it's making them a better writer, giving them far more feedback than they've ever been able to actually get before, and we think this is going to dramatically accelerate writing, not hurt it.

§§§

# Personalized Education for Teachers
Now, everything I've talked about so far is for the student, but we think this could be equally as powerful for the teacher to drive more personalized education and, frankly, save time and energy for themselves and for their students. So this is an American history exercise on Khan Academy.  It's a question about the Spanish-American war.  And at first, it's in student mode, and if you say, tell me the answer, it's not going to tell the answer, it's going to go into tutoring mode.  But that little toggle which teachers have access to, they can turn student mode off, and then it goes into teacher mode.  And what this does is, it turns into, you could view it as a teacher's guide on steroids.  Not only can it explain the answer, it can explain how you might want to teach it.

# Benefits for Teachers
It can help prepare the teacher for that material. It can help them create lesson plans, as you can see doing right there.  It'll eventually help them create progress reports, it'll help them eventually grade.  So once again, teachers spend about half their time with this type of activity, lesson planning, all of that energy can go back to them or go back to human interactions with their actual students.  So, you know, one point I want to make, these large language models are so powerful, there's a temptation to say, well, all these people are just going to slap them onto their websites, and it kind of turns the applications themselves into commodities.  And what I've got to tell you is, I kind of thought that's one of the reasons why I didn't sleep for two weeks when I first had access to GPT-4 back in August.

# Enhancing AI Tutoring
But we quickly realized that to actually make it magical, I think it's really important to make it magical, but we quickly realized that to actually make it magical, I think what you saw with Conmigo a little bit, it didn't interact with you the way that you see chat GPT interacting, it was a little bit more magical, it was more Socratic, it was clearly much better at math than what most people are used to thinking. And the reason is there was a lot of work behind the scenes to make that happen.  And I could go through the whole list of everything we've been working on, many, many people, for over six, seven months, to make it feel magical, but perhaps the most intellectually interesting one is we realized, and this was an idea from an open AI researcher, that we could dramatically improve its ability in math and its ability in tutoring if we allowed the AI to think before it speaks.

# The Future of AI and Education
So if you're tutoring someone and you immediately just start talking before you assess their math, you might not get it right. But if you construct thoughts for yourself, and what you see on the right there is an actual AI thought, something that it generates for itself but it does not share with the student, then its accuracy went up dramatically and its ability to be a world-class tutor went up dramatically.  And you can see it's talking to itself here.  It says, the student got a different answer than I did, but do not tell them they made a mistake.  Instead, ask them to explain how they got to that step.  So I'll just finish off.

# The Role of AI in Education and the Need for Positive Use Cases
Hopefully, what I've just shown you is just half of what we are working on, and we think this is just the very tip of the iceberg of where this can actually go. And I'm pretty convinced, which I wouldn't have been even a year ago, that we, together, have a chance of addressing the two-sigma problem and turning it into a two-sigma opportunity, dramatically accelerating education as we know it.  Now, just to take a step back at a meta-level, obviously, we heard a lot today, the debates on either side.  There's folks who take a more pessimistic view of AI.  They say, this is scary, there's all these dystopian scenarios.  We maybe want to slow down.  We want to pause.  On the other side, there are the more optimistic folks who say, well, we've gone through inflection points before.

We've gone through the Industrial Revolution. It was scary, but it all kind of worked out.  And what I'd argue right now is, I don't think this is like a flip of a coin or this is something where we'll just have to wait and see which way it turns out.  I think everyone here and beyond, we are active participants in this decision.  I'm pretty convinced that the first line of reasoning is actually almost a self-fulfilling prophecy, that if we act with fear and if we say, hey, we just got to stop doing this stuff, what's really going to happen is the rule followers might pause, might slow down, but the rule breakers, as Alexander mentioned, the totalitarian governments, the criminal organizations, they're only going to accelerate.

And that leads to what I am pretty convinced is the dystopian state, which is the good actors have worse AIs than the bad actors. But I'll also talk to the optimist a little bit.  I don't think that means that, oh yeah, then we should just relax and just hope for the best.  That might not happen either.  I think all of us together have to fight like hell to make sure that we put the guardrails, we put in, when the problems arise, reasonable regulations, but we fight like hell for the positive use cases.  Because very close to my heart, and obviously there's many potential positive use cases, perhaps the most powerful use case, and perhaps the most poetic use case, is if AI, artificial intelligence, can be used to enhance HI, human intelligence, human potential and human purpose.  Thank you.  Thank you.  Thank you.
'''

In [None]:
# Smoke test for the splitter: same separator arguments that execute_prompt passes,
# but with 200 as the last argument (presumably the size limit per segment - TODO confirm).
tmp = split_text_by_separator(test_text1, '\n', '\n', 'fix-nothing', 200)
# Print the statistics of the unsplit text, then the length of the split result.
print(text_stats(test_text1))
print(len(tmp))

In [None]:
# Create summary

from datetime import datetime
import langchain
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import (AIMessage, HumanMessage, SystemMessage)
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma
from langchain.schema import HumanMessage
from langchain.callbacks.base import BaseCallbackHandler

# OpenAI Key: looked up in the `creds` mapping, which is not defined in this cell and
# therefore has to exist before the cell runs. execute_prompt passes this value to
# ChatOpenAI as `openai_api_key`.
openAI_key = creds['OpenAI']['v2']['credential']

def create_test_project(proj_name, proj_text):
  """Create a small project folder tree and seed it with one input file.

  Creates `proj_name` and its text-input and text-output subfolders (folder
  names come from the global `folders` mapping) through
  `create_file_directory`, then writes `proj_text` to
  `<proj_name>/<text-input folder>/testfile.txt`.

  Args:
    proj_name: Path of the project root directory.
    proj_text: Text that becomes the content of the test input file.
  """
  create_file_directory(proj_name, False)
  input_dir = proj_name + '/' +  folders['text-input']
  create_file_directory(input_dir, False)
  create_file_directory(proj_name + '/' +  folders['text-output'], False)
  # The context manager closes the file even if the write raises.
  with open(input_dir + '/testfile.txt', 'w+') as f:
    f.write(proj_text)

def execute_prompt(exec_mode, input_file_path, output_file_path, prompt_obj, lang, length='', length_max=1):
  """Apply a prompt to a text file segment by segment and save the combined answer.

  The input file is read, split into segments with `split_text_by_separator`,
  and for every segment a prompt is built with `build_prompt_from_template`
  and sent to the chat model. The answers are joined with blank lines,
  written to `output_file_path` and returned.

  Args:
    exec_mode: Dict with the keys 'model' and 'loglevel'.
      'model' is passed to ChatOpenAI as `model_name`; the value 'no' skips
      the model call, so no answers are collected.
      'loglevel' controls printing: >= 1 prints the prompt object, the input
      text and every model answer; <= 1 prints a prompt built from only the
      first 30 characters of each segment; == 2 prints the full prompt.
    input_file_path: Path of the text file to process.
    output_file_path: Path of the file the joined answers are written to.
    prompt_obj: Prompt definition handed to `build_prompt_from_template`.
    lang: Language option handed to `build_prompt_from_template`.
    length: Length option handed to `build_prompt_from_template`.
    length_max: Length limit option handed to `build_prompt_from_template`.

  Returns:
    The model answers of all segments joined with '\\n\\n' (an empty string
    when no answer was collected).
  """
  # Imported locally so the function does not rely on another cell having
  # imported these modules first (textwrap is only imported in a later cell).
  import json
  import textwrap

  # Read file from disk; the context manager closes the handle even on error.
  with open(input_file_path, 'r') as f:
    input_text = f.read()
  if exec_mode['loglevel'] >= 1:
    print(json.dumps(prompt_obj, sort_keys=False, indent=2) + '\n')
    print(textwrap.fill(input_text, 120) + '\n')

  llm = ChatOpenAI(model_name=exec_mode['model'], temperature=0.0, openai_api_key=openAI_key)
  # model overview see https://gptforwork.com/guides/openai-gpt3-models

  # An earlier variant split on the '§§§' markers: input_text.split("§§§")
  input_text_segments = split_text_by_separator(input_text, '\n', '\n', 'fix-nothing', 1000)
  output_text_segments = []
  print('Model: ' + exec_mode['model'])
  print('Started at: ' + datetime.now().strftime("%H:%M:%S") + '\n')
  # A dedicated loop variable keeps the full `input_text` from being shadowed.
  for index, segment_text in enumerate(input_text_segments):

    print('Processing step ' + str(index + 1) + ' of ' + str(len(input_text_segments)))

    prompt_txt_final = build_prompt_from_template(segment_text, prompt_obj, lang, length, length_max)
    # Same prompt around only the first 30 characters of the segment, for compact logging.
    prompt_txt_final_SHORT = build_prompt_from_template(segment_text[:30], prompt_obj, lang, length, length_max)
    if exec_mode['loglevel'] <= 1:
      print(textwrap.fill(prompt_txt_final_SHORT, 120) + '\n')
    if exec_mode['loglevel'] == 2:
      print(textwrap.fill(prompt_txt_final, 120) + '\n')

    # The model name 'no' skips the model call for this segment.
    if exec_mode['model'] != 'no':
      gpt_response_obj = llm.generate([[HumanMessage(content=prompt_txt_final)]])
      tmp_text = gpt_response_obj.generations[0][0].text
      tmp_tokens = gpt_response_obj.llm_output
      if exec_mode['loglevel'] >= 1:
        print(tmp_text + '\n')
      # llm_output is printed at every log level.
      print(str(tmp_tokens) + '\n')
      output_text_segments.append(tmp_text)

  print('\nEnded at: ' + datetime.now().strftime("%H:%M:%S") + '\n')
  full_output_text = '\n\n'.join(output_text_segments)
  # save to disk; the context manager closes the handle even if the write fails
  with open(output_file_path, 'w+') as f:
    f.write(full_output_text)

  return full_output_text



In [None]:
# test promptlib and templates

import textwrap

# Fetch the 'summary-bullets' prompt from the prompt library; the second argument is
# presumably the prompt version (my_test omits it with the note "last version") - TODO confirm.
prompt_obj = get_prompt_from_lib('summary-bullets', '2')
# prompt_obj = get_prompt_from_lib('summary-bullets', '1')
# print(json.dumps(prompt_obj, sort_keys=True, indent=2))

input_text = 'This is the input text. Lorem ipsum.'

# Three call variants of build_prompt_from_template: with 'length-max-dyn' and 0.5,
# with 'length-max-fix', and with the language only.
# NOTE: every call overwrites prompt_txt_final, so only the result of the last
# (language-only) call is printed below.
prompt_txt_final = build_prompt_from_template(input_text, prompt_obj, 'lang-de', 'length-max-dyn', 0.5)
prompt_txt_final = build_prompt_from_template(input_text, prompt_obj, 'lang-de', 'length-max-fix')
prompt_txt_final = build_prompt_from_template(input_text, prompt_obj, 'lang-de')
print(textwrap.fill(prompt_txt_final, 120))

#write_prompt_to_lib(prompt_obj)

#print(json.dumps(promptlib, sort_keys=True, indent=2))

# myprompt = "The given text is delimited by triple backticks. Summarize the current text to succint and clear bullet points of its contents.
#          The length of the summary must be 200 words maximum."
# myprompt = "The given text is delimited by triple backticks.
#           Summarize the current text to a maximum of 15 succint and clear bullet points of its contents."
# myprompt = myprompt + "The maximum number of words should be 300 words in total. "
# myprompt = myprompt + "Write everything in German language. " # this is optional !
# myprompt = myprompt + "```" + input_text + "```"

## Test GPT

In [None]:
def my_test():
  """Run the two-stage summary pipeline on a freshly created test project.

  Stage 1 turns the test input file into a bullet summary, stage 2 condenses
  that bullet summary with the 'summary-sentence' prompt. Both results are
  written to the project's text-output folder; the stage 2 text is printed
  wrapped at 120 columns.
  """
  # Test setup: create the project folders and the test input file.
  project = 'my-test-proj'
  create_test_project(project, test_text2)

  # Stage 1: bullet summary of the first .txt file in the input folder.
  # Alternatives to try here: model 'gpt-4' with loglevel 2 plus
  # 'length-max-dyn', 0.5 - or model 'no' plus 'length-max-fix'.
  source_path = find_files(project + '/' +  folders['text-input'], ['.txt'], False)[0]
  bullets_path = project + '/' +  folders['text-output'] + '/summary-bullets.txt'
  execute_prompt(
      {'model':'gpt-3.5-turbo', 'loglevel':0},
      source_path,
      bullets_path,
      get_prompt_from_lib('summary-bullets', '2'),
      'lang-de')

  # Stage 2: the bullet file written by stage 1 is the new input.
  stage2_input_path = project + '/' +  folders['text-output'] + '/summary-bullets.txt'
  sentences_path = project + '/' +  folders['text-output'] + '/summary-3-sentences.txt'
  summary_text = execute_prompt(
      {'model':'gpt-3.5-turbo', 'loglevel':0},
      stage2_input_path,
      sentences_path,
      get_prompt_from_lib('summary-sentence'),  # no version given: last version
      'lang-de')

  print(textwrap.fill(summary_text, 120) + '\n')

In [None]:
# my_test()

In [None]:
from ipywidgets import Layout, Button, Box, VBox, HTML

def show_workbench():
  """Assemble the workbench UI as four horizontal rows.

  The rows are: a header, four info texts, four buttons and one output area.
  The first button ('Install') prints a note and runs my_test() inside the
  output area; the second button ('Save Text') clears the output area. The
  remaining two buttons have no handler.

  Returns:
    A list with four Box widgets in the order header, texts, buttons, output.
  """
  def auto_flex(width):
    # Fresh Layout per widget: flex '1 1 auto' with the requested width.
    return Layout(flex='1 1 auto', width=width)

  # Header row
  header_row = [
      widgets.HTML(
          value='<div style="background-color: #ccc; text-align: center">Hello <b>World</b></div>',
          layout=auto_flex('auto')),
  ]

  # Text row: four cells of 25% width each
  info_values = ['Infotext'] + ['Infotext jdskl fjkdslf jsdklfjslf'] * 3
  text_row = [HTML(value=info, layout=auto_flex('25%')) for info in info_values]

  # Button row: flex '1 1 0%' gives every button the same share of the row
  button_row = [
      Button(description=label, layout=Layout(flex='1 1 0%', width='auto'), button_style='success')
      for label in ('Install', 'Save Text', 'Split Text', 'Chat')
  ]

  # Output row
  output_area = widgets.Output(layout=auto_flex('auto'))

  # One layout object shared by all four row containers
  row_layout = Layout(display='flex',
                      flex_flow='row',
                      align_items='stretch',
                      width='90%')
  rows = [
      Box(children=row_items, layout=row_layout)
      for row_items in (header_row, text_row, button_row, [output_area])
  ]

  def run_test(_button):
    # Everything printed while the context is active lands in the output area.
    with output_area:
      print('Printed using output widget') # + my_input_text.value)
      my_test()

  def wipe_output(_button):
    output_area.clear_output()

  button_row[0].on_click(run_test)
  button_row[1].on_click(wipe_output)

  return rows


# Workbench

In [None]:
# Stack the rows returned by show_workbench() vertically; being the last expression
# of the cell, the VBox is rendered as the cell output.
VBox(show_workbench())

In [None]:
# see https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html
# Minimal standalone UI: a text area, a button and an output area for the handler's prints.
my_input_text = widgets.Textarea(placeholder='Enter input text', disabled=False)
button = widgets.Button(description="Click Me!")
output = widgets.Output()

def on_button_clicked(b):
    """Click handler: echo the text area content and run my_test() inside `output`.

    The entered text is only printed; it is not passed on to my_test().
    The argument `b` supplied by on_click is not used.
    """
    with output:
        print('Clicked:' + my_input_text.value)
        my_test()

# Register the handler, then render the three widgets in this order.
button.on_click(on_button_clicked)
display(my_input_text, button, output)