###Installing necessary packages

In [None]:
!pip install utils

In [None]:
!pip install google.generativeai

In [None]:
!pip install google.api_core

In [None]:
!pip install gradio

### Importing necessary libraries

In [5]:
import os
import io
import IPython.display
from PIL import Image
import base64
import requests
requests.adapters.DEFAULT_TIMEOUT = 60

In [6]:
import google.generativeai as palm
from google.api_core import client_options as client_options_lib


### Setting up PaLM API

In [7]:
palm.configure(
    api_key= 'Your_API_Key',
    transport="rest",
    client_options=client_options_lib.ClientOptions(
        api_endpoint=os.getenv("GOOGLE_API_BASE"),
    )
)

###Setting up model

In [10]:
models = [m for m in palm.list_models() if 'generateText' in m.supported_generation_methods]
model_bison = models[0]
model_bison

Model(name='models/text-bison-001',
      base_model_id='',
      version='001',
      display_name='Text Bison',
      description='Model targeted for text generation.',
      input_token_limit=8196,
      output_token_limit=1024,
      supported_generation_methods=['generateText', 'countTextTokens', 'createTunedTextModel'],
      temperature=0.7,
      top_p=0.95,
      top_k=40)

###Setting up text generation function

In [11]:
from google.api_core import retry
@retry.Retry()
def generate_text(prompt,
                  model=model_bison,
                  temperature=0.0):
    return palm.generate_text(prompt=prompt,
                              model=model,
                              temperature=temperature)

### Initializing promp tempelate to set up context

In [12]:
prompt_template = """
You are a junior developer bot who is still learning. You are paired with the user for paired programming assignment.
You will always generate syntax wrong code, but you will correct the code once the user tells you to.

Here is the user prompt

{question}

"""

### Launching Chatbot

In [13]:
import gradio as gr
import os

In [14]:
def generate(input, slider):
  """Generates text based on the given prompt and max new tokens."""
  #prompt=prompt_template.format(question=input)
  response = generate_text(prompt=prompt_template.format(question=input))
  outputs = response.result
  return outputs

demo = gr.Interface(
                    fn=generate,
                    inputs=[gr.Textbox(label="Prompt"),
                            gr.Slider(label="Max new tokens", value=20, maximum=2048, minimum=1)],
                    outputs=[gr.Textbox(label="Output")],
                    title="Python Code Explainer"
                    )
gr.close_all()

demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://35b933a47c5d1d036a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




### Setting up chatbot with conversational memory

In [15]:
def format_chat_prompt(message, chat_history, instruction):
    prompt = f"System:{instruction}"
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}"
    prompt = f"{prompt}\nUser: {message}\nAssistant:"
    return prompt

def respond(message, chat_history, instruction, temperature=0.7):
    prompt = format_chat_prompt(message, chat_history, instruction)
    chat_history = chat_history + [[message, ""]]
    stream = generate_text(prompt,
                                      stop_sequences=["\nUser:", "<|endoftext|>"],
                                      temperature=temperature)
                                      #stop_sequences to not generate the user answer
    acc_text = ""
    #Streaming the tokens
    for idx, response in enumerate(stream):
            text_token = response.token.text

            if response.details:
                return

            if idx == 0 and text_token.startswith(" "):
                text_token = text_token[1:]

            acc_text += text_token
            last_turn = list(chat_history.pop(-1))
            last_turn[-1] += acc_text
            chat_history = chat_history + [last_turn]
            yield "", chat_history
            acc_text = ""

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240) #just to fit the notebook
    msg = gr.Textbox(label="Prompt")
    with gr.Accordion(label="Advanced options",open=False):
        system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")
        temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1, value=0.7, step=0.1)
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    btn.click(respond, inputs=[msg, chatbot, system], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot, system], outputs=[msg, chatbot]) #Press enter to submit
gr.close_all()
demo.launch(share=True)



Closing server running on port: 7860
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://ddd887c712cdc6edcd.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Here you can see that the model does not output response because PaLM api models do not have a method for generating stream data. When we try to set up a chat history for the model, the model stopped working.

To overcome this, we can create memory buffer for conversation chat, which will require extra memory and computation (https://python.langchain.com/docs/modules/memory/), or we can use a different model.

* Referenced from https://learn.deeplearning.ai/pair-programming-llm/lesson/1/introduction
* Referenced from https://developers.generativeai.google/guide