## Setup and Installation

[Full instructions for JupyterLab setup](https://github.com/TrelisResearch/install-guides/blob/main/jupyter-lab-setup.md)

In [1]:
# !pip install requests

Note: If you are using an Apple Silicon (M1) Mac, make sure you have installed a version of Python that supports the arm64 architecture; otherwise the installation will build the x86 version of llama.cpp, which will be about 10x slower on Apple Silicon (M1) Macs. For example:

In [2]:
import ipywidgets as widgets
import os
from IPython.display import display, Markdown
import signal


In [3]:
# Look for the Miniforge arm64 installer script in the current working directory
# and tell the user whether a (re)install step is needed.
arm64path = "Miniforge3-MacOSX-arm64.sh"
arm64_status = (
    "Version of Python that supports arm64 architecture already exists!"
    if os.path.exists(arm64path)
    else "Uncomment the next block of code and install python."
)
print(arm64_status)

Uncomment the next block of code and install python.


## Install Llama.cpp (only need to do this once)
The instructions below are for Macs with an M1 chip.
For other operating systems, comment out those cells and get instructions [here](https://github.com/TrelisResearch/llamacpp-install-basics/blob/main/instructions.md).

In [4]:
# Model to fetch, given as the path below https://huggingface.co/
model_name = 'TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf'
parts = model_name.split('/')

# The last path component is the model file name.
pure_name = parts[-1]
print("Pure name of model is: ", pure_name)

# The first two path components identify the model location.
model_path = "/".join(parts[:2])

print("Model path is: ", model_path)

Pure name of model is:  llama-2-7b-chat.Q4_K_M.gguf
Model path is:  TheBloke/Llama-2-7b-Chat-GGUF


In [5]:
os.chdir('/Users/astridz/Documents/AI_recipe')
if not os.path.exists('llama.cpp'):
    print("Cloning llama.cpp...")
    !git clone https://github.com/ggerganov/llama.cpp
    %cd llama.cpp

    print("Compiling for Mac with M1 chip...")
    !LLAMA_METAL=1 make
    print("Compilation completed!")
            
    %cd ../
else:
    print("llama.cpp has already been cloned into this directory!")

llama.cpp has already been cloned into this directory!


### Change into the llama.cpp directory and download the model (if missing)

In [6]:
%cd llama.cpp

if not os.path.exists(pure_name):
    !wget https://huggingface.co/{model_name}
else:
    print(f"{pure_name} already exists!")

%cd ../

/Users/astridz/Documents/AI_recipe/llama.cpp
llama-2-7b-chat.Q4_K_M.gguf already exists!
/Users/astridz/Documents/AI_recipe


Set the default value for context_length to 2048 tokens and derive the document and generation budgets from it.

In [7]:
# Context window size and the budgets derived from it.
context_length = 2048
# 75% of the context length, kept under both names.
max_doc_length = max_doc_tokens = int(context_length * 0.75)
# 20% of the context length.
n_predict = int(context_length * 0.2)

In [8]:
import sys
import subprocess
import threading
import os

## Set up the User Interface

In [9]:
from IPython.display import display, HTML, clear_output, Markdown, FileLink
import textwrap, json
import ipywidgets as widgets
import re, time
# from google.colab import filesccc
import io
import PyPDF2
from PyPDF2 import PdfReader
from functools import partial


In [10]:
# Make the flask folder the working directory for the cells below.
# NOTE(review): absolute, machine-specific path — adjust to your own checkout.
os.chdir('/Users/astridz/Documents/AI_recipe/flask')
def print_wrapped(text):
    """Render ``text`` as Markdown, splitting it around triple-backtick code blocks.

    Text without any fenced block is displayed as one Markdown object, unmodified.
    Otherwise each prose run and each fenced block is displayed as its own
    Markdown object, in document order, with surrounding whitespace stripped;
    prose runs that are empty after stripping are skipped.

    Args:
        text: The string to render.
    """
    fence_re = re.compile(r'```(.+?)```', re.DOTALL)

    segments = []
    cursor = 0
    for fence in fence_re.finditer(text):
        prose = text[cursor:fence.start()].strip()
        if prose:
            segments.append(prose)
        # Keep the fences themselves so Markdown renders the block as code.
        segments.append(fence.group(0).strip())
        cursor = fence.end()

    # A match is never empty, so an unmoved cursor means no code block was found.
    if cursor == 0:
        display(Markdown(text))
        return

    tail = text[cursor:].strip()
    if tail:
        segments.append(tail)

    for segment in segments:
        display(Markdown(segment))

In [11]:
# DEFAULT_SYSTEM_PROMPT = f"""You are a helpful recipe-generating assistant. Based on the following given ingredients, you will generate a recipe. Make sure to follow the rules listed below: 1. Please don't give a very long recipe (more than 1000 words),  2. Warn the user if there is any common allergies ingredients in your recipe. 3. If you will need to use any ingredients outside of the ingredients that the user provided, Warn the user. 4. Provide other essential information about the recipe such as kitchen utensils, preparation steps. 5. Choose some common spice/sauce first, unless the user provided a very specific sauce want to use. 6. The default serving size is 2, unless the user specifies. 7. The default dish style is American/Italian cuisine, unless the user specifies. 8. The default type of dish is airfry/oven/stir-fry, unless the user specifies. 9. Use both text and some cute emoji if you can. """
# System prompt sent with every request (adjacent literals are joined into one string).
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful recipe-generating assistant. "
    "Based on the following given ingredients, you will generate a recipe. "
    "make the description in 200 words."
)
SYSTEM_PROMPT = DEFAULT_SYSTEM_PROMPT

# Conversation state: starts with only the system message.
dialog_history = [dict(role="system", content=SYSTEM_PROMPT)]

# Chat widgets: the output area, the input box, and the Send button.
output_log = widgets.Output()
usertext = widgets.Textarea(layout=widgets.Layout(width='800px'))
button = widgets.Button(description="Send")

#-------------------------------------------->
# Function to stream the subprocess output into the output widget
# (it does not modify the dialog history).
def generate_response(process, output_widget):
    """Relay a subprocess's stdout, line by line, into an output widget.

    stdout is read until end-of-file, so every line is rendered — including
    the lines that are still buffered in the pipe when the process exits.
    (Stopping as soon as ``process.poll()`` reports an exit code would drop
    those trailing lines.)

    Args:
        process: Object with a text-mode ``stdout`` stream, e.g. a
            ``subprocess.Popen`` created with ``stdout=PIPE, text=True``.
        output_widget: Context manager (e.g. an ``ipywidgets.Output``) inside
            which each line is rendered with ``print_wrapped``.
    """
    try:
        # Iterating the stream yields one line at a time and ends at EOF.
        for output in process.stdout:
            print("output: ", output)
            with output_widget:
                print_wrapped(f'{output}\n')
        print("Subprocess has completed.")
    finally:
        # Always release the pipe, even if rendering a line failed.
        process.stdout.close()
#-------------------------------------------->  

# Click handler for the Send button
def on_button_clicked(b):
    """Run llama.cpp on the text typed by the user and stream the answer.

    Appends the textarea content to ``dialog_history`` as a user message,
    re-renders the history into ``output_log``, then launches the llama.cpp
    ``main`` binary with a prompt built from ``SYSTEM_PROMPT`` and the new
    message. Output lines are rendered by ``generate_response`` on a worker
    thread; this callback blocks until the run has finished.

    The button is disabled while processing and is always restored afterwards,
    even when launching the subprocess fails. The subprocess is started with
    ``cwd=`` instead of changing the working directory of the whole kernel.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    user_input = usertext.value
    dialog_history.append({"role": "user", "content": user_input})
    usertext.value = ''
    print("use input: ", user_input)

    # Change button description and color, and disable it while processing
    button.description = 'Processing...'
    button.style.button_color = '#ff6e00'
    button.disabled = True

    try:
        with output_log:
            clear_output()
            for message in dialog_history:
                print_wrapped(f'**{message["role"].capitalize()}**: {message["content"]}\n')

        # Only the system prompt and the latest user message are sent to the model.
        prompt_template = f'''[INST]<<SYS>>\n{SYSTEM_PROMPT}<</SYS>>\n\n{user_input}[/INST]'''

        # NOTE(review): absolute, machine-specific path — adjust to your own checkout.
        llama_dir = '/Users/astridz/Documents/AI_recipe/llama.cpp'
        pure_name = "llama-2-7b-chat.Q4_K_M.gguf"
        # The model file name is relative, so it is resolved against cwd=llama_dir.
        args = [os.path.join(llama_dir, 'main'), '-m', pure_name, '-c', '2048', '-b', '1024', '-ngl', '48', '-p', prompt_template]
        print("prompt is", prompt_template)
        process = subprocess.Popen(args, stdout=subprocess.PIPE, text=True, cwd=llama_dir)

        # Render the subprocess output on a separate thread
        output_thread = threading.Thread(target=generate_response, args=(process, output_log))
        output_thread.start()

        # Wait for the subprocess and thread to finish
        process.wait()
        output_thread.join()
    finally:
        # Re-enable the button, reset description and color after processing
        button.description = 'Send'
        button.style.button_color = 'lightgray'
        button.disabled = False


use input:  tomato, egg, spinach


prompt is [INST]<<SYS>>
You are a helpful recipe-generating assistant. Based on the following given ingredients, you will generate a recipe. make the description in 200 words.<</SYS>>

tomato, egg, spinach[/INST]


Log start
main: build = 1376 (9e24cc6)
main: built with  for unknown
main: seed  = 1699985101
llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_l

output:   [INST]<<SYS>>




system_info: n_threads = 4 / 8 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | 
sampling: repeat_last_n = 64, repeat_penalty = 1.100000, presence_penalty = 0.000000, frequency_penalty = 0.000000, top_k = 40, tfs_z = 1.000000, top_p = 0.950000, typical_p = 1.000000, temp = 0.800000, mirostat = 0, mirostat_lr = 0.100000, mirostat_ent = 5.000000
generate: n_ctx = 2048, n_batch = 1024, n_predict = -1, n_keep = 0




output:  You are a helpful recipe-generating assistant. Based on the following given ingredients, you will generate a recipe. make the description in 200 words.<</SYS>>



output:  



output:  tomato, egg, spinach[/INST]  Sure, here's a quick and easy recipe for a delicious Tomato and Spinach Omelette using the ingredients you provided:



output:  Ingredients:



output:  * 1 tomato, diced



output:  * 1 egg



output:  * 1/2 cup fresh spinach leaves



output:  



output:  Instructions:



output:  



output:  1. In a small bowl, beat the egg and season with salt and pepper. Set aside.



output:  2. Heat a non-stick pan over medium heat. Add the diced tomato and cook for about 3-4 minutes or until they start to soften.



output:  3. Add the chopped spinach to the pan and cook for another minute, until wilted.



output:  4. Pour the beaten egg mixture over the vegetables in the pan and cook for about 2-3 minutes or until the eggs start to set.



output:  5. Use a spatula to gently fold the omelette in half and cook for another minute or until the cheese is melted and the omelette is cooked through.



 [end of text]

llama_print_timings:        load time =    9850.28 ms
llama_print_timings:      sample time =    1165.98 ms /   290 runs   (    4.02 ms per token,   248.72 tokens per second)
llama_print_timings: prompt eval time =     737.19 ms /    63 tokens (   11.70 ms per token,    85.46 tokens per second)
llama_print_timings:        eval time =  167379.50 ms /   289 runs   (  579.17 ms per token,     1.73 tokens per second)
llama_print_timings:       total time =  184050.73 ms
ggml_metal_free: deallocating
Log end


output:  6. Serve hot and enjoy! This recipe makes one serving, but can easily be doubled or tripled to feed a larger crowd. You can also add other ingredients like diced onions or bell peppers to the omelette for added flavor.
Subprocess has completed.


In [12]:
# Wire the Send button to its click handler.
button.on_click(on_button_clicked)

# Extra output area, placed in the layout below the chat log.
alert_out = widgets.Output()

clear_button = widgets.Button(description="Clear Chat")
# NOTE(review): no click handler is attached to quit_button in the visible cells.
quit_button = widgets.Button(description="Force Quit")

# Spare text area, created once (a second identical Textarea used to be built
# and immediately discarded).
text = widgets.Textarea(layout=widgets.Layout(width='800px'))

In [13]:
def on_clear_button_clicked(b):
    """Reset the chat: keep only the system prompt and wipe the output log.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    # Slice assignment replaces the contents in place, so the shared
    # dialog_history list object stays the same.
    dialog_history[:] = [{"role": "system", "content": SYSTEM_PROMPT}]
    with output_log:
        clear_output()
        
# Wire the Clear Chat button to its click handler.
clear_button.on_click(on_clear_button_clicked)

from IPython.display import display, HTML
from ipywidgets import HBox, VBox

# Page title as HTML (plain string: no placeholders, emoji stored as real Unicode characters)
title = "<h1 style='color: #ff6e00;'>Jupyter Recipe Llama 🦙 💻</h1> <p> Enter your ingredients! </p>"

# Arrange the three buttons horizontally
first_row = HBox([button, clear_button, quit_button])

# Stack the chat log, the alert area, the input box and the button row vertically
layout = VBox([output_log, alert_out, usertext, first_row])

display(HTML(title))  # Use HTML function to display the title
display(layout)


VBox(children=(Output(), Output(), Textarea(value='', layout=Layout(width='800px')), HBox(children=(Button(desâ€¦