# asyncflows Hello World Example

This notebook demonstrates the Hello World example with Llama 3, by running Ollama on Google Colab.

To use a Google Colab GPU, follow these steps before running the notebook:

1. Go to `Edit -> Notebook Settings`
2. Select a hardware accelerator (T4 GPU is free)

In [5]:
# @title Install asyncflows

!pip install asyncflows

# from IPython.display import clear_output
# clear_output()

Processing ./asyncflows-0.0.0-py3-none-any.whl
Collecting aioboto3<13.0.0,>=12.2.0 (from asyncflows==0.0.0)
  Downloading aioboto3-12.4.0-py3-none-any.whl (32 kB)
Collecting cryptography<42.0.0,>=41.0.4 (from asyncflows==0.0.0)
  Downloading cryptography-41.0.7-cp37-abi3-manylinux_2_28_x86_64.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datamodel-code-generator<0.26.0,>=0.25.6 (from asyncflows==0.0.0)
  Downloading datamodel_code_generator-0.25.6-py3-none-any.whl (108 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.1/108.1 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
Collecting litellm<2.0.0,>=1.30.3 (from asyncflows==0.0.0)
  Downloading litellm-1.37.19-py3-none-any.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
Collecting numpy<2.0.0,>=1.26.2 (from asyncflows==0.0.0)
  Dow

In [2]:
# @title Start Ollama, download Llama 3
!curl https://ollama.ai/install.sh | sh

import os
import asyncio
import threading

# NB: depending on which backend you are running, you may need to set these
# environment variables to get CUDA working.

# Make the CUDA binaries discoverable. The entry is only appended when it is
# missing, so re-running this cell does not keep growing PATH.
cuda_bin_dir = '/usr/local/cuda/bin'
current_path = os.environ.get('PATH', '')
if cuda_bin_dir not in current_path.split(':'):
    os.environ['PATH'] = f'{current_path}:{cuda_bin_dir}' if current_path else cuda_bin_dir

# Set LD_LIBRARY_PATH to include both /usr/lib64-nvidia and CUDA lib directories
# (this replaces any previous value of the variable).
os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia:/usr/local/cuda/lib64'

async def run_process(cmd):
    """Run ``cmd`` as a subprocess and echo its output line by line.

    Args:
        cmd: Sequence holding the program and its arguments. It is unpacked
            into ``asyncio.create_subprocess_exec``, so no shell is involved.

    Returns:
        The exit code of the process, once both output streams have reached
        EOF and the process has terminated.
    """
    print('>>> starting', *cmd)
    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )

    async def pipe(lines):
        """Print every line read from ``lines`` until the stream hits EOF."""
        async for line in lines:
            # errors='replace' keeps a stray non-UTF-8 byte in the child's
            # output from aborting the whole forwarding loop.
            print(line.decode(errors='replace').strip())

    # Drain both pipes concurrently so neither one can fill up and stall the
    # child. `pipe` must NOT re-spawn itself here: after EOF that would create
    # new tasks without bound.
    await asyncio.gather(pipe(process.stdout), pipe(process.stderr))

    # Reap the child so it does not linger as a zombie, and report its status.
    return await process.wait()

async def start_ollama_serve():
    """Launch ``ollama serve`` through ``run_process`` and await it."""
    serve_command = ['ollama', 'serve']
    await run_process(serve_command)

def run_async_in_thread(loop, coro):
    """Run ``coro`` to completion on ``loop``, then close the loop.

    Meant to be used as a ``threading.Thread`` target: the loop is installed
    as the current event loop of the calling thread before it is run.

    Args:
        loop: The event loop to run; it is closed before this function returns.
        coro: The coroutine (or awaitable) to drive to completion.

    Raises:
        Whatever exception ``coro`` raises; the loop is still closed first.
    """
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(coro)
    finally:
        # Close the loop even when the coroutine fails, so its resources are
        # not leaked.
        loop.close()

# Create a new event loop that will run in a new thread
new_loop = asyncio.new_event_loop()

# Start ollama serve in a separate thread so the cell won't block execution
thread = threading.Thread(target=run_async_in_thread, args=(new_loop, start_ollama_serve()))
thread.start()

await asyncio.sleep(1)

!ollama pull llama3

# from IPython.display import clear_output
# clear_output()

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0>>> Downloading ollama...
100 10091    0 10091    0     0  32162      0 --:--:-- --:--:-- --:--:-- 32239
############################################################################################# 100.0%
>>> Installing ollama to /usr/local/bin...
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
>>> starting ollama serve
Couldn't find '/root/.ollama/id_ed25519'. Generating new private key.
Your new public key is:

ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIA2gogN6Gsu9VTPk/Q3oNj9zUr9S/ZK83vNVFnQH8Ble

2024/05/21 18:25:53 routes.go:1008: INFO server

In [6]:
# @title Define the Flow

# asyncflows configuration, written as YAML-style text inside a Python string:
# - `default_model` selects `ollama/llama3` with `max_output_tokens: 50`.
# - `flow` declares a single action, `hello_world`, which uses the `prompt`
#   action with one text prompt.
# - `default_output` points at `hello_world.result`.
flow_config = """

default_model:
  model: ollama/llama3
  max_output_tokens: 50
flow:
  hello_world:
    action: prompt
    prompt:
      - text: Can you say hello world for me?
default_output: hello_world.result

"""

In [7]:
# @title Run the Flow

from asyncflows import AsyncFlows

# Build the flow from the `flow_config` text (a string, not a file on disk)
flow = AsyncFlows.from_text(flow_config)

# Run the flow; as the last expression of the cell, the awaited result is
# shown as the cell output
await flow.run()

[2m2024-05-21 18:28:37[0m [[32m[1minfo     [0m] [1mCache miss                    [0m [36maction[0m=[35mprompt[0m [36maction_id[0m=[35mhello_world[0m
[2m2024-05-21 18:28:37[0m [[32m[1minfo     [0m] [1mAction started                [0m [36maction[0m=[35mprompt[0m [36maction_id[0m=[35mhello_world[0m


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


time=2024-05-21T18:28:40.537Z level=INFO source=memory.go:133 msg="offload to gpu" layers.requested=-1 layers.real=33 memory.available="14.6 GiB" memory.required.full="5.0 GiB" memory.required.partial="5.0 GiB" memory.required.kv="256.0 MiB" memory.weights.total="4.1 GiB" memory.weights.repeating="3.7 GiB" memory.weights.nonrepeating="411.0 MiB" memory.graph.full="164.0 MiB" memory.graph.partial="677.5 MiB"
time=2024-05-21T18:28:40.540Z level=INFO source=memory.go:133 msg="offload to gpu" layers.requested=-1 layers.real=33 memory.available="14.6 GiB" memory.required.full="5.0 GiB" memory.required.partial="5.0 GiB" memory.required.kv="256.0 MiB" memory.weights.total="4.1 GiB" memory.weights.repeating="3.7 GiB" memory.weights.nonrepeating="411.0 MiB" memory.graph.full="164.0 MiB" memory.graph.partial="677.5 MiB"
time=2024-05-21T18:28:40.551Z level=INFO source=server.go:320 msg="starting llama server" cmd="/tmp/ollama478864012/runners/cuda_v11/ollama_llama_server --model /root/.ollama/mod

'Hello, World!'