In [None]:
# Download and install ollama to the system
!curl https://ollama.ai/install.sh | sh

In [None]:
# Install python dependencies
!pip install aiohttp pyngrok python-dotenv

In [3]:
import os
import asyncio
from dotenv import load_dotenv

# Load environment variables via python-dotenv (NGROK_TOKEN is read from os.environ later on)
load_dotenv()

# Point LD_LIBRARY_PATH at the system NVIDIA library directory
os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia'

async def run_process(cmd):
    """Run ``cmd`` as a subprocess and echo its stdout/stderr line by line.

    Args:
        cmd: Sequence of strings: the executable followed by its arguments.
            The full command is printed before it starts, so do not pass
            secrets through this helper.

    Returns:
        The exit code of the subprocess once it has terminated.
    """
    print('>>> starting', *cmd)
    p = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    async def pipe(lines):
        # Forward each line as it arrives; undecodable bytes are replaced
        # rather than raising and aborting the whole output forwarding.
        async for line in lines:
            print(line.strip().decode('utf-8', errors='replace'))

    try:
        # Drain both streams concurrently so neither pipe buffer can fill up
        # and block the child.
        await asyncio.gather(
            pipe(p.stdout),
            pipe(p.stderr),
        )
        # Reap the child so it does not linger as a zombie, and expose its
        # exit status to the caller.
        return await p.wait()
    finally:
        # On cancellation or error the child may still be running; stop it so
        # it is not orphaned (e.g. still holding its listening port).
        if p.returncode is None:
            try:
                p.terminate()
            except ProcessLookupError:
                # The process exited between the check and the signal.
                pass
            await p.wait()

In [None]:
#register an account at ngrok.com and create an authtoken and place it here
await asyncio.gather(
    run_process(['ngrok', 'config', 'add-authtoken', os.environ["NGROK_TOKEN"]])
)

In [None]:
# run ollama subprocess and expose it from localhost:11434 with ngrok to the public with a url
await asyncio.gather(
    run_process(['ollama', 'serve']),
    run_process(['ngrok', 'http', '--log', 'stderr', '11434', '--host-header', 'localhost:11434'])
)