In [None]:
# Download and install ollama to the system
!curl https://ollama.ai/install.sh | sh

In [None]:
!pip install aiohttp pyngrok nest_asyncio

import os
import asyncio
import subprocess

# 1) Prepend the NVIDIA driver library directory to LD_LIBRARY_PATH so child
#    processes started from this notebook can load the driver libraries.
#    Only non-empty parts are joined: a trailing separator (which the old
#    concatenation produced whenever the variable was unset or empty) is an
#    empty search-path entry, and the dynamic loader treats an empty entry
#    as the current working directory.
os.environ['LD_LIBRARY_PATH'] = os.pathsep.join(
    filter(None, ['/usr/lib64-nvidia', os.environ.get('LD_LIBRARY_PATH')])
)

# 2) Detect all NVIDIA GPUs and export CUDA_VISIBLE_DEVICES
def detect_and_set_gpus():
    """Export CUDA_VISIBLE_DEVICES with every GPU index reported by nvidia-smi.

    Runs ``nvidia-smi --query-gpu=index --format=csv,noheader`` and keeps each
    output line that consists solely of digits. When at least one index is
    found, the comma-joined list is written to ``CUDA_VISIBLE_DEVICES``
    (replacing any previous value). Otherwise a CPU-only notice is printed.

    Any failure (for example ``nvidia-smi`` missing or exiting non-zero) is
    reported with a printed warning and never raised.
    """
    query = ['nvidia-smi', '--query-gpu=index', '--format=csv,noheader']
    try:
        listing = subprocess.check_output(
            query,
            stderr=subprocess.DEVNULL,
            encoding='utf-8',
        )

        # Collect the purely numeric lines; anything else is ignored.
        indices = []
        for raw in listing.splitlines():
            token = raw.strip()
            if token.isdigit():
                indices.append(token)

        if not indices:
            print("⚠️  No GPUs found, running CPU only.")
            return

        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(indices)
        print(f"✅ CUDA_VISIBLE_DEVICES set to {os.environ['CUDA_VISIBLE_DEVICES']}")
    except Exception as e:
        # Best-effort: detection problems must not stop the notebook.
        print("⚠️  GPU detection error:", e)


# Run detection once at import/cell-execution time.
detect_and_set_gpus()

async def run_process(cmd):
    """Run *cmd* as a subprocess and echo its stdout/stderr line by line.

    Args:
        cmd: Sequence of strings; the program followed by its arguments.
            The full command line is printed before the process starts.

    Returns:
        The child's exit code, available once both output streams have
        closed and the process has been reaped.

    If this coroutine is cancelled (or fails) while the child is still
    running, the child is terminated and reaped so it does not linger.
    """
    print('>>> starting', *cmd)
    p = await asyncio.subprocess.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    async def pipe(stream):
        async for line in stream:
            # errors='replace': a stray non-UTF-8 byte in the child's output
            # must not abort the pump with UnicodeDecodeError.
            print(line.decode('utf-8', errors='replace').rstrip())

    try:
        await asyncio.gather(pipe(p.stdout), pipe(p.stderr))
        # Reap the child so its exit status is collected and reported.
        return await p.wait()
    finally:
        if p.returncode is None:
            # Reached only on cancellation or error: stop the orphaned child.
            try:
                p.terminate()
            except ProcessLookupError:
                pass  # already exited between the check and the signal
            await p.wait()

async def main():
    """Register the ngrok authtoken, then run ``ollama serve`` and the tunnel.

    The authtoken is read from the ``NGROK_AUTHTOKEN`` environment variable
    instead of being embedded in the notebook. Any token that was previously
    committed in this file should be treated as leaked and revoked in the
    ngrok dashboard.

    When the variable is unset or blank, the registration step is skipped and
    ngrok relies on whatever configuration already exists.

    Runs until both ``ollama serve`` and ``ngrok http`` exit.
    """
    token = os.environ.get('NGROK_AUTHTOKEN', '').strip()
    if token:
        # Run this step directly rather than through run_process, which
        # prints the complete command line and would put the secret into
        # the notebook output.
        print('>>> starting ngrok config add-authtoken ***')
        cfg = await asyncio.create_subprocess_exec(
            'ngrok', 'config', 'add-authtoken', token,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        out, _ = await cfg.communicate()
        if out:
            print(out.decode('utf-8', errors='replace').rstrip())
    else:
        print("⚠️  NGROK_AUTHTOKEN is not set; skipping 'ngrok config add-authtoken' "
              "and using the existing ngrok configuration.")

    # ollama serve sees the GPUs through the inherited CUDA_VISIBLE_DEVICES;
    # ngrok exposes ollama's port 11434 and rewrites the Host header so the
    # server accepts the tunnelled requests.
    await asyncio.gather(
        run_process(['ollama', 'serve']),
        run_process([
            'ngrok', 'http',
            '--log', 'stderr',
            '11434',
            '--host-header', 'localhost:11434'
        ]),
    )

# 3) Robust entrypoint: use asyncio.run or fallback to existing loop
if __name__ == '__main__':
    # Decide up front whether an event loop is already running (always the
    # case inside Jupyter/Colab). Wrapping asyncio.run(main()) in
    # `except RuntimeError` instead would leave an un-awaited main()
    # coroutine behind, and would also re-run main() whenever main() itself
    # raised a RuntimeError.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running: plain script execution.
        asyncio.run(main())
    else:
        # A loop is already running: patch it so it can be re-entered.
        import nest_asyncio
        nest_asyncio.apply()
        asyncio.get_event_loop().run_until_complete(main())
