In [1]:
# Install the API server, LLM client, download and tunnelling dependencies.
# %pip targets the running kernel's environment, and --quiet trims the log
# without hiding installation errors.
%pip install --quiet --upgrade gdown litellm fastapi nest-asyncio uvicorn async_generator pycloudflared

In [None]:
# Download and run the Ollama install script.
#   -f  : fail on HTTP errors so an error page is never piped into sh
#   -sS : hide the progress meter but still print errors
#   -L  : follow redirects
# The installer's own output is discarded with a POSIX-portable redirect.
!curl -fsSL https://ollama.ai/install.sh | sh > /dev/null 2>&1

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  8422    0  8422    0     0  21785      0 --:--:-- --:--:-- --:--:-- 21818


In [2]:
# Download the shared Google Drive folder into ./folder; a later cell reads
# ./folder/Modelfile from it. Output is discarded with a redirect that works
# in any POSIX shell (`&>` is bash-only).
!gdown https://drive.google.com/drive/folders/1OqHpLJ3vttta1wZQcopLP18Eb05f5f2I -O ./folder --folder > /dev/null 2>&1

In [3]:
# Start the Ollama server in the background; nohup appends its output to nohup.out.
# NOTE(review): nothing here waits for the server to become ready before the
# next cell talks to it — confirm this is reliable on a fresh runtime.
!nohup ollama serve &

nohup: appending output to 'nohup.out'


In [4]:
# Register the downloaded Modelfile with Ollama under the name "PhatGPT".
# Output is discarded with a redirect that works in any POSIX shell; the
# bash-only `&>` form would run the command in the background under plain sh.
!ollama create PhatGPT -f ./folder/Modelfile > /dev/null 2>&1

In [5]:
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from litellm import acompletion
from pydantic import BaseModel
from typing import List
import json
import aiohttp

class RequestModel(BaseModel):
    """Request body accepted by the ``/api/chat`` endpoint."""

    # Model name; it is prefixed with "ollama/" before being handed to litellm.
    model: str
    # Conversation history, forwarded unchanged as the `messages` argument.
    # Presumably OpenAI-style {"role": ..., "content": ...} dicts — verify against clients.
    messages: List[dict]
    # Forwarded as the `stream` argument of the completion call.
    stream: bool

app = FastAPI()

# Let browser front-ends on any origin call the API. Credentials are disabled:
# a wildcard origin must not be combined with credentialed requests, and the
# endpoints defined in this notebook use neither cookies nor auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=False,
    allow_methods=['*'],
    allow_headers=['*'],
)

# Base URL of the local Ollama server started earlier in the notebook.
host_ollama = "http://127.0.0.1:11434"

async def get_json_events(request: RequestModel):
  """Yield the model's reply for ``request`` as plain-text fragments.

  The request is forwarded to the local Ollama server through litellm. When
  ``request.stream`` is true, each non-empty content delta is yielded as it
  arrives; otherwise the complete reply is yielded as a single fragment.

  Args:
    request: Validated chat request (model name, messages, stream flag).

  Yields:
    str: Pieces of the assistant's reply text.
  """
  response = await acompletion(
      model=f"ollama/{request.model}",
      messages=request.messages,
      api_base=host_ollama,
      max_tokens=1024,
      stream=request.stream
  )

  if not request.stream:
    # A non-streaming call returns one complete response object rather than an
    # async iterator, so it cannot be consumed with `async for`.
    content = response.choices[0].message.content
    if content:
      yield content
    return

  async for chunk in response:
    choice = chunk['choices'][0]
    content = choice['delta']["content"]
    # Role-only and terminal chunks carry no text (content is None or empty);
    # StreamingResponse cannot encode None, so only real text is forwarded.
    if content:
      yield content
    # Check for completion after yielding so text on the final chunk is kept.
    if choice['finish_reason']:
      break

@app.post("/api/chat", response_class=StreamingResponse)
async def chat(request: RequestModel):
  """Stream the model's reply for a chat request back to the caller."""
  reply_fragments = get_json_events(request)
  return StreamingResponse(reply_fragments)

@app.get("/api/tags")
async def tags():
  """Proxy the Ollama server's ``/api/tags`` endpoint and return its JSON body."""
  tags_url = f"{host_ollama}/api/tags"
  async with aiohttp.ClientSession() as session, session.get(tags_url) as resp:
    payload = await resp.json()
  return payload

# if __name__ == '__main__':
#     uvicorn.run(app, port=8080, host='0.0.0.0', workers=1, debug=True)

In [6]:
import asyncio
from uvicorn import Config, Server

# Bind explicitly to the address the tunnel cell forwards to
# (http://127.0.0.1:8000) instead of relying on uvicorn's defaults.
config = Config(app, host="127.0.0.1", port=8000)
server = Server(config=config)
loop = asyncio.get_event_loop()
# Keep a reference to the task: the event loop only holds weak references to
# tasks, so an unreferenced one may be garbage-collected mid-execution.
server_task = loop.create_task(server.serve())
server_task

<Task pending name='Task-1' coro=<Server.serve() running at /usr/local/lib/python3.10/dist-packages/uvicorn/server.py:64>>

In [7]:
import subprocess
import threading
import re

# Launch a Cloudflare quick tunnel that forwards public traffic to the local
# API server; stderr is merged into stdout so one pipe carries all log lines.
process = subprocess.Popen(['pycloudflared', 'tunnel', '--url', 'http://127.0.0.1:8000'],
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# `url` is kept as a re.Match (or None) because a later cell calls url.group().
url = None
for line in iter(process.stdout.readline, ''):
    url = re.search(r'https://[a-zA-Z0-9-]+\.trycloudflare\.com', line)
    if url:
        print(f"Tunnel URL: {url.group()}")
        break

if url is None:
    # readline() returned '' (EOF): the tunnel closed its output without
    # ever announcing a public URL.
    raise RuntimeError(
        f"pycloudflared stopped (exit code {process.poll()}) without reporting a tunnel URL"
    )


def _drain_tunnel_output(stream):
    """Read and discard the tunnel's remaining log lines."""
    for _ in stream:
        pass


# Nothing reads the pipe once the loop above exits; drain it in a daemon
# thread so the tunnel process never blocks on a full pipe buffer.
threading.Thread(target=_drain_tunnel_output, args=(process.stdout,), daemon=True).start()

Tunnel URL: https://yacht-specialty-everywhere-sandra.trycloudflare.com


INFO:     Started server process [16013]
INFO:     Waiting for application startup.


In [8]:
# import nest_asyncio

# nest_asyncio.apply()

# import uvicorn

# uvicorn.run(app)

In [9]:
# Display the public tunnel URL (the whole regex match) as the cell's result.
url.group(0)

'https://yacht-specialty-everywhere-sandra.trycloudflare.com'