In [23]:
import requests, json

In [1]:
import os

In [3]:
# Show whether the TASK_LOG environment variable is defined for this kernel.
os.environ.get('TASK_LOG') is not None

False

In [20]:
# Root address of the locally running inference server; every request in this
# notebook is built by appending a path to it.
# Interactive API documentation is served at http://127.0.0.1:3000/docs
url = "http://127.0.0.1:3000"

In [21]:
# Fetch the server's self-reported configuration from the /info endpoint.
# The timeout makes the cell fail fast when the server is not reachable instead
# of blocking the kernel; raise_for_status() surfaces HTTP errors rather than
# displaying an error body as if it were the info payload.
info_response = requests.get(f'{url}/info', timeout=10)
info_response.raise_for_status()
info_response.json()

{'model_id': 'google/gemma-2b-it',
 'model_sha': None,
 'model_dtype': 'torch.float16',
 'model_device_type': 'cuda',
 'model_pipeline_tag': None,
 'max_concurrent_requests': 128,
 'max_best_of': 2,
 'max_stop_sequences': 4,
 'max_input_length': 1023,
 'max_total_tokens': 1024,
 'waiting_served_ratio': 0.3,
 'max_batch_total_tokens': 16000,
 'max_waiting_tokens': 20,
 'max_batch_size': None,
 'validation_workers': 2,
 'max_client_batch_size': 4,
 'version': '2.0.1',
 'sha': 'eade7377140a680a79bd2ce3f2d486314cf5a9b9',
 'docker_label': None}

In [24]:
# JSON Schema the generated object is meant to satisfy: string fields
# "location" and "activity", an integer "animals_seen" between 1 and 5, and a
# string array "animals". All four are listed as required.
schema = {
    "properties": {
        "location": {"title": "Location", "type": "string"},
        "activity": {"title": "Activity", "type": "string"},
        "animals_seen": {
            "maximum": 5,
            "minimum": 1,
            "title": "Animals Seen",
            "type": "integer",
        },
        "animals": {"items": {"type": "string"}, "title": "Animals", "type": "array"},
    },
    "required": ["location", "activity", "animals_seen", "animals"],
    "title": "Animals",
    "type": "object",
}

# Request body for the /generate endpoint: the prompt plus generation parameters.
data = {
    "inputs": "convert to compact JSON in a single line: I saw a puppy a cat and a raccoon during my bike ride in the park",
    # 'inputs': 'I saw a puppy, a cat, and a raccoon during my bike ride in the park',
    "parameters": {
        "max_new_tokens": 100,
        # "repetition_penalty": 1.3,
        "temperature": 0.6,
        # 'seed': 1,
        # Passes the schema to the server as a "json" grammar
        # (presumably constrains decoding to it; confirm against the server docs).
        "grammar": {
            "type": "json",
            "value": schema
        }
    }
}

# `json=` serializes the body and sets the Content-Type header itself, so no
# explicit header is needed. The timeout keeps the cell from hanging forever
# if the server stalls.
response = requests.post(f'{url}/generate', json=data, timeout=60)
# Fail with the HTTP error itself rather than a KeyError on 'generated_text'
# when the server answers with an error body.
response.raise_for_status()
raw = response.json()['generated_text'].strip()
# Some models get stuck emitting whitespace until max_new_tokens is reached,
# leaving the object unterminated; append the closing '}' in that case.
# NOTE(review): this repair can produce an object that parses but lacks
# required fields; compare against schema["required"] before relying on `gen`.
if not raw.endswith('}'):
    raw += '}'
gen = json.loads(raw)
print(gen)

{'activity': 'Bike ride', 'animals': ['puppy', 'cat', 'raccoon']}


In [25]:
from openai import OpenAI

In [26]:
# OpenAI-compatible client pointed at the local server's /v1 routes.
# The key is a dummy placeholder, not a secret
# (presumably the local server does not check it; confirm).
client = OpenAI(api_key="-", base_url=f"{url}/v1")

In [27]:
prompt = "What is deep learning?"

# Request a streamed reply so the text can be echoed piece by piece as it
# arrives. Sampling uses temperature 0.8 with seed=1, and the reply is capped
# at 100 tokens.
chat_completion = client.chat.completions.create(
    stream=True,
    seed=1,
    temperature=0.8,
    max_tokens=100,
    model="tgi",
    messages=[{"role": "user", "content": prompt}],
)

# Each streamed chunk carries an incremental delta; chunks whose delta has no
# text are skipped.
for chunk in chat_completion:
    piece = chunk.choices[0].delta.content
    if piece:
        print(piece, end='')
print()  # finish the streamed text with a newline

**Deep Learning** is a subfield of machine learning that focuses on creating algorithms that can learn and adapt from vast amounts of data without explicit programming. This allows these algorithms to discover patterns and relationships in data that might be difficult or impossible for humans to perceive.

**Key characteristics of deep learning:**

* **Autoencoders and autoregressive models:** These are foundational architectures that use artificial neurons to learn hierarchical representations of data.
* **Multi-layered neural networks (MLNNs):
