# Using Together Inference REST API

This is the simplest way to use Together Inference: calling its REST API directly. You only need to provide your Together API key and your YOU API key.

### Provide the API keys and define functions


In [None]:
import os

# Read the API keys from the environment so that secrets are not saved with the
# notebook. If a variable is not set, the key falls back to an empty string;
# you can also replace the fallback with your key directly.
YOU_API_KEY = os.environ.get("YOU_API_KEY", "") # Your YOU API key.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "") # Your TOGETHER API key.

import requests

def get_ai_snippets_for_query(query):
  """
  Fetch text snippets for a search query from the YOU search API.

  Args:
    query: (str) the search query.

  Returns:
    (str) all snippets of all search hits, joined with newlines.

  Raises:
    requests.HTTPError: if the search API responds with an error status.
    requests.Timeout: if the search API does not answer within 30 seconds.
  """
  headers = {"X-API-Key": YOU_API_KEY}
  # Pass the query via `params` so that requests URL-encodes it. Interpolating
  # it into the URL by hand breaks on characters such as "&", "#" or "+".
  response = requests.get(
      "https://api.ydc-index.io/search",
      params={"query": query},
      headers=headers,
      timeout=30,
  )
  # Fail with the real HTTP error (bad key, quota, ...) instead of a confusing
  # KeyError on the missing "hits" field below.
  response.raise_for_status()
  results = response.json()

  # We return many text snippets for each search hit so
  # we need to explode both levels
  return "\n".join("\n".join(hit["snippets"]) for hit in results["hits"])

def get_together_prompt(query, context):
  """
  Build the question-answering prompt sent to the model.

  Args:
    query: the question to answer.
    context: the text the answer should be based on.

  Returns:
    (str) the instruction, the context and the question, separated by blank
    lines; the context and question lines are indented by four spaces.
  """
  sections = [
      "Provide an answer based on the given context.",
      f"    Context: {context}",
      f"    Question: {query}",
  ]
  return "\n\n".join(sections)

def ask_together(query, context, model_api_string):
  """
  Generate a response from the given query and context.

  Args:
    query: (str) your query.
    context: (str) your context from snippets.
    model_api_string: (str) a model API string from Together Inference. See the full list in (https://docs.together.ai/docs/inference-models)

  Returns:
    (str) the text of the first completion choice returned by the API.

  Raises:
    ValueError: if the API responds with a status code other than 200.
  """
  # This is hard coded here. To automatically find the default values, use the
  # Python library as shown in the next section.
  prompt_format = "[INST]\n {prompt} \n[/INST]\n\n"
  stop_sequences = ['[INST]', '\n\n']
  max_context_length = 4096
  max_tokens = 256

  # Truncate the context based on the model context length. Instead of using its
  # tokenizer and the exact token count, we assume 1 token ~= 3/4 of a word.
  # The tokens to be generated share the context window with the prompt, so
  # reserve room for them before converting the budget to words.
  word_budget = max(0, int((max_context_length - max_tokens) * 3 / 4))
  truncated_context = " ".join(context.split(" ")[:word_budget])
  prompt = get_together_prompt(query, truncated_context)

  # Wrap the prompt in the model's template. Locate the placeholder explicitly
  # rather than relying on the position of spaces in the template; the single
  # spaces padding the placeholder are dropped, so the hard-coded template
  # above renders as "[INST]\n<prompt>\n[/INST]\n\n".
  placeholder = "{prompt}"
  if prompt_format and placeholder in prompt_format:
    prefix, _, suffix = prompt_format.partition(placeholder)
    formatted_prompt = f"{prefix.rstrip(' ')}{prompt}{suffix.lstrip(' ')}"
  else:
    formatted_prompt = prompt

  url = "https://api.together.xyz/inference"

  payload = {
      "model": model_api_string,
      "prompt": formatted_prompt, # Use the correct prompt format.
      "max_tokens": max_tokens,
      "stop": stop_sequences,
      "temperature": 1.0,
      "top_p": 0.7,
      "top_k": 50,
      "repetition_penalty": 1.1
  }
  headers = {
      "accept": "application/json",
      "content-type": "application/json",
      "Authorization": f"Bearer {TOGETHER_API_KEY}"
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
      raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")

  return response.json()['output']['choices'][0]['text']

def ask_together_with_ai_snippets(query, model_api_string):
  """
  Answer a query with a Together model, using search snippets as context.

  Args:
    query: (str) your query; it is used both for the snippet search and as
      the question put to the model.
    model_api_string: (str) a model API string from Together Inference.

  Returns:
    Whatever `ask_together` returns for the query and the fetched snippets.
  """
  return ask_together(
      query,
      get_ai_snippets_for_query(query),
      model_api_string,
  )

### Send the query with your choice of generation model from the Together Inference API



In [None]:
# Example question; it is used both to fetch the snippets and as the question
# put to the model.
query = "What are top 10 successful open source projects?"
# API string of the Together Inference model that generates the answer.
model_api_string = "togethercomputer/Llama-2-7B-32K-Instruct"
print(ask_together_with_ai_snippets(query=query, model_api_string=model_api_string))

1. Linux
2. TensorFlow
3. LightGBM
4. Elasticsearch
5. Kodi
6. Apache Tomcat
7. IncludeOS
8. Microsoft Cognitive Toolkit
9. C++
10. C#




# Using Together Inference Python Library

This approach is more useful when you want to look up a model's default prompt format and stop sequences automatically. You need to install the `together` library first.

### Provide the API keys and define functions

In [None]:
!pip install together

In [None]:
import together
import requests

import os

# Read the API keys from the environment so that secrets are not saved with the
# notebook. If a variable is not set, the key falls back to an empty string;
# you can also replace the fallback with your key directly.
YOU_API_KEY = os.environ.get("YOU_API_KEY", "") # Your YOU API key.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "") # Your TOGETHER API key.

# Hand the key to the together library so its API calls are authenticated.
together.api_key = TOGETHER_API_KEY

def get_ai_snippets_for_query(query):
  """
  Fetch text snippets for a search query from the YOU search API.

  Args:
    query: (str) the search query.

  Returns:
    (str) all snippets of all search hits, joined with newlines.

  Raises:
    requests.HTTPError: if the search API responds with an error status.
    requests.Timeout: if the search API does not answer within 30 seconds.
  """
  headers = {"X-API-Key": YOU_API_KEY}
  # Pass the query via `params` so that requests URL-encodes it. Interpolating
  # it into the URL by hand breaks on characters such as "&", "#" or "+".
  response = requests.get(
      "https://api.ydc-index.io/search",
      params={"query": query},
      headers=headers,
      timeout=30,
  )
  # Fail with the real HTTP error (bad key, quota, ...) instead of a confusing
  # KeyError on the missing "hits" field below.
  response.raise_for_status()
  results = response.json()

  # We return many text snippets for each search hit so
  # we need to explode both levels
  return "\n".join("\n".join(hit["snippets"]) for hit in results["hits"])

def get_together_prompt(query, context):
  """
  Build the question-answering prompt sent to the model.

  Args:
    query: the question to answer.
    context: the text found on the internet that the answer should rely on.

  Returns:
    (str) a prompt that introduces the assistant role, embeds the context and
    ends with the question followed by a newline.
  """
  return (
      "You are an helpful assistant answering to a question based on\n"
      f"    provided context. Here is a context found on the internet: {context}.\n\n"
      f"    Answer the following question: {query}\n"
  )

def get_model_config(model_api_string):
  """
  Look up the prompt format, stop sequences and context length of a model.

  Searches the list returned by `together.Models.list()` for the first entry
  whose 'name' equals `model_api_string`.

  Args:
    model_api_string: (str) a model API string from Together Inference.

  Returns:
    A tuple (prompt_format, stop_sequences, context_length). Values missing
    from the model entry, or all of them when no entry matches, fall back to
    None, [] and 2048 respectively.
  """
  for model_info in together.Models.list():
    if model_info['name'] != model_api_string:
      continue

    # Only the first matching entry is considered.
    config = model_info['config']
    prompt_format = config['prompt_format'] if 'prompt_format' in config else None
    stop_sequences = config['stop'] if 'stop' in config else []
    context_length = (
        model_info['context_length'] if 'context_length' in model_info else 2048
    )
    return prompt_format, stop_sequences, context_length

  # No model with that name: return the defaults.
  return None, [], 2048

def ask_together(query, context, model_api_string):
  """
  Generate a response from the given query and context.

  Args:
    query: (str) your query.
    context: (str) your context from snippets.
    model_api_string: (str) a model API string from Together Inference. See the full list in (https://docs.together.ai/docs/inference-models)

  Returns:
    (str) the text of the first completion choice in the response.
  """
  prompt_format, stop_sequences, max_context_length = get_model_config(model_api_string)
  max_tokens = 256

  # Truncate the context based on the model context length. Instead of using its
  # tokenizer and the exact token count, we assume 1 token ~= 3/4 of a word.
  # The tokens to be generated share the context window with the prompt, so
  # reserve room for them before converting the budget to words.
  word_budget = max(0, int((max_context_length - max_tokens) * 3 / 4))
  truncated_context = " ".join(context.split(" ")[:word_budget])
  prompt = get_together_prompt(query, truncated_context)

  # Wrap the prompt in the model's template. Locate the placeholder explicitly:
  # splitting the template on spaces and taking parts 0 and 2 only works for
  # templates shaped like "[INST]\n {prompt} \n[/INST]\n\n" and fails or
  # garbles the prompt for others (e.g. "<human>: {prompt}\n<bot>:"). The
  # single spaces padding the placeholder are dropped, which keeps the output
  # for the "[INST]" template unchanged.
  placeholder = "{prompt}"
  if prompt_format and placeholder in prompt_format:
    prefix, _, suffix = prompt_format.partition(placeholder)
    formatted_prompt = f"{prefix.rstrip(' ')}{prompt}{suffix.lstrip(' ')}"
  else:
    # No template, or a template without a placeholder: send the prompt as is.
    formatted_prompt = prompt

  response = together.Complete.create(
      prompt=formatted_prompt,
      model=model_api_string,
      max_tokens=max_tokens,
      temperature=1.0,
      top_k=60,
      top_p=0.6,
      repetition_penalty=1.1,
      stop=stop_sequences,
  )

  return response["output"]["choices"][0]["text"]

def ask_together_with_ai_snippets(query, model_api_string):
  """
  Answer a query with a Together model, using search snippets as context.

  Args:
    query: (str) your query; it is used both for the snippet search and as
      the question put to the model.
    model_api_string: (str) a model API string from Together Inference.

  Returns:
    Whatever `ask_together` returns for the query and the fetched snippets.
  """
  return ask_together(
      query,
      get_ai_snippets_for_query(query),
      model_api_string,
  )


### Send the query with your choice of generation model from the Together Inference API

In [None]:
# Example question; it is used both to fetch the snippets and as the question
# put to the model.
query = "What are top 10 successful open source projects?"
# API string of the Together Inference model that generates the answer.
model_api_string = "togethercomputer/Llama-2-7B-32K-Instruct"
print(ask_together_with_ai_snippets(query=query, model_api_string=model_api_string))

1. TensorFlow
2. Linux
3. Kubernetes
4. Apache
5. Python
6. Git
7. Android
8. GitHub
9. Jenkins
10. WordPress


