Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions tutorials/batch-processing/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
# Groq Batch Processing

This tutorial will guide you through the process of uploading and processing batch jobs using the Groq API.


https://github.com/user-attachments/assets/8ef2e06b-6153-43d3-9122-6bc11b786efd


## Step 1: Set Up Dependencies:

Before interacting with the Groq API, you need to install and import the required dependencies.

```
import os
from dotenv import load_dotenv
import requests # Install with: pip install requests
import time

# Load environment variables from .env file
load_dotenv()

# Access environment variables
api_key = os.getenv("GROQ_API_KEY")
```
Make sure you have a `.env` file containing your `GROQ_API_KEY`. If you don't have one already, you can create a free account and generate one [here](https://console.groq.com/keys).

### Create a virtual environment
`python3 -m venv venv`

### Activate it
`source venv/bin/activate`

### Install the packages
`pip3 install groq requests python-dotenv`

# Step 2: Upload the JSONL File to Groq
```
def upload_file_to_groq(api_key, file_path):
url = "https://api.groq.com/openai/v1/files"

headers = {
"Authorization": f"Bearer {api_key}"
}

files = {
"file": ("batch_file.jsonl", open(file_path, "rb"))
}

data = {"purpose": "batch"}

response = requests.post(url, headers=headers, files=files, data=data)
return response.json()

file_path = "batch_input.jsonl" # Path to your JSONL file
file_id = ""

try:
result = upload_file_to_groq(api_key, file_path)
file_id = result["id"]
print("This is the file_id from Step 2: " + file_id)
except Exception as e:
print(f"Error: {e}")
```

# Step 3: Create a Batch Object

Create a batch object using the uploaded file ID.

```
def create_batch(api_key, input_file_id):
url = "https://api.groq.com/openai/v1/batches"

headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}

data = {
"input_file_id": input_file_id,
"endpoint": "/v1/chat/completions",
"completion_window": "24h"
}

response = requests.post(url, headers=headers, json=data)
return response.json()

batch_id = ""
try:
result = create_batch(api_key, file_id)
batch_id = result["id"]
print("This is the Batch object id from Step 3: " + batch_id)
except Exception as e:
print(f"Error: {e}")
```



# Step 4: Get the Batch Status

Monitor the batch job's status until it completes.

```
def get_batch_status(api_key, batch_id):
url = f"https://api.groq.com/openai/v1/batches/{batch_id}"

headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}

response = requests.get(url, headers=headers)
return response.json()

output_file_id = ""
try:
result = get_batch_status(api_key, batch_id)
print("\nStep 4 results: ")

count = 0
while result["status"] != "completed" and count < 100:
result = get_batch_status(api_key, batch_id)
time.sleep(3)
print("Your batch status is: " + result["status"])
count += 1

output_file_id = result.get("output_file_id")
print("This is your output_file_id from Step 4: " + output_file_id)
except Exception as e:
print(f"Error: {e}")
```


# Step 5: Retrieve Batch Results

Download and save the batch job results.

```
def download_file_content(api_key, output_file_id, output_file):
url = f"https://api.groq.com/openai/v1/files/{output_file_id}/content"

headers = {
"Authorization": f"Bearer {api_key}"
}

response = requests.get(url, headers=headers)

with open(output_file, 'wb') as f:
f.write(response.content)

return f"\nFile downloaded successfully to {output_file}"

output_file = "batch_output.jsonl"
try:
result = download_file_content(api_key, output_file_id, output_file)
print(result)
except Exception as e:
print(f"Error: {e}")
```

Now you have successfully uploaded, processed, and retrieved batch job results using the Groq API!


## How to run it in your terminal:
`python3 main.py`


### Example output in terminal:
```
This is the file_id from Step 2: file_01jpthgx92e3ts09bma0bfchmt
This is the Batch object id from Step 3: batch_01jpthgxffe8ms4zqdkf1aejjp

Step 4 results:
Your batch status is: validating
Your batch status is: in_progress
Your batch status is: completed
This is your output_file_id from Step 4: file_01jpthh1e3e739wba761nfgvj2

File downloaded successfully to batch_output.jsonl
```

Example input .jsonl file
```
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful translation assistant. Translate the following into spanish."}, {"role": "user", "content": "Hello, how are you today?"}]}}
```

Example output .jsonl file
```
{"id":"batch_req_out_01jpra5p4ve4v8k14zkqn6agjm","custom_id":"request-2","response":{"status_code":200,"request_id":"req_01jpra5n2qef6s66k65v8sy51h","body":{"id":"chatcmpl-f822c700-75aa-4fa3-8f2c-3f65cca8a1d3","object":"chat.completion","created":1742425217,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"\"Hola, ¿cómo estás hoy\" or more commonly in informal settings: \"Hola, ¿qué onda hoy\". \n\nIf you want it to be a more formal conversation, you could use: \"Buenos días, ¿cómo está hoy?\""},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":1.0087154420000002,"prompt_tokens":55,"prompt_time":0.009901563,"completion_tokens":55,"completion_time":0.073333333,"total_tokens":110,"total_time":0.083234896},"system_fingerprint":"fp_076aab041c","x_groq":{"id":"req_01jpra5n2qef6s66k65v8sy51h"}}},"error":null}
```

View more detailed documentation on batch processing [here.](https://console.groq.com/docs/batch)
3 changes: 3 additions & 0 deletions tutorials/batch-processing/batch_input.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What is 2+2?"}]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful translation assistant. Translate the following into spanish."}, {"role": "user", "content": "Hello, how are you today?"}]}}
{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "llama-3.1-8b-instant", "messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "count up to 1000. starting with 1, 2, 3. print all the numbers, do not stop until you get to 1000."}]}}
3 changes: 3 additions & 0 deletions tutorials/batch-processing/batch_output.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"id":"batch_req_out_01jpthgypde739skmk9d2p7frc","custom_id":"request-2","response":{"status_code":200,"request_id":"req_01jpthgyahed0r94pxxy0bjbvr","body":{"id":"chatcmpl-8538d583-f91c-4883-a351-ff03bd6f17f9","object":"chat.completion","created":1742500035,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"\"Hola, ¿cómo estás hoy?\" \n\nHowever, a more casual and common way to say it in Spanish would be:\n\"Hola, ¿qué onda?\" (informal)\n\"Hola, ¿cómo va?\" (a bit more formal)\n\nIf you want to be more polite, you could say:\n\"Hola, ¿cómo se encuentra hoy?\""},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":0.27463279799999996,"prompt_tokens":55,"prompt_time":0.003482458,"completion_tokens":76,"completion_time":0.101333333,"total_tokens":131,"total_time":0.104815791},"system_fingerprint":"fp_a4265e44d5","x_groq":{"id":"req_01jpthgyahed0r94pxxy0bjbvr"}}},"error":null}
{"id":"batch_req_out_01jpthh170e7387j2e0r03p825","custom_id":"request-3","response":{"status_code":200,"request_id":"req_01jpthgyahe3vrg488eaczvbdm","body":{"id":"chatcmpl-3bedd913-0c82-482f-8c67-e9275055f153","object":"chat.completion","created":1742500035,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n36\n37\n38\n39\n40\n41\n42\n43\n44\n45\n46\n47\n48\n49\n50\n51\n52\n53\n54\n55\n56\n57\n58\n59\n60\n61\n62\n63\n64\n65\n66\n67\n68\n69\n70\n71\n72\n73\n74\n75\n76\n77\n78\n79\n80\n81\n82\n83\n84\n85\n86\n87\n88\n89\n90\n91\n92\n93\n94\n95\n96\n97\n98\n99\n100\n101\n102\n103\n104\n105\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n116\n117\n118\n119\n120\n121\n122\n123\n124\n125\n126\n127\n128\n129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n151\n152\n153\n154\n155\n156\n157\n158\n159\n160\n161\n162\n163\n164\n165\n166\n167\n168\n169\n170\n171\n172\n173\n174\n175\n176\n177\n178\n179\n180\n181\n182\n183\n184\n185\n186\n187\n188\n189\n190\n191\n192\n193\n194\n195\n196\n197\n198\n199\n200\n201\n202\n203\n204\n205\n206\n207\n208\n209\n210\n211\n212\n213\n214\n215\n216\n217\n218\n219\n220\n221\n222\n223\n224\n225\n226\n227\n228\n229\n230\n231\n232\n233\n234\n235\n236\n237\n238\n239\n240\n241\n242\n243\n244\n245\n246\n247\n248\n249\n250\n251\n252\n253\n254\n255\n256\n257\n258\n259\n260\n261\n262\n263\n264\n265\n266\n267\n268\n269\n270\n271\n272\n273\n274\n275\n276\n277\n278\n279\n280\n281\n282\n283\n284\n285\n286\n287\n288\n289\n290\n291\n292\n293\n294\n295\n296\n297\n298\n299\n300\n301\n302\n303\n304\n305\n306\n307\n308\n309\n310\n311\n312\n313\n314\n315\n316\n317\n318\n319\n320\n321\n322\n323\n324\n325\n326\n327\n328\n329\n330\n331\n332\n333\n334\n335\n336\n337\n338\n339\n340\n341\n342\n343\n344\n345\n346\n347\n348\n349\n350\n351\n352\n353\n354\n355\n356\n357\n358\n359\n360\n361\n362\n363\n364\n365\n366\n367\n368\n369\n370\n371\n372\n373\n374\n375\n376\n377\n378\n379\n380\n381\n382\n383\n384\n385\n386\n387\n388\n389\n390\n391\n392\n393\n393\n394\n395\n396\n397\n398\n399\n400\n401\n402\n403\n404\n405\n406\n407\n408\n409\n410\n411\n412\n413\n414\n415\n416\n417\n418\n419\n420\n421\n422\n423\n424\n425\n426\n427\n428\n429\n430\n431\n432\n433\n434\n435\n436\n437\n438\n439\n440\n441\n442\n443\n444\n445\n446\n447\n448\n449\n450\n451\n452\n453\n454\n455\n456\n457\n458\n459\n460\n461\n462\n463\n464\n465\n466\n467\n468\n469\n470\n471\n472\n473\n474\n475\n476\n477\n478\n479\n480\n481\n482\n483\n484\n485\n486\n487\n488\n489\n490\n491\n492\n493\n494\n495\n496\n497\n498\n499\n500\n501\n502\n503\n504\n505\n506\n507\n508\n509\n510\n511\n512\n513\n514\n515\n516\n517\n518\n519\n520\n521\n522\n523\n524\n525\n526\n527\n528\n529\n530\n531\n532\n533\n534\n535\n536\n537\n538\n539\n540\n541\n542\n543\n544\n545\n546\n547\n548\n549\n550\n551\n552\n553\n554\n555\n556\n557\n558\n559\n560\n561\n562\n563\n564\n565\n566\n567\n568\n569\n570\n571\n572\n573\n574\n575\n576\n577\n578\n579\n580\n581\n582\n583\n584\n585\n586\n587\n588\n589\n590\n591\n592\n593\n594\n595\n596\n597\n598\n599\n600\n601\n602\n603\n604\n605\n606\n607\n608\n609\n610\n611\n612\n613\n614\n615\n616\n617\n618\n619\n620\n621\n622\n623\n624\n625\n626\n627\n628\n629\n630\n631\n632\n633\n634\n635\n636\n637\n638\n639\n640\n641\n642\n643\n644\n645\n646\n647\n648\n649\n650\n651\n652\n653\n654\n655\n656\n657\n658\n659\n660\n661\n662\n663\n664\n665\n666\n667\n668\n669\n670\n671\n672\n673\n674\n675\n676\n677\n678\n679\n680\n681\n682\n683\n684\n685\n686\n687\n688\n689\n690\n691\n692\n693\n694\n695\n696\n697\n698\n699\n700\n701\n702\n703\n704\n705\n706\n707\n708\n709\n710\n711\n712\n713\n714\n715\n716\n717\n718\n719\n720\n721\n722\n723\n724\n725\n726\n727\n728\n729\n730\n731\n732\n733\n734\n735\n736\n737\n738\n739\n740\n741\n742\n743\n744\n745\n746\n747\n748\n749\n750\n751\n752\n753\n754\n755\n756\n757\n758\n759\n760\n761\n762\n763\n764\n765\n766\n767\n768\n769\n770\n771\n772\n773\n774\n775\n776\n777\n778\n779\n780\n781\n782\n783\n784\n785\n786\n787\n788\n789\n790\n791\n792\n793\n794\n795\n796\n797\n798\n799\n800\n801\n802\n803\n804\n805\n806\n807\n808\n809\n810\n811\n812\n813\n814\n815\n816\n817\n818\n819\n820\n821\n822\n823\n824\n825\n826\n827\n828\n829\n830\n831\n832\n833\n834\n835\n836\n837\n838\n839\n840\n841\n842\n843\n844\n845\n846\n847\n848\n849\n850\n851\n852\n853\n854\n855\n856\n857\n858\n859\n860\n861\n862\n863\n864\n865\n866\n867\n868\n869\n870\n871\n872\n873\n874\n875\n876\n877\n878\n879\n880\n881\n882\n883\n884\n885\n886\n887\n888\n889\n890\n891\n892\n893\n894\n895\n896\n897\n898\n899\n900\n901\n902\n903\n904\n905\n906\n907\n908\n909\n910\n911\n912\n913\n914\n915\n916\n917\n918\n919\n920\n921\n922\n923\n924\n925\n926\n927\n928\n929\n930\n931\n932\n933\n934\n935\n936\n937\n938\n939\n940\n941\n942\n943\n944\n945\n946\n947\n948\n949\n950\n951\n952\n953\n954\n955\n956\n957\n958\n959\n960\n961\n962\n963\n964\n965\n966\n967\n968\n969\n970\n971\n972\n973\n974\n975\n976\n977\n978\n979\n980\n981\n982\n983\n984\n985\n986\n987\n988\n989\n990\n991\n992\n993\n994\n995\n996\n997\n998\n999\n1000"},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":0.279345227,"prompt_tokens":75,"prompt_time":0.008248425,"completion_tokens":2003,"completion_time":2.670666667,"total_tokens":2078,"total_time":2.678915092},"system_fingerprint":"fp_a4265e44d5","x_groq":{"id":"req_01jpthgyahe3vrg488eaczvbdm"}}},"error":null}
{"id":"batch_req_out_01jpthgykte7396v5ape5vcty3","custom_id":"request-1","response":{"status_code":200,"request_id":"req_01jpthgyahe3vbw7k2bbrvgyee","body":{"id":"chatcmpl-5cbc35f5-82f3-4aaa-876e-9c6de82907e4","object":"chat.completion","created":1742500035,"model":"llama-3.1-8b-instant","choices":[{"index":0,"message":{"role":"assistant","content":"The answer to 2+2 is 4."},"logprobs":null,"finish_reason":"stop"}],"usage":{"queue_time":0.27736843099999997,"prompt_tokens":48,"prompt_time":0.003871881,"completion_tokens":12,"completion_time":0.016,"total_tokens":60,"total_time":0.019871881},"system_fingerprint":"fp_a4265e44d5","x_groq":{"id":"req_01jpthgyahe3vbw7k2bbrvgyee"}}},"error":null}
148 changes: 148 additions & 0 deletions tutorials/batch-processing/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""
Step 1. Set up dependencies
"""

import os
from dotenv import load_dotenv
import requests # pip install requests first!
import time

# Load environment variables from .env file
load_dotenv()

# Access environment variables
api_key = os.getenv("GROQ_API_KEY")


"""
Step 2. Upload the JSONL file to Groq
"""

def upload_file_to_groq(api_key, file_path):
url = "https://api.groq.com/openai/v1/files"

headers = {
"Authorization": f"Bearer {api_key}"
}

# Prepare the file and form data
files = {
"file": ("batch_file.jsonl", open(file_path, "rb"))
}

data = {
"purpose": "batch"
}

# Make the POST request
response = requests.post(url, headers=headers, files=files, data=data)

return response.json()

# Usage example
file_path = "batch_input.jsonl" # Path to your JSONL file
file_id = "" # will be used in the next step

try:
result = upload_file_to_groq(api_key, file_path)
file_id = result["id"]
print("This is the file_id from Step 2: " + file_id)

except Exception as e:
print(f"Error: {e}")



"""
Step 3. Make a batch object
"""

def create_batch(api_key, input_file_id):
url = "https://api.groq.com/openai/v1/batches"

headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}

data = {
"input_file_id": input_file_id,
"endpoint": "/v1/chat/completions",
"completion_window": "24h"
}

response = requests.post(url, headers=headers, json=data)
return response.json()

batch_id = "" # will be used in the next step
try:
result = create_batch(api_key, file_id)
batch_id = result["id"] # batch result id
print("This is the Batch object id from Step 3: " + batch_id)

except Exception as e:
print(f"Error: {e}")



"""
Step 4. Get the batch status
"""

def get_batch_status(api_key, batch_id):
url = f"https://api.groq.com/openai/v1/batches/{batch_id}"

headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}

response = requests.get(url, headers=headers)
return response.json()

output_file_id = "" # will be used in the next step
try:
result = get_batch_status(api_key, batch_id)
print("\nStep 4 results: ")

count = 0

# Corrected condition: use `result["status"]` instead of `result.status`
while result["status"] != "completed" and count < 100:
result = get_batch_status(api_key, batch_id) # Update `result` inside the loop
time.sleep(3)
print("Your batch status is: " + result["status"])
count += 1

output_file_id = result.get("output_file_id") # Use .get() to safely access keys
print("This is your output_file_id from Step 4: " + output_file_id)
except Exception as e:
print(f"Error: {e}")



"""
Step 5. Retrieve batch results
"""

def download_file_content(api_key, output_file_id, output_file):
url = f"https://api.groq.com/openai/v1/files/{output_file_id}/content"

headers = {
"Authorization": f"Bearer {api_key}"
}

response = requests.get(url, headers=headers)

# Write the content to a file
with open(output_file, 'wb') as f:
f.write(response.content)

return f"\nFile downloaded successfully to {output_file}"

output_file = "batch_output.jsonl" # replace with your own file of choice to download batch job contents to
try:
result = download_file_content(api_key, output_file_id, output_file)
print(result)
except Exception as e:
print(f"Error: {e}")