# Model Translation Pipeline

In this Notebook:
* Selecting a model
* Setting the system and user prompts
* Generating translations and measuring the translation time


## Setup

In [None]:
import json
import os
from datetime import datetime
from functools import wraps
from time import perf_counter
from time import time

import numpy as np
import pandas as pd
import requests
from google.colab import userdata

In [None]:
# OpenWebUI connection settings -- fill in both values before running the notebook.
token = ""  # openwebui api token
# openwebui api base url; endpoint paths such as "models" are appended to it
# directly, so it has to end with a "/"
base_url = ""
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
}

In [None]:
# function for measuring translation times
def timer_func(func):
  """Decorator that measures how long each call of ``func`` takes.

  Args:
    func: The callable to time.

  Returns:
    A wrapper that forwards all positional and keyword arguments to ``func``
    and returns the tuple ``(result, execution_time)``, where ``result`` is
    whatever ``func`` returned and ``execution_time`` is the elapsed time in
    seconds as a float.
  """
  @wraps(func)  # keep the name/docstring of the decorated function
  def wrap_func(*args, **kwargs):
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by adjustments of the system clock.
    start = perf_counter()
    result = func(*args, **kwargs)
    execution_time = perf_counter() - start
    return result, execution_time
  return wrap_func

## Getting Available Models

In [None]:
def get_models(base_url, headers):
  """Fetch the model listing from the API.

  Sends a GET request to ``{base_url}models``; the path is appended without a
  separator, so ``base_url`` has to end with a "/".

  Args:
    base_url: Base URL of the API, including the trailing "/".
    headers: HTTP headers sent with the request.

  Returns:
    The decoded JSON body of the response, or None when the request failed,
    the server answered with an HTTP error status, or the body was not valid
    JSON. In the failure case the error is printed.
  """
  try:
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # as in the original setup -- confirm that this is still required.
    response = requests.get(f"{base_url}models", headers=headers, verify=False)
    # Treat 4xx/5xx answers as failures instead of returning their error
    # payload as if it were the model listing.
    response.raise_for_status()
    return response.json()
  except (requests.exceptions.RequestException, ValueError) as e:
    # ValueError covers a non-JSON body on requests versions whose
    # JSON decoding error is not a RequestException.
    print(f"Error fetching models: {e}")
    return None

In [None]:
available_models = get_models(base_url, headers)
# get_models returns None when the request fails, and an error payload may not
# contain a "data" entry -- report that instead of crashing.
if not available_models or "data" not in available_models:
  print(f"Could not list the available models, API answered: {available_models}")
else:
  # The loop variable is intentionally not named `model`: that would overwrite
  # the notebook-level `model` setting when this cell is re-run later.
  for model_info in available_models["data"]:
    print(model_info["id"])

## Generating the Translation

In [None]:
# id of the model that generates the translations
model = "gpt-4o-2024-11-20"
# instruction text intended for the "system" role of the chat request
system_prompt = (
    "Translate the text provided by the user from and in to the language "
    "specified by the user. Only return the translation."
)
# prefix that is put in front of every source line to form the user prompt
base_user_prompt = "Translate from German to English: "

In [None]:
@timer_func
def chat_completion(base_url, model_name, user_prompt, system_prompt:str=None):
  """Send one chat completion request and return the raw HTTP response.

  The request is posted to ``{base_url}chat/completions`` with a fixed seed
  (42) and temperature (0.2). The bearer token is read from the notebook-level
  ``token`` variable.

  Because of the ``timer_func`` decorator, callers receive a tuple
  ``(response, execution_time)`` rather than the bare response.

  Args:
    base_url: Base URL of the API, including the trailing "/".
    model_name: Id of the model that should answer the request.
    user_prompt: Content of the "user" message.
    system_prompt: Optional content of the "system" message. When None, no
      system message is sent.

  Returns:
    The ``requests`` response object (wrapped into a tuple by the decorator).
  """
  headers = {"Authorization" : f"Bearer {token}", "Content-Type" : "application/json"}

  # The system message goes first and is only included when a prompt was given;
  # sending a system message with a null content is not a valid instruction.
  messages = []
  if system_prompt is not None:
    messages.append({"role": "system", "content": system_prompt})
  messages.append({"role": "user", "content": user_prompt})

  payload = {
      # use the argument, not the notebook-level `model` variable
      "model" : model_name,
      "messages": messages,
      "seed" : 42,
      "temperature" : 0.2,
      }

  # NOTE(review): verify=False disables TLS certificate verification; kept as
  # in the original setup -- confirm that this is still required.
  response = requests.post(f"{base_url}chat/completions", json=payload, headers=headers, verify=False)
  return response

In [None]:
def generate_translation(source_path, model:str=model, base_url:str=base_url, system_prompt:str=None, base_user_prompt:str=base_user_prompt):
  """Translate a text file line by line and time every request.

  Each line of the file is stripped, prefixed with ``base_user_prompt`` and
  sent to ``chat_completion``.

  Args:
    source_path: Path of the UTF-8 text file with one source segment per line.
    model: Id of the model used for the translation.
    base_url: Base URL of the API, including the trailing "/".
    system_prompt: Optional system prompt forwarded to ``chat_completion``.
    base_user_prompt: Text that is put in front of every source line.

  Returns:
    A tuple ``(predictions, source, execution_times)`` of three lists with one
    entry per line of the file. When no translation can be read from a
    response, the failure is printed and an empty string is stored, so that
    the three lists stay aligned line by line.
  """
  predictions = []
  source = []
  execution_times = []

  with open(source_path, "r", encoding="utf-8") as file:
    for line_number, line in enumerate(file, start=1):
      text = line.strip()
      source.append(text)
      user_prompt = base_user_prompt + text
      # generate translation
      response, execution_time = chat_completion(base_url, model, user_prompt, system_prompt)
      execution_times.append(execution_time)
      try:
        translation = response.json()["choices"][0]["message"]["content"]
      except (KeyError, IndexError, TypeError, ValueError):
        # Unexpected or non-JSON answer: show the raw body (calling .json()
        # again could raise) and keep a placeholder for this line.
        print(f"No translation for line {line_number} of {source_path}: {response.text}")
        translation = ""
      predictions.append(translation)

  return predictions, source, execution_times

### Storing the translation

In [None]:
# store translation
def store_translation(translation_file:str, predictions:list):
  """Write the translations to a UTF-8 text file, one per line.

  An existing file at ``translation_file`` is overwritten.

  Args:
    translation_file: Path of the output file.
    predictions: Translated strings in the order they should be written.
  """
  with open(translation_file, "w", encoding="utf-8") as out:
    out.writelines(entry + "\n" for entry in predictions)

In [None]:
# store translation times
def store_translation_times(csv_name: str, execution_times: list, column: str):
    """Save the execution times as a column of a CSV file.

    When ``csv_name`` already exists, its content is read and the new column
    is appended to the right of the existing ones; otherwise a new file with
    just this column is created. The file is written without an index column.

    Args:
        csv_name: Path of the CSV file.
        execution_times: Values stored in the new column.
        column: Header of the new column.
    """
    times = pd.DataFrame({column: execution_times})
    if os.path.isfile(csv_name):
        previous = pd.read_csv(csv_name)
        times = pd.concat([previous, times], axis=1)
    times.to_csv(csv_name, index=False)

### Running the translation pipeline

In [None]:
# Translate every file in `source_dir` and store the translations and timings.
source_dir = "source_files"
data_name = "DE-EN"
times_file = f"{model}_{data_name}_translation_times.csv"

# os.listdir returns the entries in arbitrary order; sorting keeps the
# processing order and the column order of the times CSV reproducible.
for file_name in sorted(os.listdir(source_dir)):
  source_path = os.path.join(source_dir, file_name)
  if not os.path.isfile(source_path):
    # skip sub-directories (e.g. notebook checkpoint folders), open() would fail on them
    continue
  print(file_name)
  # the part of the file name before the first "_" labels the outputs
  short_file_name = file_name.split("_")[0]
  translation_file = f"{data_name}_json_{datetime.now().strftime('%Y_%m_%d')}_{short_file_name}.txt"

  # pass the system prompt explicitly, otherwise generate_translation falls
  # back to its default of None and the prompt defined above is never used
  predictions, source, execution_times = generate_translation(source_path, model=model, base_url=base_url, system_prompt=system_prompt, base_user_prompt=base_user_prompt)
  store_translation(translation_file, predictions)
  store_translation_times(times_file, execution_times, short_file_name)