In [None]:
from openai import OpenAI
from pathlib import Path
import os
from dotenv import load_dotenv
import time
from collections import deque
import json
import gzip
import itertools
import re

# load_dotenv() has to run before the key lookup below so that
# OPENROUTER_API_KEY may be supplied through a .env file.
load_dotenv()

# The OpenAI client is pointed at the OpenRouter endpoint; a missing
# OPENROUTER_API_KEY fails right here with a KeyError.
client = OpenAI(
    api_key=os.environ['OPENROUTER_API_KEY'],
    base_url="https://openrouter.ai/api/v1",
)

# Directory where rated conversations are stored (created eagerly).
conversations_root = Path('conversations')
conversations_root.mkdir(parents=True, exist_ok=True)

# Directory holding the generated *.log files (created by the generation cell).
output = Path('output')


In [None]:

# Generate one synthetic log file with an LLM, store it under the first unused
# output/<n>.log name, and append the request duration to timings.txt.

# Upper bound on generation attempts; each attempt is a separate API call.
generation_attempts = 10000

for retry in range(generation_attempts):
    start_time = time.time()
    completion = client.chat.completions.create(
        # Alternative models, kept for quick switching:
        #model="deepseek/deepseek-chat-v3-0324:free",
        #model='google/gemini-2.5-pro-exp-03-25',
        #model='deepseek/deepseek-r1:free',
        #model='google/gemini-2.0-flash-exp:free',
        #model='meta-llama/llama-4-maverick:free',
        #model='deepseek/deepseek-chat:free',
        #model='microsoft/mai-ds-r1:free',
        #model='qwen/qwen3-235b-a22b:free',
        #model='meta-llama/llama-4-scout:free',
        #model='tngtech/deepseek-r1t-chimera:free',
        #model='nvidia/llama-3.1-nemotron-ultra-253b-v1:free',
        #model='google/gemma-3-27b-it:free',
        #model='mistralai/mistral-small-3.1-24b-instruct:free',
        model='google/gemini-2.0-flash-001',
        messages=[
            {
                "role": "user",
                "content": "Generate a plausible log file, as would be emitted from some application or service. It should contain both uninteresting and interesting lines. Reply ONLY with the log lines. No explanations, markdown quotes or any other form of framing."
            }
        ]
    )
    end_time = time.time()

    # Same check the other cells perform: a populated `error` attribute on the
    # response means the call failed, so fail loudly instead of tripping over
    # a missing `choices` entry below.
    if getattr(completion, 'error', None):
        raise Exception(f"API call failed: {completion.error}")

    content = completion.choices[0].message.content
    if content is None:
        # A reply without text cannot be saved; ask again.
        print('empty llm response. retrying')
    elif content.lstrip().startswith('```'):
        # Leading whitespace is ignored so an indented fence is caught as well.
        print('undesired framing in llm response. retrying')
    else:
        break
else:
    # Never fall through and write a rejected reply to disk.
    raise RuntimeError(f'no usable llm response after {generation_attempts} attempts')

print(content)
output.mkdir(exist_ok=True, parents=True)

# Pick the first output/<i>.log that does not exist yet.
for i in range(0, 1000000):
    outpath = output / f'{i}.log'
    if not outpath.exists():
        break
else:
    # An explicit exception survives `python -O`, unlike `assert False`.
    raise RuntimeError(f'no free log file name left in {output}')

with outpath.open('w') as f:
    f.write(content)

# One tab-separated record per generated file: path, model, seconds taken.
timings_path = Path('timings.txt')
with timings_path.open('a') as f:
    print(f'{outpath}\t{completion.model}\t{end_time - start_time}', file=f)


In [None]:
# Ask a second model whether each generated file really looks like a raw log
# and collect the verdicts as (path, bool) tuples in `format_check`.

def fun(msg: str):
    """Interpret a yes/no reply from the model.

    The first standalone word "yes" or "no" (case-insensitive) decides the
    result. Whole-word matching keeps words such as "not", "know" or "eyes"
    from being mistaken for an answer.

    Returns:
        True for "yes", False for "no".

    Raises:
        ValueError: if the reply is missing or contains neither word.
    """
    found = re.search(r'\b(yes|no)\b', (msg or '').lower())
    if found is None:
        raise ValueError(f'bad reply from model: {msg}')
    return found.group(1) == 'yes'


# Total attempts per file before the last error is re-raised.
format_check_attempts = 11

format_check = []
# Sorted so repeated runs visit the files in the same order.
for p in sorted(output.glob('*.log')):
    logfile = p.read_text()
    for retry in range(format_check_attempts):
        try:
            completion = client.chat.completions.create(
                model='meta-llama/llama-4-maverick:free',
                messages=[
                    {
                        "role": "user",
                        "content": f"Does the following text look like a raw log file? It mustn't contain any additional framing, only the actual log lines. Answer ONLY with yes or no:\n{logfile}"
                    }
                ]
            )
            if getattr(completion, 'error', None):
                raise Exception(f"API call failed: {completion.error}")
            good = fun(completion.choices[0].message.content)
            break
        except Exception as e:
            # API errors and unparsable replies are both retried; the final
            # failure propagates so a broken run is not silently recorded.
            if retry == format_check_attempts - 1:
                raise
            print(f'{e}. retrying..')
    format_check.append((p, good))
    print(f'{p}: {good}')
          


In [None]:
# System message sent with every rating request. It asks the model to rate
# only the last log line it is given and to answer with a one-line analysis
# followed by a "SCORE: <0-100>" line.
system_prompt = {
    "role": "system",
    'content': '''You are a developer log analyzer.
Given a sequence of log lines. Rate only the last line. Use the prior lines only for context.
Rate that line by how interesting you think that line is for diagnosing an issue with the system.
Output EXACTLY in this format:
```
Very brief single-sentence analysis on a single line
SCORE: 0-100
```

Do NOT include any code examples, snippets, or additional explanations.
Keep responses strictly limited to the analysis and score.
Do NOT include any additional framing such as markdown.

Score guide:
Low (0-30): Routine/minor info
Medium (31-70): Noteworthy/important
High (71-100): Critical/security issues
''',
}

# Accepts a reply made of exactly one analysis line followed by a line
# "SCORE: n" with n being 100 or a one-/two-digit number. `.` does not match
# a newline, so replies with extra lines (e.g. markdown fences) are rejected.
formatre = re.compile(r'^.*\nSCORE: (?:100|\d{1,2})$')

# Rate every line of every generated log file. For each line the model sees
# the line itself plus up to two preceding lines as context, and the accepted
# replies are stored per log file as conversations/<name>.json.gz.

#for log_path in [output / '0.log']:
# Sorted so repeated runs visit the files in the same order.
for log_path in sorted(output.glob('*.log')):
    print(f'evaluating {log_path}\n===========\n\n')
    conversations = []
    # Sliding window: the line being rated plus at most two lines before it.
    history = deque(maxlen=3)
    with log_path.open('r') as f:
        for line in f:
            line = line.rstrip()
            history.append(line)
            lines = ''.join(f'{l}\n' for l in history)
            query = {
                'role': 'user',
                'content': lines,
            }
            # Up to four attempts to obtain a reply in the requested format.
            for retry in range(4):
                completion = client.chat.completions.create(
                    #model='meta-llama/llama-4-maverick:free',
                    #model='google/gemini-2.0-flash-exp:free',
                    #model='meta-llama/llama-4-scout',
                    model='google/gemini-2.0-flash-001',
                    messages=[
                        system_prompt,
                        query,
                    ]
                )
                if getattr(completion, 'error', None):
                    raise Exception(f"API call failed: {completion.error}")
                assert completion.choices[0].message.role == 'assistant'
                message = completion.choices[0].message.content
                # A missing reply is handled like a malformed one; passing
                # None to the regex would raise TypeError and abort the run.
                if message is None or formatre.match(message) is None:
                    print(f'bad reply from model: {message}')
                    continue
                break
            else:
                print('the model is obstinate, ignoring this line')
                continue
            print(f'{lines}{message}\n')
            # NOTE(review): the reply is stored as a bare string while the two
            # other entries are role/content dicts - confirm that readers of
            # the .json.gz files expect this shape.
            conversations.append([
                system_prompt,
                query,
                message,
            ])
    conversation_path = conversations_root / log_path.relative_to(output).with_suffix('.json.gz')
    with gzip.open(conversation_path, 'wb') as f:
        f.write(json.dumps(conversations).encode('utf-8'))
