In [None]:
from huggingface_hub import InferenceClient, login
import sqlite3
import re
import pandas as pd
import os
import json
import time

# Authenticate with the Hugging Face Hub using the token stored in the HF_TOKEN
# environment variable (os.getenv yields None when the variable is not set).
login(os.getenv('HF_TOKEN'))
# Hub model id that get_summary passes to InferenceClient.
hf_model = "mistralai/Mistral-7B-Instruct-v0.3"

In [2]:
def get_summary(cte_object) -> str:
    """Ask the configured chat model for a one-sentence explanation of a SQL snippet.

    Args:
        cte_object: Mapping whose 'SQL' entry holds the SQL text to explain.
            The value is read with ``.get``, so a missing key is interpolated
            into the prompt as ``None``.

    Returns:
        The message content of the first choice in the chat completion.
    """
    # A fresh client bound to the module-level model id is built on every call.
    hub_client = InferenceClient(model=hf_model)

    sql_text = cte_object.get('SQL')
    prompt = f"Explain in one sentence what this SQL code is doing:\n\n{sql_text}"
    conversation = [{'role': 'user', 'content': prompt}]

    # A fixed seed and temperature=0 are sent with every request.
    completion = hub_client.chat_completion(
        conversation,
        max_tokens=2000,
        seed=42,
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [5]:
import time
from datetime import datetime, timedelta

def _save_json_atomically(data, file_path):
    """Write ``data`` as indented JSON to ``file_path`` without ever exposing a partial file."""
    tmp_path = f"{file_path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as tmp_file:
            json.dump(data, tmp_file, indent=2)
        # os.replace swaps the finished file in as a single rename, so readers
        # see either the previous checkpoint or the new one, never a mix.
        os.replace(tmp_path, file_path)
    finally:
        # Only present if the dump or the rename failed; do not leave it behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def generate_json_files(file_path):
    """
    Fill in the missing "summary" of every CTE object stored in a JSON file.

    The file is loaded once and iterated; each object whose "summary" key is
    absent or falsy gets a summary from ``get_summary`` (a Hugging Face API
    request). After every generated summary the whole data set is saved back
    to ``file_path`` so a later call resumes where this one stopped.

    Checkpoints are written to a sibling ``<file_path>.tmp`` file and then
    renamed over the original, so an interruption or a serialization error in
    the middle of a write cannot leave ``file_path`` holding broken JSON.

    Args:
        file_path: Path of the JSON file to read and update.

    Returns:
        0 when the loop reached the end of the data, or -1 as soon as
        generating or saving a summary raised an exception. The exception is
        printed; the original note says Hugging Face has an hourly request
        limit, so -1 is meant as "try again later".
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for i, obj in enumerate(data):
        if "summary" in obj and obj["summary"]:
            # Already summarized on a previous run.
            continue
        try:
            obj["summary"] = get_summary(obj)
            # Checkpoint immediately so finished work survives a later failure.
            _save_json_atomically(data, file_path)
        except Exception as e:
            print(f"Error generating summary for {file_path} at index {i}: {e}")
            # Stop here and report failure so the caller can rerun later.
            return -1
    return 0

def sleep_until_next_hour():
    """Block until the start of the next clock hour.

    Reads the current time from ``datetime.now()``, prints how many whole
    minutes remain together with the wake-up time, then sleeps for the exact
    number of seconds left.
    """
    current = datetime.now()
    # Truncate to the top of the current hour, then step one hour forward.
    top_of_next_hour = current.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)
    seconds_left = (top_of_next_hour - current).total_seconds()
    whole_minutes = int(seconds_left // 60)
    print(f"Sleeping for {whole_minutes} minutes until {top_of_next_hour}.")
    time.sleep(seconds_left)

In [None]:
# Keep processing the CTE file until generate_json_files reports success (0).
# A -1 status means a request failed, so wait for the next clock hour and retry.
while True:
    result = generate_json_files('spider/spider2-lite-ctes.json')
    if result == 0:
        print("Finished successfully.")
        break
    if result == -1:
        sleep_until_next_hour()