In [1]:
import os
import getpass
from google import genai
from google.genai import types
import json
import re
from datetime import datetime

In [2]:
# Ask for the API key interactively only when the environment does not already
# provide one, so the secret never has to be written into the notebook.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")


In [None]:
# Build a client from the key stored in the environment, send one prompt and
# print the text of the reply.
client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
response = client.models.generate_content(
    contents="Explain how AI works",
    model="gemini-2.0-flash",
)
print(response.text)

In [None]:
def _parse_llm_json(response_text):
    """Return the JSON value embedded in an LLM reply.

    Two candidates are tried in order: the contents of the first Markdown
    code fence (so prose before or after the fence is ignored), then the whole
    text with every fence marker stripped (covers un-fenced replies and replies
    whose closing fence is missing).

    Raises:
        json.JSONDecodeError: if no candidate is valid JSON.
    """
    candidates = []
    fenced = re.search(
        r"```(?:json)?\s*(.*?)```", response_text, re.DOTALL | re.IGNORECASE
    )
    if fenced:
        candidates.append(fenced.group(1).strip())
    candidates.append(response_text.replace("```json", "").replace("```", "").strip())

    last_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as exc:
            last_error = exc
    # `candidates` is never empty, so at least one error was recorded.
    raise last_error


def _load_qa_log(log_filename):
    """Load the central log as a list, never discarding existing content.

    A missing file yields an empty list. A file that is not valid JSON is
    moved to ``<log_filename>.corrupt`` (replacing any earlier backup) instead
    of being overwritten. A valid JSON value that is not a list is wrapped in
    a list so new entries can be appended to it.
    """
    if not os.path.exists(log_filename):
        return []

    try:
        with open(log_filename, "r", encoding="utf-8") as log_file:
            qa_log = json.load(log_file)
    except json.JSONDecodeError:
        # The file is already closed here, so it can be renamed on any platform.
        backup_filename = f"{log_filename}.corrupt"
        os.replace(log_filename, backup_filename)
        print(f"Warning: {log_filename} was not valid JSON; moved it to {backup_filename}.")
        return []

    return qa_log if isinstance(qa_log, list) else [qa_log]


def process_llm_response(response_text):
    """
    Parse the JSON contained in an LLM response and persist it.

    The parsed value is written to its own file under ``LLM_answers/`` and is
    also appended to the list stored in ``qa_log.json``. Both paths are
    relative to the current working directory.

    Args:
        response_text: Raw text returned by the model, optionally wrapped in a
            Markdown code fence. ``None`` or an empty string is treated as
            invalid JSON.

    Returns:
        None. When the text contains no valid JSON an error is printed and
        nothing is written.
    """
    if not response_text:
        print("Error: Invalid JSON format.")
        return

    try:
        json_data = _parse_llm_json(response_text)
    except json.JSONDecodeError:
        print("Error: Invalid JSON format.")
        return

    # Ensure the LLM_answers directory exists
    llm_answers_dir = "LLM_answers"
    os.makedirs(llm_answers_dir, exist_ok=True)

    # The timestamp only has one-second resolution, so add a numeric suffix
    # when a file for this second already exists rather than overwriting it.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    json_filename = f"{llm_answers_dir}/answer_{timestamp}.json"
    suffix = 1
    while os.path.exists(json_filename):
        json_filename = f"{llm_answers_dir}/answer_{timestamp}_{suffix}.json"
        suffix += 1

    # Save individual answer JSON
    with open(json_filename, "w", encoding="utf-8") as json_file:
        json.dump(json_data, json_file, indent=4)

    # Append the new entry to the central log and write it back
    log_filename = "qa_log.json"
    qa_log = _load_qa_log(log_filename)
    qa_log.append(json_data)

    with open(log_filename, "w", encoding="utf-8") as log_file:
        json.dump(qa_log, log_file, indent=4)

    print(f"Saved answer as {json_filename} and updated {log_filename}.")


In [None]:

# Read every `.sql` file in `folder_path` together with the `.comment` file
# that shares its base name, collecting (sql_text, comment_text) tuples in
# `list_of_text`. A script without a comment file is paired with an empty
# string so it can never inherit the comment of the previously read script.
folder_path = os.path.join("divider", "ddls")  # portable form of r"divider\ddls"
list_of_text = []
sql_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".sql"))

for filename in sql_files:
    sql_file_path = os.path.join(folder_path, filename)

    print("sql path::::", sql_file_path)
    with open(sql_file_path, "r") as file:
        sql_content = file.read()

    print(f"SQL script {filename} executed successfully.")

    # Swap only the extension; str.replace would also rewrite any ".sql" that
    # appears earlier in the path.
    comment_file_path = os.path.splitext(sql_file_path)[0] + ".comment"
    print(comment_file_path)

    # Reset on every iteration so a missing file cannot leave a stale (or
    # undefined) value behind.
    comment_content = ""
    try:
        with open(comment_file_path, "r") as file:
            comment_content = file.read()

        print(f"Comment script {filename} executed successfully.")

    except FileNotFoundError:
        print(f"Error: File not found at {comment_file_path}")

    list_of_text.append((sql_content, comment_content))

In [49]:
# Graph schema: the node labels that may be extracted, followed by the
# permitted (source, relationship, target) triples between those labels.
allowed_nodes = [
    "ColumnName",
    "TableName",
    "Data Type",
    "SchemaName",
    "Comment",
    "Summary",
]

allowed_relationships = [
    # Structural containment
    ("ColumnName", "PART_OF", "TableName"),
    ("ColumnName", "TYPE_OF", "Data Type"),
    ("TableName", "PART_OF", "SchemaName"),
    # Descriptive text attached to columns and tables
    ("Comment", "DESCRIBES", "ColumnName"),
    ("Summary", "DESCRIBES", "TableName"),
]

In [50]:
# Create the Gemini client from the API key held in the environment.
client = genai.Client(
    api_key=os.environ["GOOGLE_API_KEY"],
)

In [52]:
# System prompt: embeds the allowed node labels and relationship triples
# (as their Python reprs) so the model is told which schema to use.
sys_instruct = f"""You are a system and you need to extract entities and knowledge from DDL scripts

### Allowed Nodes ###

{allowed_nodes}

### Allowed Relationships ###

{allowed_relationships}

### For "Summary" infere what kind of information would be stored there ###

"""

# Send each (sql, comment) pair to the model and persist the parsed reply.
for pair in list_of_text:
    # Drop empty parts (for example a blank comment) so only real content is
    # sent; skip the request entirely when nothing is left.
    text = [part for part in pair if part]
    if not text:
        print("Warning: skipping a pair with no SQL or comment content.")
        continue

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(
            temperature=0.1,
            system_instruction=sys_instruct,
        ),
        contents=text,
    )

    # `response.text` is None when the reply carries no text parts; passing
    # that on would fail inside the string clean-up of the processing step.
    if response.text is None:
        print("Warning: the model returned no text; skipping this DDL script.")
        continue

    process_llm_response(response.text)

Saved answer as LLM_answers/answer_20250305_022711.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022715.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022724.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022736.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022750.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022811.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022828.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022837.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022849.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022901.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022906.json and updated qa_log.json.
Saved answer as LLM_answers/answer_20250305_022916.json and updated qa_log.json.
Saved answer as LLM_answers/

In [47]:
#print(response.text)