# Imports / Config

In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
import os
from pathlib import Path
from dotenv import load_dotenv

from IPython.display import Markdown, display

import edurel.utils.dbcon as dbcu
import edurel.utils.db as dbu
import edurel.utils.duckdb as ddbu
import edurel.utils.llm as llmu
import edurel.utils.llmchat as llmc
import edurel.widgets.mermaid_viz as mmw
import edurel.widgets.chatman as cmw
import edurel.utils.misc as mu

# Load variables via python-dotenv before reading them below
# (presumably from a local .env file -- confirm where it lives).
load_dotenv()

# NOTE(review): the lookup yields None when BASE_DIR is not set, in which case
# DB_DIR becomes the literal string "None/databases" rather than raising.
BASE_DIR = os.environ.get("BASE_DIR")
DB_DIR = "{}/databases".format(BASE_DIR)

def get_sql(chat, i):
    """Return the SQL extracted from message ``i`` of ``chat``.

    The message is looked up with ``chat.get_message(i)``; its ``content``
    attribute is handed to ``mu.sql_extract`` and that result is returned
    unchanged.
    """
    message = chat.get_message(i)
    return mu.sql_extract(message.content)


# Database

In [10]:
# Open the connection for the playlist example database
# (presumably the English-language variant -- confirm in edurel.utils.dbcon).
con = dbcu.playlist_en()
# No extra foreign keys are passed to the handler for this database.
additional_fks = {}

# Wrap the connection in the project's DbHandler, which provides the schema
# export and SQL printing used in the cells below.
db = dbu.DbHandler(con,additional_fks=additional_fks)
# Schema as YAML text for the LLM prompt. The list is presumably the set of
# tags to omit (cf. omit_tags in the commented PNG export) -- TODO confirm.
schema_yaml = db.schema_yaml_str(["nullable", "fkname"])
# Optional: render the schema as a Mermaid PNG (disabled).
# db.schema_mermaid_png(output_path="schema.png", omit_tags=["nullable", "fkname"], direction="TB", scale=8)

In [37]:
# print() rather than a bare expression so the multi-line YAML is shown with
# real line breaks instead of as an escaped string repr.
print(schema_yaml)

tables:
- tablename: Genre
  columns:
  - columnname: GID
    type: INTEGER
  - columnname: Description
    type: VARCHAR
  primary_key:
  - GID
- tablename: Playlist
  columns:
  - columnname: PLID
    type: INTEGER
  - columnname: Name
    type: VARCHAR
  - columnname: LastCall
    type: DATE
  primary_key:
  - PLID
- tablename: Title
  columns:
  - columnname: TID
    type: INTEGER
  - columnname: Name
    type: VARCHAR
  - columnname: DurationInSec
    type: INTEGER
  - columnname: PLID
    type: INTEGER
  - columnname: GID
    type: INTEGER
  primary_key:
  - TID
  foreign_keys:
  - sourcecolumns:
    - PLID
    targettable: Playlist
    targetcolumns:
    - PLID
  - sourcecolumns:
    - GID
    targettable: Genre
    targetcolumns:
    - GID



# Prompts

In [14]:
# System prompt installed on the chat via chat.set_system_prompt(); the text
# is sent to the model verbatim, so edits here change model behaviour.
system_prompt = """
You are an expert SQL query generator. 
Convert natural language questions into valid SQL queries.
use duckdb syntax.
"""

In [30]:
# First user message: embeds the YAML schema (schema_yaml from the Database
# section) in a fenced yaml block so the model sees the table layout.
schema_prompt = f"""
Database schema:
```yaml
{schema_yaml}
``` 
"""

## q1

In [39]:
# Question 1 as a Markdown-structured task: output columns, calculation rule,
# sorting, and the required answer format (a single fenced sql block), which
# get_sql() later relies on when extracting the query.
q1 = """
# create a SQL query 
## output columns:
- playlist: name of playlist
## calculation
- playlists that only contain titles of a single genre
## sorting
- by playlist
## output
```sql
text of the SQL query
```
- Return only the SQL query. Do not explain.
"""

# OPUS41

In [8]:
# Model handle for the OPUS41 configuration from edurel.utils.llm.
# NOTE(review): stats_c presumably wraps the client with usage statistics -- confirm.
opus41 = llmu.stats_c(llmu.OPUS41)

## q1

In [None]:
# Create the chat session on the opus41 handle and install the system prompt.
# All cells below operate on this `chat` object, so this cell must run first.
chat = llmc.LLMChat(opus41)
chat.set_system_prompt(system_prompt)

In [31]:
# Reset the conversation so re-running the following cells starts fresh
# (presumably keeps the system prompt -- TODO confirm in llmchat).
chat.clear_conversation()

In [32]:
# Send the schema first; the value returned by the call (the model's reply
# text) is what the cell shows as its output.
chat.add_user_message(schema_prompt)

"I'm ready to help you generate SQL queries for this database schema. The database contains information about music with three tables:\n\n1. **Genre** - Contains genre information (GID, Description)\n2. **Playlist** - Contains playlist information (PLID, Name, LastCall date)\n3. **Title** - Contains song/title information (TID, Name, DurationInSec) with foreign keys to Playlist and Genre\n\nPlease provide your natural language question, and I'll convert it into a valid DuckDB SQL query."

In [40]:
# Ask question 1 in the same conversation; the returned reply is expected to
# be a single fenced sql block, as requested in q1.
chat.add_user_message(q1)


'```sql\nSELECT p.Name AS playlist\nFROM Playlist p\nINNER JOIN Title t ON p.PLID = t.PLID\nGROUP BY p.PLID, p.Name\nHAVING COUNT(DISTINCT t.GID) = 1\nORDER BY playlist\n```'

# Conversation History

In [48]:
# Render the conversation text as Markdown. In the recorded output the
# argument 1 shows only the latest AI message -- presumably "last n messages"; confirm.
display(Markdown(chat.show_conversation(1))) 

[4] AI:
 ```sql
SELECT p.Name AS playlist
FROM Playlist p
INNER JOIN Title t ON p.PLID = t.PLID
GROUP BY p.PLID, p.Name
HAVING COUNT(DISTINCT t.GID) = 1
ORDER BY playlist
```

In [49]:
# Extract the SQL from message -1 (presumably the most recent message, i.e. the
# model's answer -- confirm indexing in get_message) and print its result.
db.sql_print(get_sql(chat, -1))

┌──────────────┐
│   playlist   │
│   varchar    │
├──────────────┤
│ danceout     │
│ hard n heavy │
└──────────────┘

