# Import / Config

In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [42]:
import os
from pathlib import Path
from dotenv import load_dotenv
import edurel.utils.dbcon as dbcu
import edurel.utils.db as dbu
import edurel.utils.duckdb as ddbu
import edurel.utils.llm as llmu
import edurel.utils.llmchat as llmc
import edurel.widgets.mermaid_viz as mmw
import edurel.utils.misc as mu

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# BASE_DIR must come from the environment or the .env file. Fail fast when it is
# missing instead of silently building the bogus path "None/databases".
BASE_DIR = os.getenv("BASE_DIR")
if not BASE_DIR:
    raise RuntimeError(
        "BASE_DIR is not set; define it in the environment or in a .env file"
    )
DB_DIR = f"{BASE_DIR}/databases"


In [None]:
# Manual cleanup: uncomment and run to close the database connection `con`.
# con.close()

In [8]:
# Active database: the ccfraud connection, with no additional foreign keys declared.
additional_fks = {}
con = dbcu.ccfraud()

# Alternative database (swap in for the two assignments above):
# con = dbcu.company_en()
# additional_fks = {"OrgUnit": ["Employee|Head|EID", "OrgUnit|SuperUnit|OUID"]}

# Wrap the connection in a handler and render its schema as a YAML string,
# which is later embedded in the prompt sent to the model.
db = dbu.DbHandler(con, additional_fks=additional_fks)
schema = db.schema_yaml_str(["nullable", "fkname"])

In [5]:
# Build the model handle for GLM46 and start a chat session on top of it.
# NOTE(review): `stats_c` presumably wraps the model with usage/statistics tracking — confirm.
glm46 = llmu.stats_c(llmu.GLM46)
chat = llmc.LLMChat(glm46)

In [6]:
# Draw the chat's internal graph as Mermaid source and show it in a 300px-high widget.
# NOTE(review): this reaches into the private attribute `chat._graph`.
mermaid_src = chat._graph.get_graph().draw_mermaid()
viz = mmw.MermaidViz(mermaid_src, height="300px", width="100%")
viz.display()

Output(layout=Layout(height='300px', width='100%'))

In [None]:
# System prompt: frames the model as a natural-language-to-SQL translator
# that should answer in PostgreSQL syntax.
system_prompt = (
    "\n"
    "You are an expert SQL query generator. \n"
    "Convert natural language questions into valid SQL queries.\n"
    "use postgres syntax.\n"
)
chat.set_system_prompt(system_prompt)


In [10]:
# Send the schema description to the model as a user message so later
# questions can refer to its tables and columns.
schema_prompt = f"\nThe database schema is defined as follows:\n{schema}\n"
# print(schema_prompt)
chat.add_user_message(schema_prompt)

'\n\nExcellent. I have analyzed the database schema for the `ccfraud` table. Here is a detailed breakdown of the table\'s structure, the likely meaning of each column, and some example queries you could run.\n\n### Table Summary\n\nThe `ccfraud` table stores individual credit card transaction records. Each row represents a single transaction attempt and includes details about the user, the card used, the transaction amount and time, the merchant, and a crucial flag indicating whether the transaction was fraudulent. This is a classic dataset for fraud detection analysis.\n\n---\n\n### Column-by-Column Breakdown\n\n| Column Name | Data Type | Description & Interpretation |\n| :--- | :--- | :--- |\n| **userid** | `SMALLINT` | A unique identifier for the cardholder. A single user can have multiple cards. |\n| **card** | `SMALLINT` | An identifier for the specific card used in the transaction. This is likely a foreign key to a `cards` table. |\n| **ts** | `TIMESTAMP` | The exact date and ti

In [None]:
# Question 1a: per-year record counts plus each year's share of all records.
# The sort key was misspelled as "tyear"; the requested column is "year".
q1a = """
create a SQL query that outputs the following columns:
- year
- noof_year number of records per year
- noof_all total number of records
- frac fraction of records per year 
sorted by year;
"""

# Wrap the question with an instruction asking the model to reply with the SQL only.
user_msg = f"""
{q1a}
Return only the SQL query. Do not explain.
"""
chat.add_user_message(user_msg)

'\n\n```sql\nSELECT\n    EXTRACT(YEAR FROM ts) AS year,\n    COUNT(*) AS noof_year,\n    COUNT(*) OVER () AS noof_all,\n    CAST(COUNT(*) AS FLOAT) / COUNT(*) OVER () AS frac\nFROM\n    ccfraud\nGROUP BY\n    EXTRACT(YEAR FROM ts)\nORDER BY\n    year;\n```'

In [44]:
# Pull the SQL statement out of the model's most recent reply, echo it,
# then hand it to the database handler to print the result.
last_reply = chat.get_message(-1)
sql = mu.sql_extract(last_reply.content)
print(sql)
db.sql_print(sql)

SELECT
    EXTRACT(YEAR FROM ts) AS year,
    COUNT(*) AS noof_year,
    COUNT(*) OVER () AS noof_all,
    CAST(COUNT(*) AS FLOAT) / COUNT(*) OVER () AS frac
FROM
    ccfraud
GROUP BY
    EXTRACT(YEAR FROM ts)
ORDER BY
    year;
┌───────┬───────────┬──────────┬──────────┐
│ year  │ noof_year │ noof_all │   frac   │
│ int64 │   int64   │  int64   │  float   │
├───────┼───────────┼──────────┼──────────┤
│  2010 │   1491225 │       10 │ 149122.5 │
│  2011 │   1570551 │       10 │ 157055.1 │
│  2012 │   1610829 │       10 │ 161082.9 │
│  2013 │   1650917 │       10 │ 165091.7 │
│  2014 │   1672343 │       10 │ 167234.3 │
│  2015 │   1701371 │       10 │ 170137.1 │
│  2016 │   1708924 │       10 │ 170892.4 │
│  2017 │   1723360 │       10 │ 172336.0 │
│  2018 │   1721615 │       10 │ 172161.5 │
│  2019 │   1723938 │       10 │ 172393.8 │
├───────┴───────────┴──────────┴──────────┤
│ 10 rows                       4 columns │
└─────────────────────────────────────────┘



In [45]:
# Follow-up turn: tell the model its `frac` column is wrong and ask for a fix.
# A plain string literal is used because the message has no placeholders.
user_msg = "\ncolumn frac has an error in its calculation. please fix it.\n"
chat.add_user_message(user_msg)

''

In [32]:
# Dump the conversation held by the chat object so far.
conversation = chat.show_conversation()
print(conversation)

[0] USER: 
The database schema is defined as follows:
tables:
- tablename: ccfraud
  columns:
  - columnname: userid
    type: SMALLINT
  - columnname: card
    type: SMALLINT
  - columnname: ts
    type: TIMESTAMP
  - columnname: amount
    type: FLOAT
  - columnname: use_chip
    type: VARCHAR
  - columnname: mname
    type: BIGINT
  - columnname: mcity
    type: VARCHAR
  - columnname: mstate
    type: VARCHAR
  - columnname: zip
    type: VARCHAR
  - columnname: mcc
    type: INTEGER
  - columnname: err1
    type: VARCHAR
  - columnname: err2
    type: VARCHAR
  - columnname: err3
    type: VARCHAR
  - columnname: is_fraud
    type: VARCHAR



[1] AI: 

Excellent. I have analyzed the database schema for the `ccfraud` table. Here is a detailed breakdown of the table's structure, the likely meaning of each column, and some example queries you could run.

### Table Summary

The `ccfraud` table stores individual credit card transaction records. Each row represents a single transaction 

In [37]:
import edurel.utils.misc as mu


In [35]:
# Keep the content of the most recent message for inspection.
last_message = chat.get_message(-1)
text = last_message.content

In [39]:
# Fixture for mu.sql_extract: a SQL statement buried between lines of unrelated chatter.
_fixture_sql = """SELECT
    EXTRACT(YEAR FROM ts) AS year,
    COUNT(*) AS noof_year,
    COUNT(*) OVER () AS noof_all,
    CAST(COUNT(*) AS FLOAT) / COUNT(*) OVER () AS frac
FROM
    ccfraud
GROUP BY
    EXTRACT(YEAR FROM ts)
ORDER BY
    year;"""
_chatter_before = "\nbla. bla. hello\nhouse green\n"
_chatter_after = "\n\nbla hello\ngood moirning\n"
t = _chatter_before + _fixture_sql + _chatter_after

In [40]:
# Check that the extractor returns the SQL statement from the fixture text.
extracted_sql = mu.sql_extract(t)
print(extracted_sql)

SELECT
    EXTRACT(YEAR FROM ts) AS year,
    COUNT(*) AS noof_year,
    COUNT(*) OVER () AS noof_all,
    CAST(COUNT(*) AS FLOAT) / COUNT(*) OVER () AS frac
FROM
    ccfraud
GROUP BY
    EXTRACT(YEAR FROM ts)
ORDER BY
    year;


In [41]:
# Display the fixture's repr so its newline characters are visible.
t

'\nbla. bla. hello\nhouse green\nSELECT\n    EXTRACT(YEAR FROM ts) AS year,\n    COUNT(*) AS noof_year,\n    COUNT(*) OVER () AS noof_all,\n    CAST(COUNT(*) AS FLOAT) / COUNT(*) OVER () AS frac\nFROM\n    ccfraud\nGROUP BY\n    EXTRACT(YEAR FROM ts)\nORDER BY\n    year;\n\nbla hello\ngood moirning\n'