In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to the local `scripts` package
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import sqlite3
from transformers import AutoTokenizer, AutoModelForCausalLM

from scripts.llm import LLM, prepare_query, generate_sql

## Chargement du LLM fine-tuned

In [3]:
# Identifier of the base model. In this notebook it is only used to load the
# tokenizer; the model weights are loaded from the local ./finetuned_llm/qwen2
# directory instead.
model_id = "Qwen/Qwen2-1.5B-Instruct"

In [4]:
# Tokenizer comes from the base model id, not from the fine-tuned directory.
# NOTE(review): this assumes fine-tuning did not change the vocabulary or the
# chat template -- confirm against the training script.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [5]:
# Load the fine-tuned causal LM from the local checkpoint directory.
model = AutoModelForCausalLM.from_pretrained(
    "./finetuned_llm/qwen2",
    # "auto" lets transformers pick the dtype (per its docs: from the checkpoint).
    torch_dtype="auto",
    # "auto" lets the library decide on which device(s) the weights are placed.
    device_map="auto",
    # Memory budget for device 0 used by the automatic placement.
    # NOTE(review): 5GB looks tuned to a specific GPU -- confirm for other machines.
    max_memory={0: "5GB"}
)

In [6]:
# Bundle the model and tokenizer into the project's LLM helper (imported from
# scripts.llm); this object is what generate_sql receives in the query cells.
llm = LLM(model=model, tokenizer=tokenizer)

## Instanciation de la BDD

In [7]:
# Path to the ATIS SQLite database, relative to the notebook's working directory.
dbfile = "sqllite/atis-db.sqlite"
# Open the database READ-ONLY through a SQLite URI. The SQL executed on this
# connection is produced by the LLM, so a generated DROP/DELETE/UPDATE must not
# be able to alter the data. `mode=ro` also makes a wrong path fail immediately
# ("unable to open database file") instead of silently creating an empty
# database whose queries would later fail with "no such table".
con = sqlite3.connect(f"file:{dbfile}?mode=ro", uri=True)

## Requêtes SQL à tester:

1. `List ground transportation in BALTIMORE`

Résultat attendu:
| transport_type     |
| -----------------: |
| AIR TAXI OPERATION |
| LIMOUSINE          |
| RENTAL CAR         |

In [10]:
# Test case 1: natural-language question to be translated into SQL.
instruction = "List ground transportation in BALTIMORE"
# Build the model input from the question and the ./prompt.json file.
# NOTE(review): the prompt layout is defined in scripts/llm.py, not visible here.
query = prepare_query(instruction, "./prompt.json")
# Have the LLM produce the SQL statement; the result is passed as-is to pandas.
sql_generated = generate_sql(query, llm)
# Run the generated SQL on the ATIS connection; the DataFrame is the cell output
# and should match the expected table in the markdown above.
pd.read_sql_query(sql_generated, con)

Unnamed: 0,transport_type
0,AIR TAXI OPERATION
1,LIMOUSINE
2,RENTAL CAR


2. `What's the aircraft type with the greatest seating capacity`

Résultat attendu:
| aircraft_code |
| ------------: |
| 744           |

In [11]:
# Test case 2: natural-language question to be translated into SQL.
instruction = "What's the aircraft type with the greatest seating capacity" 
# Build the model input from the question and the ./prompt.json file.
# NOTE(review): the prompt layout is defined in scripts/llm.py, not visible here.
query = prepare_query(instruction, "./prompt.json")
# Have the LLM produce the SQL statement; the result is passed as-is to pandas.
sql_generated = generate_sql(query, llm)
# Run the generated SQL on the ATIS connection; the DataFrame is the cell output
# and should match the expected table in the markdown above.
pd.read_sql_query(sql_generated, con)

Unnamed: 0,aircraft_code
0,744


3. `What is the name of airport in Orlando ?`

Résultat attendu:
| airport_name          |
| ------------:         |
| ORLANDO INTERNATIONAL |

In [13]:
# Test case 3: natural-language question to be translated into SQL.
instruction = "What is the name of airport in Orlando ?"
# Build the model input from the question and the ./prompt.json file.
# NOTE(review): the prompt layout is defined in scripts/llm.py, not visible here.
query = prepare_query(instruction, "./prompt.json")
# Have the LLM produce the SQL statement; the result is passed as-is to pandas.
sql_generated = generate_sql(query, llm)
# Run the generated SQL on the ATIS connection; the DataFrame is the cell output
# and should match the expected table in the markdown above.
pd.read_sql_query(sql_generated, con)

Unnamed: 0,airport_name
0,ORLANDO INTERNATIONAL


4. `I need ground transportation in DALLAS please show me what is available`

Résultat attendu:
| transport_type |
| -------------: |
| LIMOUSINE      |
| RENTAL CAR     |

In [16]:
# Test case 4: natural-language question to be translated into SQL.
instruction = "I need ground transportation in DALLAS please show me what is available"
# Build the model input from the question and the ./prompt.json file.
# NOTE(review): the prompt layout is defined in scripts/llm.py, not visible here.
query = prepare_query(instruction, "./prompt.json")
# Have the LLM produce the SQL statement; the result is passed as-is to pandas.
sql_generated = generate_sql(query, llm)
# Run the generated SQL on the ATIS connection; the DataFrame is the cell output
# and should match the expected table in the markdown above.
pd.read_sql_query(sql_generated, con)

Unnamed: 0,transport_type
0,LIMOUSINE
1,RENTAL CAR


5. `Show me the flights from BOSTON to OAKLAND`

Résultat attendu:
|flight_id |
| -------: |
| 302340   |
| 302341   |
| 302342   |
| 303432   |
| 304884   |
| 304885   |
| 306660   |
| 306661   |
| 306662   |
| 306663   |
| 308971   |
| 308972   |

In [20]:
# Test case 5: natural-language question to be translated into SQL.
instruction = "Show me the flights from BOSTON to OAKLAND"
# Build the model input from the question and the ./prompt.json file.
# NOTE(review): the prompt layout is defined in scripts/llm.py, not visible here.
query = prepare_query(instruction, "./prompt.json")
# Have the LLM produce the SQL statement; the result is passed as-is to pandas.
sql_generated = generate_sql(query, llm)
# Run the generated SQL on the ATIS connection; the DataFrame is the cell output.
# NOTE(review): the expected table above lists 12 flight ids -- check that the
# displayed result contains all of them.
pd.read_sql_query(sql_generated, con)

Unnamed: 0,flight_id
0,302340
1,302341
2,302342
3,303432
4,304884
5,304885
6,306660
7,306661
8,306662
9,306663
