# Play with the default model (WangchanLion7B)

In [None]:
!pip install --q accelerate torch sentencepiece accelerate transformers bitsandbytes einops

In [None]:
import torch
import transformers
import pandas as pd
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)


In [None]:
# Please use transformers==4.34.1
from transformers import AutoModelForCausalLM, AutoTokenizer


# Hub identifier shared by the tokenizer and the model so the two cannot drift apart.
MODEL_NAME = "airesearch/WangchanLion7B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Keyword arguments for the model load, kept in one place for readability.
model_load_kwargs = dict(
    trust_remote_code=True,
    return_dict=True,
    load_in_8bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
    offload_folder="./",
    low_cpu_mem_usage=True,
)

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_load_kwargs)

In [None]:
def get_prompt(
    question: str,
    context: str = None
) -> str:
    """Build the Thai question/answer prompt.

    Args:
        question: Text placed after the question label.
        context: Optional background text. When it is not ``None`` (an empty
            string counts as provided) it is placed in a background section
            ahead of the question.

    Returns:
        The prompt string, ending with the answer label so the model
        continues from there.
    """
    if context is None:
        question_only_template = """คำถาม:{question}\n\nตอบ:"""
        return question_only_template.format(question=question)

    with_context_template = """พื้นหลัง:\n\n{context}\n\nคำถาม:{question}\n\nตอบ:"""
    return with_context_template.format(context=context, question=question)

In [None]:
question = "เกิดอะไรขึ้นที่เทียนอันเหมินตอนปี 1989"


# Question-only prompt (no background context), tokenized and moved to the GPU.
full_prompt = get_prompt(question=question)
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")

# Sampling-based decoding (top-k / top-p at a low temperature).
# `early_stopping` was dropped: it only controls the stopping condition of
# beam search, and this call samples with the default single beam.
output = model.generate(
    input_ids=tokens['input_ids'],
    attention_mask=tokens['attention_mask'],
    max_new_tokens=256,
    top_k=50, top_p=0.95,
    do_sample=True,
    temperature=0.3,
    repetition_penalty = 1.2,
    eos_token_id = tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

In [None]:
# The original literal ended with a doubled closing quote (`""`); the stray
# character has been removed so the quoted title is balanced.
question = 'เขียนบทความเกี่ยวกับ "ประโยชน์ของทุเรียน"'


# Question-only prompt (no background context), tokenized and moved to the GPU.
full_prompt = get_prompt(question=question)
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")

# Sampling-based decoding (top-k / top-p at a low temperature).
# `early_stopping` was dropped: it only controls the stopping condition of
# beam search, and this call samples with the default single beam.
output = model.generate(
    input_ids=tokens['input_ids'],
    attention_mask=tokens['attention_mask'],
    max_new_tokens=256,
    top_k=50, top_p=0.95,
    do_sample=True,
    temperature=0.3,
    repetition_penalty = 1.2,
    eos_token_id = tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

In [None]:
# Background passage and a question about it; both are fed to get_prompt below.
context = 'คลีโอพัตราที่ 6 ทรีฟาเอนา (กรีก: Κλεοπάτρα Τρύφαινα) (อังกฤษ: Cleopatra VI Tryphaena) เป็นสมเด็จพระราชินี แห่งอาณาจักรอียิปต์โบราณ จากราชวงศ์ทอเลมี พระนางเป็นพระธิดาในทอเลมีที่ 12กับคลีโอพัตราที่ 5 พระนางเป็นพระขนิษฐาของคลีโอพัตราที่ 7 ราชินีแห่งอียิปต์พระองค์ต่อมา'
question = 'คลีโอพัตราเคยเป็นราชินีที่ไหน'


# Context + question prompt, tokenized and moved to the GPU.
full_prompt = get_prompt(question=question, context=context)
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")

# Sampling-based decoding (top-k / top-p at a low temperature).
# `early_stopping` was dropped: it only controls the stopping condition of
# beam search, and this call samples with the default single beam.
output = model.generate(
    input_ids=tokens['input_ids'],
    attention_mask=tokens['attention_mask'],
    max_new_tokens=256,
    top_k=50, top_p=0.95,
    do_sample=True,
    temperature=0.3,
    repetition_penalty = 1.2,
    eos_token_id = tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

# Load data

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read() call in the download loop below.
CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'openthaigpt-exercise-ungraded:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F75666%2F8360884%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240429%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240429T035040Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D8c0f4c54bd9b0d87db02da59f4a714441ef73018ee86a6575d5a2fb9d7e63b732ad8f91d9560c9aff8a1a23b444ca6a9301c70226438052d9637c37b22e96ef096ec9370e9ef0f46059f8c09cc0bd4e95ceb835ce0a2f0d7bf66e68bb351eb06725b9e3ef2c7e37f02e9a8225f86f990baf6711ba218688ca354d765dd0a9deae2c31ea3ed254bd67829685c3536d926e7899ea92ff44b5b0ebf31f87a1fc991b9223fe9d4fe92727544ea8009dc5d5e8eac9369f4bc75f89478fb2006956b474279254473a102bca03524f7ffd9231bdca652b0850dfdaf838432b5ab5801eda46304d55a707c049cc765945da650d81462a9e5bea335b64e751884691178ff'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<url-encoded download URL>" entry of
# DATA_SOURCE_MAPPING and unpack it under KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a literal ':' inside the URL part cannot
    # break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            # The header may be absent (None); fall back to a plain byte
            # counter instead of crashing on int(None).
            total_bytes = int(total_length) if total_length else 0
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                if total_bytes > 0:
                    # 50-character progress bar, clamped in case more bytes
                    # arrive than the header announced.
                    done = min(50, int(50 * dl / total_bytes))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise see a truncated archive.
            tfile.flush()
            # NOTE(review): extractall() trusts member paths inside the
            # archive; only use this with archives from a trusted source.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; the original reused
                # `tarfile`, which replaced the module and broke any later
                # tar download in the same loop.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')

In [None]:
import numpy as np
import pandas as pd

# Online-shopping table used by all the prompt experiments below.
tbl4 = pd.read_csv(
    '/kaggle/input/openthaigpt-exercise-ungraded/TBL4-Online-Shopping-Dataset.csv'
)
# Other files in the same dataset folder (currently not loaded):
# data = pd.read_csv('/kaggle/input/openthaigpt-exercise-ungraded/data.csv')
# spsm = pd.read_csv('/kaggle/input/openthaigpt-exercise-ungraded/sample_submission.csv')

# Prompt เพิ่มเติม (additional prompts)

In [None]:
# Instructions inserted into the `{instruction_str}` slot of the pandas prompt:
# six numbered rules followed by a description of every column of the table.
instruction_str = (
    "1. Convert the query to executable Python code using Pandas.\n"
    "2. The final line of code should be a Python expression that can be called with the `eval()` function.\n"
    "3. The code should represent a solution to the query.\n"
    "4. PRINT ONLY THE EXPRESSION.\n"
    "5. Do not quote the expression.\n"
    # The trailing "\n" was missing, which glued rule 6 onto the field list
    # ("...customer IDConsider these fields: ...").
    "6. Consider that, when asked about number of customer, you need to count for unique customer based on customer ID\n"
    """Consider these fields: CustomerID

Description: Unique identifier for each customer.

Data Type: Numeric;

Gender

Description: Gender of the customer (e.g., Male, Female).

Data Type: Categorical;

Location

Description: Location or address information of the customer.

Data Type: Text;

Tenure_Months

Description: Number of months the customer has been associated with the platform.
Data Type: Numeric;

Transaction_ID

Description: Unique identifier for each transaction.

Data Type: Numeric;

Transaction_Date

Description: Date of the transaction.

Data Type: Date;

Product_SKU

Description: Stock Keeping Unit (SKU) identifier for the product.

Data Type: Text;

Product_Description

Description: Description of the product.

Data Type: Text;

Product_Category:

Description: Category to which the product belongs.

Data Type: Categorical;

Quantity

Description: Quantity of the product purchased in the transaction.

Data Type: Numeric;

Avg_Price

Description: Average price of the product.

Data Type: Numeric;

Total_Price

Description: Total price of the product exclude delivery charges.

Data Type: Numeric;

Delivery_Charges

Description: Charges associated with the delivery of the product.

Data Type: Numeric;

Date

Description: Date of the transaction (potentially redundant with Transaction_Date).

Data Type: Date;

Month

Description: Month of the transaction.

Data Type: Numeric; """
)

# Prompt skeleton asking for a pandas expression. Placeholders: {df_str},
# {instruction_str} and {query_str}. Written one output line per list entry;
# an empty entry produces a blank line.
pandas_prompt_str = "\n".join(
    [
        "You are working with a pandas dataframe in Python.",
        "The name of the dataframe is `df`.",
        "This is the result of `print(df.head())`:",
        "{df_str}",
        "",
        "Follow these instructions:",
        "{instruction_str}",
        "Query: {query_str}",
        "",
        "Expression:",
    ]
)

# Prompt skeleton for turning a pandas result into a final answer.
# Placeholders: {query_str}, {pandas_instructions}, {pandas_output}.
# The three guidance sentences previously had no trailing "\n", so they ran
# into each other and into the "Pandas Instructions" label.
response_synthesis_prompt_str = (
    "Given an input question, synthesize a response from the query results.\n"
    "Query: {query_str}\n\n"
    "If the answer is a number, simply state the number, DO NOT add any other text\n"
    "Example: Do this : 100\nDo NOT DO this: The number of customer is 100.\n"
    "If there're more than 2 decimal points, only print the first two.\n"
    "Pandas Instructions (optional):\n{pandas_instructions}\n\n"
    "Pandas Output: {pandas_output}\n\n"
    "Response: "
)

In [None]:
# Natural-language question; the next cell substitutes it into the `{query_str}` slot of pandas_prompt_str.
query = "What is the average quantity of products purchased per transaction in october, please answer the number in 2 decimal places?"

In [None]:
# Fill the pandas prompt skeleton with the table preview, the instructions and
# the question, then display the resulting prompt as the cell output.
pandas_prompt = pandas_prompt_str.format(
    df_str=tbl4.head(),
    instruction_str=instruction_str,
    query_str=query,
)
pandas_prompt

In [None]:
# [INST]/<<SYS>-style wrapper around the pandas prompt built above.
prompt_template = """[INST] <<SYS>
You are a question answering assistant. Answer the question as truthful and helpful as possible คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด
<</SYS>>

{human_prompt} [/INST]
"""
full_prompt = prompt_template.format(human_prompt = pandas_prompt)

tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")
# `temperature` is only applied in sampling mode; without `do_sample=True`
# the call decodes greedily and the temperature is ignored. The attention
# mask is passed explicitly, matching the earlier generation cells.
output = model.generate(
    input_ids=tokens["input_ids"],
    attention_mask=tokens["attention_mask"],
    max_new_tokens=256,
    do_sample=True,
    temperature=0.55,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

In [None]:
# "### USER / ### RESPONSE" wrapper around the pandas prompt built above.
prompt_template = "### USER:\n{human_prompt}\n\n### RESPONSE:\n"
# NOTE(review): `prompt` is assigned here but not used by this cell; the
# template is filled with `pandas_prompt`. Confirm which one was intended.
prompt = """Write SQL code"""
full_prompt = prompt_template.format(human_prompt=pandas_prompt)

tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")
# `temperature` is only applied in sampling mode; without `do_sample=True`
# the call decodes greedily and the temperature is ignored. The attention
# mask is passed explicitly, matching the earlier generation cells.
output = model.generate(
    input_ids=tokens["input_ids"],
    attention_mask=tokens["attention_mask"],
    max_new_tokens=256,
    do_sample=True,
    temperature=0.55,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

In [None]:
# Human prompt: a pasted table preview followed by a Thai request for SQL code.
# This must be a raw string. The pasted preview ends wrapped header lines with
# a backslash, and in a normal string literal backslash-newline is a line
# continuation that would silently join each header line to its first data row.
prompt = r"""You are working with tabular data.
This is a part of the table named test1
   CustomerID Gender Location  Tenure_Months  Transaction_ID Transaction_Date  \
0       17850      M  Chicago             12           16679       2019-01-01
1       17850      M  Chicago             12           16680       2019-01-01
2       17850      M  Chicago             12           16696       2019-01-01
3       17850      M  Chicago             12           16699       2019-01-01
4       17850      M  Chicago             12           16700       2019-01-01

      Product_SKU                                Product_Description  \
0  GGOENEBJ079499  Nest Learning Thermostat 3rd Gen-USA - Stainle...
1  GGOENEBJ079499  Nest Learning Thermostat 3rd Gen-USA - Stainle...
2  GGOENEBQ078999             Nest Cam Outdoor Security Camera - USA
3  GGOENEBQ079099    Nest Protect Smoke + CO White Battery Alarm-USA
4  GGOENEBJ079499  Nest Learning Thermostat 3rd Gen-USA - Stainle...

  Product_Category  Quantity  Avg_Price  Total_Price  Delivery_Charges  \
0         Nest-USA         1     153.71       153.71               6.5
1         Nest-USA         1     153.71       153.71               6.5
2         Nest-USA         2     122.77       245.54               6.5
3         Nest-USA         1      81.50        81.50               6.5
4         Nest-USA         1     153.71       153.71               6.5

       Date  Month
0  1/1/2019      1
1  1/1/2019      1
2  1/1/2019      1
3  1/1/2019      1
4  1/1/2019      1
เขียนโค้ด SQL เพื่อตอบคำถาม: ปริมาณสินค้าโดยเฉลี่ยที่ซื้อต่อธุรกรรมในเดือนตุลาคมคือเท่าใด โปรดตอบตัวเลขเป็นทศนิยม 2 ตำแหน่ง"""
# Only this template is used. The earlier "### USER / ### RESPONSE" assignment
# in this cell was overwritten before use and has been removed.
prompt_template = """[INST] <<SYS>
คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด
<</SYS>>

{human_prompt} [/INST]
"""
full_prompt = prompt_template.format(human_prompt = prompt)

tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")
output = model.generate(tokens["input_ids"], max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))

In [None]:
# Thai request for an SQL statement; it is wrapped in the [INST]/<<SYS> template below.
prompt = """ เขียนคำสั่ง SQL ในการดึงข้อมูลจากคอลัมม์ที่ชื่อว่า "Quantity" จากตารางที่ชื่อว่า "Tabular" โดยแสดงผลแบบที่มีการเรียงข้อมูลให้ข้อมูลที่มีค่ามากอยู่บนหัวตาราง  """
prompt_template = """[INST] <<SYS>
คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด
<</SYS>>

{human_prompt} [/INST]
"""
full_prompt = prompt_template.format(human_prompt=prompt)

# Tokenize, generate up to 256 new tokens with default decoding settings, and print the decoded text.
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")
output = model.generate(tokens["input_ids"], max_new_tokens=256)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

In [None]:
# Thai request to explain df.groupby(); it is wrapped in the [INST]/<<SYS> template below.
prompt = """ จงอธิบายคำสั่ง df.groupby() ในภาษา Python  """
prompt_template = """[INST] <<SYS>
คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด
<</SYS>>

{human_prompt} [/INST]
"""
full_prompt = prompt_template.format(human_prompt=prompt)

# Tokenize, generate up to 256 new tokens with default decoding settings, and print the decoded text.
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")
output = model.generate(tokens["input_ids"], max_new_tokens=256)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

In [None]:
# Single-line prompt template with two placeholders: {system_prompt} and {human_prompt}.
prompt_template = "<s>[INST] <<SYS>\n{system_prompt}\n<</SYS>>\n\n{human_prompt} [/INST]"
# Human prompt made of three parts: a description of every table column, a
# glossary of SQL keywords, and a final Thai instruction asking for SQL code
# that answers a question about the table.
prompt = """
### CustomerID รหัสลูกค้า
Description: Unique identifier for each customer.
Data Type: Numeric;

### Gender
Description: Gender of the customer (e.g., Male, Female).
Data Type: Categorical;

### Location
Description: Location or address information of the customer.
Data Type: Text;

### Tenure_Months
Description: Number of months the customer has been associated with the platform.
Data Type: Numeric;

### Transaction_ID
Description: Unique identifier for each transaction.
Data Type: Numeric;

### Transaction_Date
Description: Date of the transaction.
Data Type: Date;

### Product_SKU
Description: Stock Keeping Unit (SKU) identifier for the product.
Data Type: Text;

### Product_Description
Description: Description of the product.
Data Type: Text;

### Product_Category:
Description: Category to which the product belongs.
Data Type: Categorical;

### Quantity
Description: Quantity of the product purchased in the transaction.
Data Type: Numeric;

### Avg_Price
Description: Average price of the product.
Data Type: Numeric;

### Total_Price
Description: Total price of the product exclude delivery charges.
Data Type: Numeric;

### Delivery_Charges
Description: Charges associated with the delivery of the product.
Data Type: Numeric;

### Date
Description: Date of the transaction (potentially redundant with Transaction_Date).
Data Type: Date;

### Month
Description: Month of the transaction.
Data Type: Categorical;

Here is SQL keywords:
ADD: Used to add columns to an existing table.
ALL: Used with the SELECT statement to retrieve all records.
ALTER: Used to modify existing database objects like tables.
AND: A logical operator used to combine multiple conditions in a WHERE clause.
AS: Used to rename a column or table in a query.
ASC: Specifies ascending order in the ORDER BY clause.
BETWEEN: Used to select values within a specified range.
BY: Used in conjunction with ORDER to sort query results.
CASE: Used to perform conditional logic in SQL statements.
CHECK: Specifies a condition for values allowed in a column.
COLUMN: Used to specify the columns in a table.
CONSTRAINT: Defines rules or conditions for data in a table.
CREATE: Used to create database objects like tables, views, or indexes.
DATABASE: Used to create a new database.
DEFAULT: Specifies a default value for a column.
DELETE: Used to remove records from a table.
DESC: Specifies descending order in the ORDER BY clause.
DISTINCT: Used to return only distinct (unique) values.
DROP: Used to remove database objects like tables, views, or indexes.
ELSE: Used in conjunction with CASE to specify an alternative result.
END: Marks the end of a code block.
ESCAPE: Used to search for special characters in a string.
EXCEPT: Used to return the distinct rows from the left query that are not in the right query.
EXISTS: Tests for the existence of any record in a subquery.
FOREIGN: Specifies a foreign key constraint in a table.
FROM: Specifies the tables from which to retrieve data in a SELECT statement.
FULL: Joins two tables and returns all records that match.
GROUP: Groups rows that have the same values into summary rows.
HAVING: Used in combination with GROUP BY to filter the results of aggregate functions.
IN: Specifies multiple values for a WHERE clause.
INDEX: Used to create and retrieve data from the database index.
INNER: Joins two tables and returns only the rows that have matching values.
INSERT: Used to insert new records into a table.
INTERSECT: Returns only the distinct rows that are common to both queries.
INTO: Specifies the table where data will be inserted.
IS: Tests for NULL values.
JOIN: Used to combine rows from two or more tables.
LEFT: Joins two tables and returns all records from the left table and matching records from the right table.
LIKE: Used to search for a specified pattern in a column.
LIMIT: Specifies the maximum number of records to return.
NOT: Negates a condition in a WHERE clause.
NULL: Represents missing or unknown data.
OR: A logical operator used to combine multiple conditions in a WHERE clause.
ORDER: Specifies the sorting order of the result set.
OUTER: Used in OUTER JOIN to specify that all records from one table should be returned, even if there are no matches in the other table.
PRIMARY: Specifies a primary key constraint in a table.
REFERENCES: Defines a foreign key constraint.
RIGHT: Joins two tables and returns all records from the right table and matching records from the left table.
ROWNUM: Specifies a row number in a query result set.
SELECT: Used to retrieve data from a database.
SET: Used to update data in a table.
TABLE: Used to create, modify, or drop a table.
TOP: Specifies the number of records to return from the beginning of a query result set.
TRUNCATE: Used to remove all records from a table quickly.
UNION: Combines the results of two or more SELECT statements.
UNIQUE: Specifies that a column or combination of columns must be unique.
UPDATE: Used to modify existing records in a table.
VALUES: Specifies the values to insert into a table.
VIEW: A virtual table based on the result-set of an SQL statement.
WHERE: Used to filter records based on specified conditions.
WITH: Specifies common table expressions (CTEs) for use within a query.

เขียน SQL โค้ดเพื่อตอบคำถามที่ว่า CustomerID ที่มียอดจำนวนเงินสั่งซื้อไม่รวมค่าขนส่งมากที่สุดมาจากที่เมืองไหน"""
# Fill both template slots: the Thai system message and the schema/SQL prompt.
full_prompt = prompt_template.format(
    system_prompt='คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด',
    human_prompt=prompt,
)

# Tokenize, generate up to 256 new tokens with default decoding settings, and print the decoded text.
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")
output = model.generate(tokens["input_ids"], max_new_tokens=256)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

In [None]:
# SQL variant of the instructions inserted into the `{instruction_str}` slot:
# two numbered rules followed by a description of every column of the table.
instruction_str = (
    "1. Convert the query to executable SQL code using MySQL.\n"
    # The trailing "\n" was missing, which glued rule 2 onto the field list
    # ("...customer IDConsider these fields: ...").
    "2. Consider that, when asked about number of customer, you need to count for unique customer based on customer ID\n"
    """Consider these fields: CustomerID

Description: Unique identifier for each customer.

Data Type: Numeric;

Gender

Description: Gender of the customer (e.g., Male, Female).

Data Type: Categorical;

Location

Description: Location or address information of the customer.

Data Type: Text;

Tenure_Months

Description: Number of months the customer has been associated with the platform.
Data Type: Numeric;

Transaction_ID

Description: Unique identifier for each transaction.

Data Type: Numeric;

Transaction_Date

Description: Date of the transaction.

Data Type: Date;

Product_SKU

Description: Stock Keeping Unit (SKU) identifier for the product.

Data Type: Text;

Product_Description

Description: Description of the product.

Data Type: Text;

Product_Category:

Description: Category to which the product belongs.

Data Type: Categorical;

Quantity

Description: Quantity of the product purchased in the transaction.

Data Type: Numeric;

Avg_Price

Description: Average price of the product.

Data Type: Numeric;

Total_Price

Description: Total price of the product exclude delivery charges.

Data Type: Numeric;

Delivery_Charges

Description: Charges associated with the delivery of the product.

Data Type: Numeric;

Date

Description: Date of the transaction (potentially redundant with Transaction_Date).

Data Type: Date;

Month

Description: Month of the transaction.

Data Type: Numeric; """
)

# Prompt skeleton asking for SQL. Placeholders: {df_str}, {instruction_str}
# and {query_str}. Written one output line per list entry; an empty entry
# produces a blank line.
sql_prompt_str = "\n".join(
    [
        "You are working with a csv table in SQL.",
        "The name of the table is `df`.",
        "This is the result of `SELECT * FROM df LIMIT 5;`:",
        "{df_str}",
        "",
        "Follow these instructions:",
        "{instruction_str}",
        "Query: {query_str}",
        "",
        "Expression:",
    ]
)

# Fill the SQL prompt skeleton with the table preview, the instructions and a
# Thai question, then tokenize it onto the GPU.
full_prompt = sql_prompt_str.format(df_str = tbl4.head(),
                                    instruction_str = instruction_str,
                                    query_str = 'เขียนโค้ด SQL เพื่อตอบคำถาม: ปริมาณสินค้าโดยเฉลี่ยที่ซื้อต่อธุรกรรมในเดือนตุลาคมคือเท่าใด')
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")

# Sampling-based decoding (top-k / top-p at a low temperature).
# `early_stopping` was dropped: it only controls the stopping condition of
# beam search, and this call samples with the default single beam.
output = model.generate(
    input_ids=tokens['input_ids'],
    attention_mask=tokens['attention_mask'],
    max_new_tokens=256,
    top_k=50, top_p=0.95,
    do_sample=True,
    temperature=0.3,
    repetition_penalty = 1.2,
    eos_token_id = tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

In [None]:
# NOTE(review): this re-defines get_prompt with the same body as the definition
# near the top of the notebook; the later definition shadows the earlier one.
def get_prompt(
    question: str,
    context: str = None
) -> str:
    """Build the Thai question/answer prompt.

    Args:
        question: Text placed after the question label.
        context: Optional background text. When it is not ``None`` (an empty
            string counts as provided) it is placed in a background section
            ahead of the question.

    Returns:
        The prompt string, ending with the answer label so the model
        continues from there.
    """
    if context is None:
        question_only_template = """คำถาม:{question}\n\nตอบ:"""
        return question_only_template.format(question=question)

    with_context_template = """พื้นหลัง:\n\n{context}\n\nคำถาม:{question}\n\nตอบ:"""
    return with_context_template.format(context=context, question=question)

In [None]:
# Context-only variant of the SQL prompt skeleton: it has the {df_str} and
# {instruction_str} placeholders but no query slot, because the question is
# supplied separately. The empty last entry yields the trailing newline.
sql_prompt_str = "\n".join(
    [
        "You are working with a csv table in SQL.",
        "The name of the table is `df`.",
        "This is the result of `SELECT * FROM df LIMIT 5;`:",
        "{df_str}",
        "",
        "Follow these instructions:",
        "{instruction_str}",
        "",
    ]
)

# Render the table preview and instructions; the result is used as the
# background context of the prompt.
cont_prompt = sql_prompt_str.format(df_str = tbl4.head(),
                                    instruction_str = instruction_str)

# Context + Thai question prompt, tokenized and moved to the GPU.
full_prompt = get_prompt(question='เขียนโค้ด SQL เพื่อตอบคำถาม: ปริมาณสินค้าโดยเฉลี่ยที่ซื้อต่อธุรกรรมในเดือนตุลาคมคือเท่าใด',
                         context=cont_prompt)
tokens = tokenizer(full_prompt, return_tensors="pt").to("cuda")

# Sampling-based decoding (top-k / top-p at a low temperature).
# `early_stopping` was dropped: it only controls the stopping condition of
# beam search, and this call samples with the default single beam.
output = model.generate(
    input_ids=tokens['input_ids'],
    attention_mask=tokens['attention_mask'],
    max_new_tokens=256,
    top_k=50, top_p=0.95,
    do_sample=True,
    temperature=0.3,
    repetition_penalty = 1.2,
    eos_token_id = tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))