In [45]:
import pandas as pd
from utils import read_config
from langchain.prompts import PromptTemplate
from langchain_openai.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)

from langchain.chat_models  import AzureChatOpenAI

In [46]:
configs = read_config(".env/info.json")

aoai_configs = configs['AOAI']

OPENAI_API_BASE = aoai_configs['OPENAI_API_BASE']
OPEN_AI_VERSION = aoai_configs['OPEN_AI_VERSION']
GPT_DEPLOYMENT_NAME = aoai_configs['GPT_DEPLOYMENT_NAME']
OPENAI_API_KEY = aoai_configs['OPENAI_API_KEY']
OPENAI_API_TYPE = aoai_configs['OPENAI_API_TYPE']

llm = AzureChatOpenAI(
    azure_endpoint=OPENAI_API_BASE,
    openai_api_version=OPEN_AI_VERSION,
    azure_deployment=GPT_DEPLOYMENT_NAME,
    openai_api_key=OPENAI_API_KEY,
    openai_api_type="azure",
)


In [47]:
bidb_view = pd.read_csv('./result/temp3.csv')
bidb_view['fixed_lineage'] = ""
bidb_view

Unnamed: 0,view_name,text,input,lineage,fixed_lineage
0,C$_0W_YFY_AV_TW_R,"select ""C1_ROW_ID"",""C2_ORG_ID"",""C3_REF_AV_HEAD...","select ""C1_ROW_ID"",""C2_ORG_ID"",""C3_REF_AV_HEAD...","```json\n{\n ""Datasource"": [""ODS.W_YFY_AV_T...",
1,C$_0W_YFY_IND_FIN_INFO_FS,"select FIN_INFO.SEQ C1_SEQ, FIN_INFO.ACC...","select FIN_INFO.SEQ C1_SEQ, FIN_INFO.ACC...","{\n ""Union1"": {\n ""Datasource"": [""OD...",
2,OP_FACT_CHP_INVENTORY_ETH_PULP,"SELECT PERIOD_NAME,STOCK_DATE TDATE,ORG_CODE,'...","SELECT PERIOD_NAME,STOCK_DATE TDATE,ORG_CODE,'...","{\n ""Union1"": {\n ""Datasource"": ['W_...",
3,OP_FACT_CHP_INVENTORY_REDEFINE,"SELECT PERIOD_NAME,TDATE,ORG_CODE ,CASE ORG_...","SELECT PERIOD_NAME,TDATE,ORG_CODE ,CASE ORG_...","```json\n{\n ""Union1"": {\n ""Datasour...",
4,OP_FACT_CHP_SALES_DETAILS,"SELECT '當月受訂' TYPE, --GREATEST(F.REQU...","SELECT '當月受訂' TYPE, --GREATEST(F.REQU...","```json\n{\n ""Union1"": {\n ""Datasour...",


In [48]:
system_template = """
Fix the following string to make the format properly if it is incorrect.
For the input is correct, just return 'nochange'.

Only return the result without any other words.
Remember, just fix the wrong format, keep any other things unchange and don't add ```json things.

input: {input_string}
"""
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{input_string}")
]

CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)

In [49]:
chain = CHAT_PROMPT | llm
for idx, row in bidb_view.iterrows():
    input_data = {
        'input_string': row.lineage,
    }
    llm_response = chain.invoke(input_data)

    bidb_view.at[idx, 'fixed_lineage'] = llm_response.content

In [50]:
bidb_view

Unnamed: 0,view_name,text,input,lineage,fixed_lineage
0,C$_0W_YFY_AV_TW_R,"select ""C1_ROW_ID"",""C2_ORG_ID"",""C3_REF_AV_HEAD...","select ""C1_ROW_ID"",""C2_ORG_ID"",""C3_REF_AV_HEAD...","```json\n{\n ""Datasource"": [""ODS.W_YFY_AV_T...",nochange
1,C$_0W_YFY_IND_FIN_INFO_FS,"select FIN_INFO.SEQ C1_SEQ, FIN_INFO.ACC...","select FIN_INFO.SEQ C1_SEQ, FIN_INFO.ACC...","{\n ""Union1"": {\n ""Datasource"": [""OD...",nochange
2,OP_FACT_CHP_INVENTORY_ETH_PULP,"SELECT PERIOD_NAME,STOCK_DATE TDATE,ORG_CODE,'...","SELECT PERIOD_NAME,STOCK_DATE TDATE,ORG_CODE,'...","{\n ""Union1"": {\n ""Datasource"": ['W_...",nochange
3,OP_FACT_CHP_INVENTORY_REDEFINE,"SELECT PERIOD_NAME,TDATE,ORG_CODE ,CASE ORG_...","SELECT PERIOD_NAME,TDATE,ORG_CODE ,CASE ORG_...","```json\n{\n ""Union1"": {\n ""Datasour...",nochange
4,OP_FACT_CHP_SALES_DETAILS,"SELECT '當月受訂' TYPE, --GREATEST(F.REQU...","SELECT '當月受訂' TYPE, --GREATEST(F.REQU...","```json\n{\n ""Union1"": {\n ""Datasour...",nochange


In [52]:
bidb_view.to_csv('result/reformated_temp_3(good one).csv', index=False, encoding='big5')

In [None]:
"""
{
    "Union1": {
        "Datasource": ["W_CHP_SALES_NOMANUAL_F", "W_CHP_ORDER_TYPE_D", "W_CHP_PAPER_SALES_ORNT_R", "W_CHP_PAPER_STAT_GROUP_R", "W_YFY_ORG_D", "W_CHP_PAPER_STAT_GROUP_R(cg)"],
        "Filter": [
            "W_CHP_ORDER_TYPE_D.USED = '損益'",
            "W_CHP_SALES_NOMANUAL_F.ORDER_LINE_STATUS = 'AWAITING_SHIPPING'",
            "GREATEST(W_CHP_SALES_NOMANUAL_F.REQUEST_DATE, W_CHP_SALES_NOMANUAL_F.SCHEDULE_SHIP_DATE) >= trunc(ADD_MONTHS(sysdate, -2), 'mm')",
            "W_CHP_SALES_NOMANUAL_F.om_customer_no <> 'S0001'",
            "(W_CHP_SALES_NOMANUAL_F.om_customer_no NOT LIKE 'Z%' OR W_CHP_SALES_NOMANUAL_F.om_customer_no = 'Z5170')"
        ],
        "Join": [
            "W_CHP_SALES_NOMANUAL_F.order_type = W_CHP_ORDER_TYPE_D.order_type",
            "W_CHP_SALES_NOMANUAL_F.line_order_type = W_CHP_ORDER_TYPE_D.order_line_type",
            "W_CHP_SALES_NOMANUAL_F.PAPER_STAT_GROUP_SALES = W_CHP_PAPER_SALES_ORNT_R.PAPER_STAT_GROUP_SALES(+)",
            "W_CHP_SALES_NOMANUAL_F.paper_stat_element = W_CHP_PAPER_STAT_GROUP_R.paper_stat_element(+)",
            "W_CHP_SALES_NOMANUAL_F.ORG_CODE = W_YFY_ORG_D.ORG_CODE",
            "W_CHP_SALES_NOMANUAL_F.PAPER_STAT_GROUP_SALES = W_CHP_PAPER_STAT_GROUP_R(cg).PAPER_STAT_GROUP_SALES(+)"
        ],
        "Groupby": {}
    },
    
    "Union2": {
        "Datasource": ["W_CHP_SALES_NOMANUAL_F", "W_CHP_ORDER_TYPE_D", "W_CHP_PAPER_SALES_ORNT_R", "W_CHP_PAPER_STAT_GROUP_R", "W_YFY_ORG_D", "W_CHP_PAPER_STAT_GROUP_R(cg)"],
        "Filter": [
            "W_CHP_ORDER_TYPE_D.USED = '損益'",
            "ORDER_LINE_STATUS = 'AWAITING_SHIPPING'",
            "GREATEST(W_CHP_SALES_NOMANUAL_F.REQUEST_DATE, W_CHP_SALES_NOMANUAL_F.SCHEDULE_SHIP_DATE) >= trunc(ADD_MONTHS(sysdate, -2), 'mm')",
            "W_CHP_SALES_NOMANUAL_F.om_customer_no <> 'S0001'",
            "(W_CHP_SALES_NOMANUAL_F.om_customer_no NOT LIKE 'Z%' OR W_CHP_SALES_NOMANUAL_F.om_customer_no = 'Z5170')"
        ],
        "Join": [
            "W_CHP_SALES_NOMANUAL_F.order_type = W_CHP_ORDER_TYPE_D.order_type",
            "W_CHP_SALES_NOMANUAL_F.line_order_type = W_CHP_ORDER_TYPE_D.order_line_type",
            "W_CHP_SALES_NOMANUAL_F.PAPER_STAT_GROUP_SALES = W_CHP_PAPER_SALES_ORNT_R.PAPER_STAT_GROUP_SALES(+)",
            "W_CHP_SALES_NOMANUAL_F.paper_stat_element = W_CHP_PAPER_STAT_GROUP_R.paper_stat_element(+)",
            "W_CHP_SALES_NOMANUAL_F.ORG_CODE = W_YFY_ORG_D.ORG_CODE",
            "W_CHP_SALES_NOMANUAL_F.PAPER_STAT_GROUP_SALES = W_CHP_PAPER_STAT_GROUP_R(cg).PAPER_STAT_GROUP_SALES(+)"
        ],
        "Groupby": {}
    },
    
    "Union3": {
        "Datasource": ["W_CHP_SALES_NOMANUAL_F", "W_CHP_ORDER_TYPE_D", "W_CHP_PAPER_SALES_ORNT_R", "W_CHP_PAPER_STAT_GROUP_R", "W_YFY_ORG_D", "W_CHP_PAPER_STAT_GROUP_R(cg)"],
        "Filter": [
            "W_CHP_ORDER_TYPE_D.USED = '損益'",
            "TRX_DATE IS NOT NULL",
            "W_CHP_SALES_NOMANUAL_F.ar_customer_no <> 'S0001'",
            "(W_CHP_SALES_NOMANUAL_F.ar_customer_no NOT LIKE 'Z%' OR W_CHP_SALES_NOMANUAL_F.ar_customer_no = 'Z5170')"
        ],
        "Join": [
            "W_CHP_SALES_NOMANUAL_F.order_type = W_CHP_ORDER_TYPE_D.order_type",
            "W_CHP_SALES_NOMANUAL_F.line_order_type = W_CHP_ORDER_TYPE_D.order_line_type",
            "W_CHP_SALES_NOMANUAL_F.PAPER_STAT_GROUP_SALES = W_CHP_PAPER_SALES_ORNT_R.PAPER_STAT_GROUP_SALES(+)",
            "W_CHP_SALES_NOMANUAL_F.paper_stat_element = W_CHP_PAPER_STAT_GROUP_R.paper_stat_element(+)",
            "W_CHP_SALES_NOMANUAL_F.ORG_CODE = W_YFY_ORG_D.ORG_CODE",
            "W_CHP_SALES_NOMANUAL_F.PAPER_STAT_GROUP_SALES = W_CHP_PAPER_STAT_GROUP_R(cg).PAPER_STAT_GROUP_SALES(+)"
        ],
        "Groupby": {}
    },

    "Final": {
        "Datasource": ["(All Union Tables)"],
        "Filter": [],
        "Join": [],
        "Groupby": [
            "TYPE",
            "Achieve_DATE",
            "Achieve_PERIOD_NAME",
            "Achieve_Customer_ID",
            "Achieve_Customer_No",
            "target_area",
            "Achieve_Salesrep_Number",
            "Achieve_Sales_Group",
            "paper_stat_group_sales",
            "NVL(R.PAPER_SALES_ORNT, 'N/A')",
            "(case W_CHP_ORDER_TYPE_D.sales_quantity when 'P' then 1 when 'M' then 1 else 0 end) * W_CHP_SALES_NOMANUAL_F.ordered_qty_mt",
            "(case W_CHP_ORDER_TYPE_D.sales_quantity when 'P' then 1 when 'M' then 1 else 0 end) * W_CHP_SALES_NOMANUAL_F.ordered_qty_lb",
            "(case W_CHP_ORDER_TYPE_D.sales_volume when 'P' then 1 when 'M' then 1 else 0 end) * W_CHP_SALES_NOMANUAL_F.ordered_amount_loc",
            "case when W_CHP_ORDER_TYPE_D.sales_quantity in ('P', 'M') then '計量' else '不計量' end",
            "case when W_CHP_ORDER_TYPE_D.sales_volume in ('P', 'M') then '計值' else '不計值' end",
            "W_CHP_SALES_NOMANUAL_F.org_id",
            "W_CHP_SALES_NOMANUAL_F.organization_id",
            "W_CHP_SALES_NOMANUAL_F.org_code",
            "W_YFY_ORG_D.OU_CODE",
            "W_CHP_SALES_NOMANUAL_F.om_customer_id",
            "W_CHP_SALES_NOMANUAL_F.om_customer_no",
            "W_CHP_SALES_NOMANUAL_F.om_customer_name",
            "W_CHP_SALES_NOMANUAL_F.om_salesrep_id",
            "W_CHP_SALES_NOMANUAL_F.om_salesrep_number",
            "W_CHP_SALES_NOMANUAL_F.om_sales_group",
            "W_CHP_SALES_NOMANUAL_F.order_number",
            "W_CHP_SALES_NOMANUAL_F.cust_po_number",
            "W_CHP_SALES_NOMANUAL_F.om_curr_code",
            "W_CHP_SALES_NOMANUAL_F.curr_code_loc",
            "W_CHP_SALES_NOMANUAL_F.header_id",
            "W_CHP_SALES_NOMANUAL_F.line_id",
            "W_CHP_SALES_NOMANUAL_F.line_no",
            "W_CHP_SALES_NOMANUAL_F.order_header_status",
            "W_CHP_SALES_NOMANUAL_F.order_line_status",
            "W_CHP_SALES_NOMANUAL_F.order_type",
            "W_CHP_SALES_NOMANUAL_F.line_order_type",
            "W_CHP_SALES_NOMANUAL_F.order_type_short",
            "W_CHP_SALES_NOMANUAL_F.line_order_type_short",
            "W_CHP_SALES_NOMANUAL_F.sales_document_name",
            "W_CHP_SALES_NOMANUAL_F.subinventory_code",
            "W_CHP_SALES_NOMANUAL_F.order_kind",
            "W_CHP_SALES_NOMANUAL_F.line_category_code",
            "W_CHP_SALES_NOMANUAL_F.order_category_detail",
            "W_CHP_SALES_NOMANUAL_F.booked_date",
            "W_CHP_SALES_NOMANUAL_F.ordered_date",
            "W_CHP_SALES_NOMANUAL_F.promise_date",
            "W_CHP_SALES_NOMANUAL_F.pricing_date",
            "W_CHP_SALES_NOMANUAL_F.request_date",
            "W_CHP_SALES_NOMANUAL_F.order_uom",
            "W_CHP_SALES_NOMANUAL_F.om_ex_rate_loc",
            "W_CHP_SALES_NOMANUAL_F.tax_code",
            "W_CHP_SALES_NOMANUAL_F.unit_price",
            "W_CHP_SALES_NOMANUAL_F.ordered_qty",
            "W_CHP_SALES_NOMANUAL_F.ordered_qty_mt",
            "W_CHP_SALES_NOMANUAL_F.ordered_qty_lb",
            "W_CHP_SALES_NOMANUAL_F.ordered_qty_re",
            "W_CHP_SALES_NOMANUAL_F.reservation_qty_mt",
            "W_CHP_SALES_NOMANUAL_F.ordered_amount_loc",
            "W_CHP_SALES_NOMANUAL_F.inventory_item_id",
            "W_CHP_SALES_NOMANUAL_F.item_no",
            "W_CHP_SALES_NOMANUAL_F.main_category",
            "W_CHP_SALES_NOMANUAL_F.sub_category",
            "W_CHP_SALES_NOMANUAL_F.return_context",
            "W_CHP_SALES_NOMANUAL_F.payment_term_id",
            "W_CHP_SALES_NOMANUAL_F.payment_term",
            "W_CHP_SALES_NOMANUAL_F.payment_term_desc",
            "W_CHP_SALES_NOMANUAL_F.packing_instructions",
            "W_CHP_SALES_NOMANUAL_F.category_cost",
            "W_CHP_SALES_NOMANUAL_F.to_customer_id",
            "W_CHP_SALES_NOMANUAL_F.to_customer_no",
            "W_CHP_SALES_NOMANUAL_F.to_customer_name",
            "W_CHP_SALES_NOMANUAL_F.country",
            "W_CHP_SALES_NOMANUAL_F.city",
            "W_CHP_SALES_NOMANUAL_F.continent",
            "W_CHP_SALES_NOMANUAL_F.area",
            "W_CHP_SALES_NOMANUAL_F.schedule_ship_date",
            "W_CHP_SALES_NOMANUAL_F.shipment_date",
            "W_CHP_SALES_NOMANUAL_F.shipped_qty",
            "W_CHP_SALES_NOMANUAL_F.shipped_qty_mt",
            "W_CHP_SALES_NOMANUAL_F.shipped_qty_lb",
            "W_CHP_SALES_NOMANUAL_F.shipped_qty_re",
            "W_CHP_SALES_NOMANUAL_F.shipped_amount_loc",
            "W_CHP_SALES_NOMANUAL_F.trx_date",
            "W_CHP_SALES_NOMANUAL_F.trx_period_name",
            "W_CHP_SALES_NOMANUAL_F.trx_type",
            "W_CHP_SALES_NOMANUAL_F.trx_number",
            "W_CHP_SALES_NOMANUAL_F.ar_customer_id",
            "W_CHP_SALES_NOMANUAL_F.ar_customer_no",
            "W_CHP_SALES_NOMANUAL_F.ar_customer_name",
            "W_CHP_SALES_NOMANUAL_F.ar_salesrep_number",
            "W_CHP_SALES_NOMANUAL_F.ar_sales_group",
            "W_CHP_SALES_NOMANUAL_F.ar_ex_rate_loc",
            "W_CHP_SALES_NOMANUAL_F.ar_curr_code",
            "W_CHP_SALES_NOMANUAL_F.ar_qty",
            "W_CHP_SALES_NOMANUAL_F.ar_qty_mt",
            "W_CHP_SALES_NOMANUAL_F.ar_qty_lb",
            "W_CHP_SALES_NOMANUAL_F.ar_qty_re",
            "W_CHP_SALES_NOMANUAL_F.ar_amount",
            "W_CHP_SALES_NOMANUAL_F.ar_amount_loc",
            "W_CHP_SALES_NOMANUAL_F.split_from_line_id",
            "W_CHP_SALES_NOMANUAL_F.ref_line_id",
            "W_CHP_SALES_NOMANUAL_F.paper_type_cat",
            "W_CHP_SALES_NOMANUAL_F.paper_type",
            "W_CHP_SALES_NOMANUAL_F.paper_stat_element",
            "case W_CHP_PAPER_STAT_GROUP_R(cg).paper_stat_category_sales when 'Specialty' then '特紙' when 'Commodity' then '大紙' else '空白' end",
            "W_CHP_SALES_NOMANUAL_F.base_wt",
            "W_CHP_SALES_NOMANUAL_F.paper_class",
            "W_CHP_SALES_NOMANUAL_F.spec",
            "W_CHP_SALES_NOMANUAL_F.ream_wt",
            "W_CHP_SALES_NOMANUAL_F.spec_class",
            "W_CHP_SALES_NOMANUAL_F.market_rules",
            "W_CHP_SALES_NOMANUAL_F.item_l_norm",
            "W_CHP_SALES_NOMANUAL_F.item_r_norm",
            "W_CHP_SALES_NOMANUAL_F.demand_class_code",
            "W_CHP_SALES_NOMANUAL_F.demand_class_name",
            "W_CHP_SALES_NOMANUAL_F.price_list",
            "W_CHP_SALES_NOMANUAL_F.sales_agreement",
            "W_CHP_SALES_NOMANUAL_F.fob",
            "W_CHP_SALES_NOMANUAL_F.terms",
            "W_CHP_SALES_NOMANUAL_F.delivery_name",
            "W_CHP_SALES_NOMANUAL_F.waybill",
            "W_CHP_SALES_NOMANUAL_F.trip_care",
            "W_CHP_SALES_NOMANUAL_F.trip_name",
            "W_CHP_SALES_NOMANUAL_F.qt_no",
            "W_CHP_SALES_NOMANUAL_F.segment1",
            "W_CHP_SALES_NOMANUAL_F.segment2",
            "W_CHP_SALES_NOMANUAL_F.segment3",
            "W_CHP_SALES_NOMANUAL_F.segment4",
            "W_CHP_SALES_NOMANUAL_F.counting_by"
        ]
    }
}
"""