In [1]:
import os
import json
import pandas as pd
import xmltodict

In [2]:
def xml_to_df(xml_file_name):
    """Extract rule type, rule name and output reference name from a rule export.

    The file is parsed with ``xmltodict`` and every ``RULE_SET`` entry under
    ``DATA_IMPORT`` is inspected. For each entry the following values are read:

    * ``RULE/@TYPE``
    * ``RULE/@NAME``
    * ``RULE/ACTION_EXPRESSION_SET/ACTION_EXPRESSION/FUNCTION/OUTPUT_REFERENCE/@NAME``

    Entries that do not expose that exact path (a key is missing, or an
    intermediate node is a list/None/text instead of a mapping) are skipped.

    Parameters
    ----------
    xml_file_name : str or path-like
        Path of the XML file to read.

    Returns
    -------
    pandas.DataFrame
        One row per usable rule set with the columns ``RULE_TYPE``,
        ``RULE_NAME`` and ``OUTPUT_REFERENCE_NAME`` (in that order). The frame
        is empty, but still has these columns, when no rule set is usable.

    Raises
    ------
    KeyError
        If the parsed document has no ``DATA_IMPORT`` / ``RULE_SET`` entry.
    """
    with open(xml_file_name) as xml_file:
        data_dict = xmltodict.parse(xml_file.read())

    rule_sets = data_dict['DATA_IMPORT']['RULE_SET']
    # xmltodict only produces a list for repeated elements; a document with a
    # single <RULE_SET> yields a bare mapping. Normalise so both shapes are
    # handled the same way instead of silently returning no rows.
    if not isinstance(rule_sets, list):
        rule_sets = [rule_sets]

    records = []
    for rule_set in rule_sets:
        try:
            rule = rule_set['RULE']
            rule_name = rule['@NAME']
            rule_type = rule['@TYPE']
            output_reference = rule['ACTION_EXPRESSION_SET']['ACTION_EXPRESSION']['FUNCTION']['OUTPUT_REFERENCE']['@NAME']
        except (KeyError, TypeError):
            # Best-effort extraction: this rule set does not have the expected
            # single-rule / single-action shape, so it is left out. Only
            # lookup failures are caught; anything else propagates.
            continue
        # Append only after all three lookups succeeded so rows stay aligned.
        records.append({
            'RULE_TYPE': rule_type,
            'RULE_NAME': rule_name,
            'OUTPUT_REFERENCE_NAME': output_reference,
        })

    return pd.DataFrame(records, columns=['RULE_TYPE', 'RULE_NAME', 'OUTPUT_REFERENCE_NAME'])

# Parse the rule export into a DataFrame; the bare `df` on the last line makes
# the notebook render it as this cell's output.
df = xml_to_df("Rule_Name_Correct_Incorrect.xml")
df

Unnamed: 0,RULE_TYPE,RULE_NAME,OUTPUT_REFERENCE_NAME
0,DIRECT_TRANSACTION_CREDIT,CR_VAR_COMP_EARNED_PREMIUM,C_VAR_COMP_EARNED_PREMIUM
1,ROLLUP_TRANSACTION_CREDIT,CR_VAR_COMP_ROLLED_EARNED_PREMIUM,C_VAR_COMP_ROLLED_EARNED_PREMIUM
2,BULK_COMMISSION,IR_CL_VAR_COMP_COMMISSION,I_CL_VAR_COMP_COMMISSION
3,DEPOSIT,DR_VAR_COMP_COMMISSION,D_VAR_COMP_COMMISSION
4,SECONDARY_MEASUREMENT,SMR_PL_VAR_COMP_COMMISSION,SM_PL_VAR_COMP_COMMISSION
5,PRIMARY_MEASUREMENT,PMR_VAR_COMP_PL_NEW_EP,PM_VAR_COMP_PL_NEW_EP
6,BULK_COMMISSION,IR_CL_VAR_COMP_COMMISSION,IR_CL_VAR_COMP_COMMISSION
7,DEPOSIT,DRR_COMP_COMMISSION,DR_COMP_COMMISSION
8,PRIMARY_MEASUREMENT,PMR_VAR_COMP_PL_RENEW_PREMIUM,PMR_VAR_COMP_PL_RENEW_PREMIUM
9,SECONDARY_MEASUREMENT,SMR_VAR_COMP_PL_Total_NEW_Premium,SMR_VAR_COMP_PL_Total_NEW_Premium


In [3]:
import pandas as pd
import ollama
from langchain_ollama import ChatOllama
# Step 1: Sample DataFrame
# data = {
#     "RULE_TYPE": ["SECONDARY_MEASUREMENT", "PRIMARY_MEASUREMENT"],
#     "RULE_NAME": ["SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR", "WRONG_NAME"],
#     "OUTPUT_REFERENCE_NAME": ["SM_PL_VAR_COMP_LOSS_RATIO_FACTOR", "XYZ_OUTPUT"]
# }
# df = pd.DataFrame(data)

# Step 2: Few-shot examples
# Labelled examples that build_prompt() renders into the prompt, one line each.
# Every entry carries the three columns being validated plus the expected
# verdict ("VALID" or "Not Matching"). The labels must agree with the rules
# spelled out in the prompt text, otherwise the examples contradict the rules.
few_shots = [
    # SECONDARY_MEASUREMENT
    {
        "RULE_TYPE": "SECONDARY_MEASUREMENT",
        "RULE_NAME": "SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "OUTPUT_REFERENCE_NAME": "SM_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "SECONDARY_MEASUREMENT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "SM_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "result": "Not Matching"
    },
    {
        "RULE_TYPE": "SECONDARY_MEASUREMENT",
        "RULE_NAME": "SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "OUTPUT_REFERENCE_NAME": "WRONG_OUTPUT",
        "result": "Not Matching"
    },
    # DIRECT_TRANSACTION_CREDIT
    {
        "RULE_TYPE": "DIRECT_TRANSACTION_CREDIT",
        "RULE_NAME": "CR_VAR_COMP_PREMIUM",
        "OUTPUT_REFERENCE_NAME": "C_VAR_COMP_PREMIUM",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "DIRECT_TRANSACTION_CREDIT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        "result": "Not Matching"
    },
    # ROLLUP_TRANSACTION_CREDIT
    {
        "RULE_TYPE": "ROLLUP_TRANSACTION_CREDIT",
        "RULE_NAME": "CR_VAR_COMP_ROLLED_EARNED_PREMIUM",
        "OUTPUT_REFERENCE_NAME": "C_VAR_COMP_ROLLED_EARNED_PREMIUM",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "ROLLUP_TRANSACTION_CREDIT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        "result": "Not Matching"
    },
    # BULK_COMMISSION
    {
        "RULE_TYPE": "BULK_COMMISSION",
        "RULE_NAME": "IR_CL_VAR_COMP_COMMISSION",
        "OUTPUT_REFERENCE_NAME": "I_CL_VAR_COMP_COMMISSION",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "BULK_COMMISSION",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        "result": "Not Matching"
    },
    # DEPOSIT
    {
        "RULE_TYPE": "DEPOSIT",
        "RULE_NAME": "DR_VAR_COMP_COMMISSION",
        "OUTPUT_REFERENCE_NAME": "D_VAR_COMP_COMMISSION",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "DEPOSIT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        # Was labelled "VALID", which contradicted the DEPOSIT rule in the
        # prompt and every other WRONG_NAME example.
        "result": "Not Matching"
    }
]

# Step 3: Build prompt
def build_prompt(row, few_shots):
    """Assemble the validation prompt for a single row.

    The prompt consists of, in order: a role line, the naming rules per
    RULE_TYPE, one rendered line per few-shot example, and finally the row to
    evaluate followed by a trailing ``Result:`` cue.

    Parameters
    ----------
    row : mapping
        Must provide the keys ``RULE_TYPE``, ``RULE_NAME`` and
        ``OUTPUT_REFERENCE_NAME``.
    few_shots : iterable of mapping
        Each item must provide the same three keys plus ``result``.

    Returns
    -------
    str
        The complete prompt text.
    """
    # Fixed preamble: role, rules, and the header that introduces the examples.
    sections = [
        "You are a smart data validator.\n",
        "Rule: If RULE_TYPE is 'SECONDARY_MEASUREMENT', then RULE_NAME must be 'SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR' and OUTPUT_REFERENCE_NAME must be 'SM_PL_VAR_COMP_LOSS_RATIO_FACTOR' on the other hand  RULE_TYPE is 'DIRECT_TRANSACTION_CREDIT', then RULE_NAME must be 'CR_VAR_COMP_PREMIUM' and OUTPUT_REFERENCE_NAME must be 'C_VAR_COMP_PREMIUM'. Again if RULE_TYPE is 'ROLLUP_TRANSACTION_CREDIT', then RULE_NAME must be 'CR_VAR_COMP_ROLLED_EARNED_PREMIUM' and OUTPUT_REFERENCE_NAME must be 'C_VAR_COMP_ROLLED_EARNED_PREMIUM'.\n",
        "Again if the RULE_TYPE is 'BULK_COMMISSION', then RULE_NAME must be 'IR_CL_VAR_COMP_COMMISSION' and OUTPUT_REFERENCE_NAME must be 'I_CL_VAR_COMP_COMMISSION'.\n",
        "If the RULE_TYPE is 'DEPOSIT', then 'RULE_NAME' must be 'DR_VAR_COMP_COMMISSION' and the OUTPUT_REFERENCE_NAME must be 'D_VAR_COMP_COMMISSION'.\n\n",
        "Here are some examples:\n",
    ]

    # One "inputs -> verdict" line per labelled example, in the given order.
    sections.extend(
        f"RULE_TYPE: {shot['RULE_TYPE']}, RULE_NAME: {shot['RULE_NAME']}, OUTPUT_REFERENCE_NAME: {shot['OUTPUT_REFERENCE_NAME']} -> {shot['result']}\n"
        for shot in few_shots
    )

    # The actual question, ending with an open "Result:" for the model to fill.
    sections.append("\nNow evaluate the following only answer whether its is VALID or Not Matching:\n")
    sections.append(f"RULE_TYPE: {row['RULE_TYPE']}, RULE_NAME: {row['RULE_NAME']}, OUTPUT_REFERENCE_NAME: {row['OUTPUT_REFERENCE_NAME']}\n")
    sections.append("Result:")
    return "".join(sections)

# Step 4: Call local LLaMA model using Ollama
def validate_row_with_llama(row):
    """Ask the local ``llama3`` model (via Ollama) for a verdict on one row.

    The prompt is built by ``build_prompt`` from ``row`` and the module-level
    ``few_shots`` list and sent as the user message, preceded by a short
    system message. Sampling temperature is set to 0 through ``options``.

    Parameters
    ----------
    row : mapping
        A DataFrame row (or any mapping) accepted by ``build_prompt``.

    Returns
    -------
    str
        The content of the model's reply message with surrounding whitespace
        removed. The text is returned as-is; it is not reduced to a label.
    """
    conversation = [
        {"role": "system", "content": "You are a smart data validator."},
        {"role": "user", "content": build_prompt(row, few_shots)},
    ]
    reply = ollama.chat(
        model="llama3",
        messages=conversation,
        options={'temperature': 0},
    )
    answer_text = reply['message']['content']
    return answer_text.strip()

# Step 5: Apply to DataFrame
# Row-wise apply (axis=1): validate_row_with_llama is invoked once per row, so
# this issues one model request per row. The returned strings are stored in a
# new LLM_VALIDATION column on `df` itself (the frame is modified in place).
df["LLM_VALIDATION"] = df.apply(validate_row_with_llama, axis=1)

# Print the results (plain-text rendering; wide frames wrap across lines)
print(df)

                    RULE_TYPE                          RULE_NAME  \
0   DIRECT_TRANSACTION_CREDIT         CR_VAR_COMP_EARNED_PREMIUM   
1   ROLLUP_TRANSACTION_CREDIT  CR_VAR_COMP_ROLLED_EARNED_PREMIUM   
2             BULK_COMMISSION          IR_CL_VAR_COMP_COMMISSION   
3                     DEPOSIT             DR_VAR_COMP_COMMISSION   
4       SECONDARY_MEASUREMENT         SMR_PL_VAR_COMP_COMMISSION   
5         PRIMARY_MEASUREMENT             PMR_VAR_COMP_PL_NEW_EP   
6             BULK_COMMISSION          IR_CL_VAR_COMP_COMMISSION   
7                     DEPOSIT                DRR_COMP_COMMISSION   
8         PRIMARY_MEASUREMENT      PMR_VAR_COMP_PL_RENEW_PREMIUM   
9       SECONDARY_MEASUREMENT  SMR_VAR_COMP_PL_Total_NEW_Premium   
10  DIRECT_TRANSACTION_CREDIT                 C_VAR_COMP_PREMIUM   
11            BULK_COMMISSION          IR_CL_VAR_COMP_COMMISSION   
12                    DEPOSIT                DRR_COMP_COMMISSION   
13        PRIMARY_MEASUREMENT      PMR_VAR_COMP_

In [4]:
df

Unnamed: 0,RULE_TYPE,RULE_NAME,OUTPUT_REFERENCE_NAME,LLM_VALIDATION
0,DIRECT_TRANSACTION_CREDIT,CR_VAR_COMP_EARNED_PREMIUM,C_VAR_COMP_EARNED_PREMIUM,Not Matching
1,ROLLUP_TRANSACTION_CREDIT,CR_VAR_COMP_ROLLED_EARNED_PREMIUM,C_VAR_COMP_ROLLED_EARNED_PREMIUM,VALID
2,BULK_COMMISSION,IR_CL_VAR_COMP_COMMISSION,I_CL_VAR_COMP_COMMISSION,VALID
3,DEPOSIT,DR_VAR_COMP_COMMISSION,D_VAR_COMP_COMMISSION,VALID
4,SECONDARY_MEASUREMENT,SMR_PL_VAR_COMP_COMMISSION,SM_PL_VAR_COMP_COMMISSION,Not Matching
5,PRIMARY_MEASUREMENT,PMR_VAR_COMP_PL_NEW_EP,PM_VAR_COMP_PL_NEW_EP,Not Matching
6,BULK_COMMISSION,IR_CL_VAR_COMP_COMMISSION,IR_CL_VAR_COMP_COMMISSION,VALID
7,DEPOSIT,DRR_COMP_COMMISSION,DR_COMP_COMMISSION,Not Matching
8,PRIMARY_MEASUREMENT,PMR_VAR_COMP_PL_RENEW_PREMIUM,PMR_VAR_COMP_PL_RENEW_PREMIUM,Not Matching
9,SECONDARY_MEASUREMENT,SMR_VAR_COMP_PL_Total_NEW_Premium,SMR_VAR_COMP_PL_Total_NEW_Premium,Not Matching


In [5]:
# Spot-check: value at row position 0, column position 0 of df.
df.iloc[0,0]

'DIRECT_TRANSACTION_CREDIT'

# Using SLM

In [7]:
import pandas as pd
import ollama
from langchain_ollama import ChatOllama
# Step 1: Sample DataFrame
# data = {
#     "RULE_TYPE": ["SECONDARY_MEASUREMENT", "PRIMARY_MEASUREMENT"],
#     "RULE_NAME": ["SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR", "WRONG_NAME"],
#     "OUTPUT_REFERENCE_NAME": ["SM_PL_VAR_COMP_LOSS_RATIO_FACTOR", "XYZ_OUTPUT"]
# }
# df = pd.DataFrame(data)

# Step 2: Few-shot examples
# Labelled examples that build_prompt() renders into the prompt, one line each.
# Every entry carries the three columns being validated plus the expected
# verdict ("VALID" or "Not Matching"). The labels must agree with the rules
# spelled out in the prompt text, otherwise the examples contradict the rules.
few_shots = [
    # SECONDARY_MEASUREMENT
    {
        "RULE_TYPE": "SECONDARY_MEASUREMENT",
        "RULE_NAME": "SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "OUTPUT_REFERENCE_NAME": "SM_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "SECONDARY_MEASUREMENT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "SM_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "result": "Not Matching"
    },
    {
        "RULE_TYPE": "SECONDARY_MEASUREMENT",
        "RULE_NAME": "SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR",
        "OUTPUT_REFERENCE_NAME": "WRONG_OUTPUT",
        "result": "Not Matching"
    },
    # DIRECT_TRANSACTION_CREDIT
    {
        "RULE_TYPE": "DIRECT_TRANSACTION_CREDIT",
        "RULE_NAME": "CR_VAR_COMP_PREMIUM",
        "OUTPUT_REFERENCE_NAME": "C_VAR_COMP_PREMIUM",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "DIRECT_TRANSACTION_CREDIT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        "result": "Not Matching"
    },
    # ROLLUP_TRANSACTION_CREDIT
    {
        "RULE_TYPE": "ROLLUP_TRANSACTION_CREDIT",
        "RULE_NAME": "CR_VAR_COMP_ROLLED_EARNED_PREMIUM",
        "OUTPUT_REFERENCE_NAME": "C_VAR_COMP_ROLLED_EARNED_PREMIUM",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "ROLLUP_TRANSACTION_CREDIT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        "result": "Not Matching"
    },
    # BULK_COMMISSION
    {
        "RULE_TYPE": "BULK_COMMISSION",
        "RULE_NAME": "IR_CL_VAR_COMP_COMMISSION",
        "OUTPUT_REFERENCE_NAME": "I_CL_VAR_COMP_COMMISSION",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "BULK_COMMISSION",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        "result": "Not Matching"
    },
    # DEPOSIT
    {
        "RULE_TYPE": "DEPOSIT",
        "RULE_NAME": "DR_VAR_COMP_COMMISSION",
        "OUTPUT_REFERENCE_NAME": "D_VAR_COMP_COMMISSION",
        "result": "VALID"
    },
    {
        "RULE_TYPE": "DEPOSIT",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "WRONG_NAME",
        # Was labelled "VALID", which contradicted the DEPOSIT rule in the
        # prompt and every other WRONG_NAME example.
        "result": "Not Matching"
    },
    # BULK_COMMISSION again: wrong rule name paired with an output reference
    # that itself carries the rule-name prefix.
    {
        "RULE_TYPE": "BULK_COMMISSION",
        "RULE_NAME": "WRONG_NAME",
        "OUTPUT_REFERENCE_NAME": "IR_CL_VAR_COMP_COMMISSION",
        "result": "Not Matching"
    }
]

# Step 3: Build prompt
def build_prompt(row, few_shots):
    """Assemble the validation prompt for a single row.

    The prompt consists of, in order: a role line, the naming rules per
    RULE_TYPE, the constraint that RULE_NAME and OUTPUT_REFERENCE_NAME may not
    be equal, one rendered line per few-shot example, and finally the row to
    evaluate followed by a trailing ``Result:`` cue.

    Parameters
    ----------
    row : mapping
        Must provide the keys ``RULE_TYPE``, ``RULE_NAME`` and
        ``OUTPUT_REFERENCE_NAME``.
    few_shots : iterable of mapping
        Each item must provide the same three keys plus ``result``.

    Returns
    -------
    str
        The complete prompt text.
    """
    # Fixed preamble: role, rules, the same-value constraint, examples header.
    sections = [
        "You are a smart data validator.\n",
        "Rule: If RULE_TYPE is 'SECONDARY_MEASUREMENT', then RULE_NAME must be 'SMR_PL_VAR_COMP_LOSS_RATIO_FACTOR' and OUTPUT_REFERENCE_NAME must be 'SM_PL_VAR_COMP_LOSS_RATIO_FACTOR' on the other hand  RULE_TYPE is 'DIRECT_TRANSACTION_CREDIT', then RULE_NAME must be 'CR_VAR_COMP_PREMIUM' and OUTPUT_REFERENCE_NAME must be 'C_VAR_COMP_PREMIUM'. Again if RULE_TYPE is 'ROLLUP_TRANSACTION_CREDIT', then RULE_NAME must be 'CR_VAR_COMP_ROLLED_EARNED_PREMIUM' and OUTPUT_REFERENCE_NAME must be 'C_VAR_COMP_ROLLED_EARNED_PREMIUM'.\n",
        "Again if the RULE_TYPE is 'BULK_COMMISSION', then RULE_NAME must be 'IR_CL_VAR_COMP_COMMISSION' and OUTPUT_REFERENCE_NAME must be 'I_CL_VAR_COMP_COMMISSION'.\n",
        "If the RULE_TYPE is 'DEPOSIT', then 'RULE_NAME' must be 'DR_VAR_COMP_COMMISSION' and the OUTPUT_REFERENCE_NAME must be 'D_VAR_COMP_COMMISSION'.\n\n",
        "Always keep in mind that the RULE_NAME and OUTPUT_REFERENCE_NAME cannot have the same value.\n\n",
        "Here are some examples:\n",
    ]

    # One "inputs -> verdict" line per labelled example, in the given order.
    sections.extend(
        f"RULE_TYPE: {shot['RULE_TYPE']}, RULE_NAME: {shot['RULE_NAME']}, OUTPUT_REFERENCE_NAME: {shot['OUTPUT_REFERENCE_NAME']} -> {shot['result']}\n"
        for shot in few_shots
    )

    # The actual question, ending with an open "Result:" for the model to fill.
    sections.append("\nNow evaluate the following only answer whether its is VALID or Not Matching:\n")
    sections.append(f"RULE_TYPE: {row['RULE_TYPE']}, RULE_NAME: {row['RULE_NAME']}, OUTPUT_REFERENCE_NAME: {row['OUTPUT_REFERENCE_NAME']}\n")
    sections.append("Result:")
    return "".join(sections)

# Step 4: Call local LLaMA model using Ollama
def validate_row_with_llama(row):
    """Ask the local ``llama3`` model (via Ollama) for a verdict on one row.

    The prompt is built by ``build_prompt`` from ``row`` and the module-level
    ``few_shots`` list and sent as the user message, preceded by a short
    system message. Sampling temperature is set to 0 through ``options``.

    Parameters
    ----------
    row : mapping
        A DataFrame row (or any mapping) accepted by ``build_prompt``.

    Returns
    -------
    str
        The content of the model's reply message with surrounding whitespace
        removed. The text is returned as-is; it is not reduced to a label.
    """
    user_prompt = build_prompt(row, few_shots)
    system_message = {"role": "system", "content": "You are a smart data validator."}
    user_message = {"role": "user", "content": user_prompt}
    reply = ollama.chat(
        model="llama3",
        messages=[system_message, user_message],
        options={'temperature': 0},
    )
    return reply['message']['content'].strip()

# Step 5: Apply to DataFrame
# Row-wise apply (axis=1): validate_row_with_llama is invoked once per row, so
# this issues one model request per row. Assigning to df["LLM_VALIDATION"]
# writes the column on `df` itself, replacing any existing column of that name.
df["LLM_VALIDATION"] = df.apply(validate_row_with_llama, axis=1)

# Print the results (plain-text rendering; wide frames wrap across lines)
print(df)

                    RULE_TYPE                          RULE_NAME  \
0   DIRECT_TRANSACTION_CREDIT         CR_VAR_COMP_EARNED_PREMIUM   
1   ROLLUP_TRANSACTION_CREDIT  CR_VAR_COMP_ROLLED_EARNED_PREMIUM   
2             BULK_COMMISSION          IR_CL_VAR_COMP_COMMISSION   
3                     DEPOSIT             DR_VAR_COMP_COMMISSION   
4       SECONDARY_MEASUREMENT         SMR_PL_VAR_COMP_COMMISSION   
5         PRIMARY_MEASUREMENT             PMR_VAR_COMP_PL_NEW_EP   
6             BULK_COMMISSION          IR_CL_VAR_COMP_COMMISSION   
7                     DEPOSIT                DRR_COMP_COMMISSION   
8         PRIMARY_MEASUREMENT      PMR_VAR_COMP_PL_RENEW_PREMIUM   
9       SECONDARY_MEASUREMENT  SMR_VAR_COMP_PL_Total_NEW_Premium   
10  DIRECT_TRANSACTION_CREDIT                 C_VAR_COMP_PREMIUM   
11            BULK_COMMISSION          IR_CL_VAR_COMP_COMMISSION   
12                    DEPOSIT                DRR_COMP_COMMISSION   
13        PRIMARY_MEASUREMENT      PMR_VAR_COMP_

In [8]:
df

Unnamed: 0,RULE_TYPE,RULE_NAME,OUTPUT_REFERENCE_NAME,LLM_VALIDATION
0,DIRECT_TRANSACTION_CREDIT,CR_VAR_COMP_EARNED_PREMIUM,C_VAR_COMP_EARNED_PREMIUM,VALID
1,ROLLUP_TRANSACTION_CREDIT,CR_VAR_COMP_ROLLED_EARNED_PREMIUM,C_VAR_COMP_ROLLED_EARNED_PREMIUM,VALID
2,BULK_COMMISSION,IR_CL_VAR_COMP_COMMISSION,I_CL_VAR_COMP_COMMISSION,VALID
3,DEPOSIT,DR_VAR_COMP_COMMISSION,D_VAR_COMP_COMMISSION,VALID
4,SECONDARY_MEASUREMENT,SMR_PL_VAR_COMP_COMMISSION,SM_PL_VAR_COMP_COMMISSION,Not Matching\n\nReason: The RULE_NAME 'SMR_PL_...
5,PRIMARY_MEASUREMENT,PMR_VAR_COMP_PL_NEW_EP,PM_VAR_COMP_PL_NEW_EP,Not Matching\n\nThe given RULE_TYPE is 'PRIMAR...
6,BULK_COMMISSION,IR_CL_VAR_COMP_COMMISSION,IR_CL_VAR_COMP_COMMISSION,Not Matching\n\nReason: According to the rules...
7,DEPOSIT,DRR_COMP_COMMISSION,DR_COMP_COMMISSION,"Not Matching.\n\nThe RULE_TYPE is 'DEPOSIT', w..."
8,PRIMARY_MEASUREMENT,PMR_VAR_COMP_PL_RENEW_PREMIUM,PMR_VAR_COMP_PL_RENEW_PREMIUM,Not Matching\n\nThe given RULE_TYPE is 'PRIMAR...
9,SECONDARY_MEASUREMENT,SMR_VAR_COMP_PL_Total_NEW_Premium,SMR_VAR_COMP_PL_Total_NEW_Premium,Not Matching\n\nThe RULE_NAME 'SMR_VAR_COMP_PL...


In [9]:
# Show the untruncated last-column value for the row at position 11.
# NOTE(review): the frame displayed in the preceding cell lists only positions
# 0-9 — confirm that df really has a row at position 11 after a fresh
# Restart & Run All, otherwise this raises IndexError.
df.iloc[11,-1]

"Not Matching\n\nReason: According to the rules, if RULE_TYPE is 'BULK_COMMISSION', then RULE_NAME must be 'IR_CL_VAR_COMP_COMMISSION' and OUTPUT_REFERENCE_NAME must be 'I_CL_VAR_COMP_COMMISSION'. Since OUTPUT_REFERENCE_NAME has the same value as RULE_NAME, it does not match the rule."