In [None]:
# Build and install llama-cpp-python from source; CMAKE_ARGS is forwarded to the
# CMake build so the wheel is compiled with GPU (cuBLAS/CUDA) support.
# NOTE(review): LLAMA_CUBLAS looks like the older flag name; newer llama.cpp
# builds appear to expect a different CUDA flag (e.g. GGML_CUDA) - confirm
# against the version that actually gets installed.
# NOTE(review): the package version is not pinned, so a fresh run may pull a
# different release than the one this notebook was written against.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python

In [None]:
import pandas as pd
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

In [None]:
# Hugging Face repository and the specific GGUF weight file to fetch from it.
model_name = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
model_file = "Meta-Llama-3-8B-Instruct.Q8_0.gguf"

# Download the weights into /content and keep the resulting local path.
model_path = hf_hub_download(
    repo_id=model_name,
    filename=model_file,
    local_dir='/content',
)
print("My model path: ", model_path)

In [None]:
# Load the GGUF model from the path returned by hf_hub_download (instead of a
# second hard-coded copy of that path, which could drift if the repo, file
# name or download directory is changed in the download cell).
# n_gpu_layers=-1 asks llama-cpp-python to offload every layer to the GPU;
# n_ctx=4096 sets the context window size in tokens.
llm = Llama(model_path=model_path,
            n_gpu_layers=-1,
            n_ctx=4096)

In [None]:
import ast

def format_prompt(context, question, legal_act):
    """Build the instruction prompt for one signing-authority example.

    The prompt embeds the signing condition, the number and list of signed
    committee members, and the action being signed. After formatting, a fixed
    sequence of Thai-to-English/digit substitutions is applied to the WHOLE
    prompt text (so it also affects ``question`` and ``legal_act``).

    Args:
        context: Text of the signing condition.
        question: The signed names, either as the string form of a Python
            list literal (e.g. ``"['A', 'B']"``) or as an already-parsed
            list/tuple.
        legal_act: Description of the action the signing is about.

    Returns:
        The prompt string with the substitutions applied.

    Raises:
        ValueError / SyntaxError: if ``question`` is a string that is not a
            valid Python literal (raised by ``ast.literal_eval``).
    """
    # Accept both the CSV string form and an already-parsed sequence.
    signers = ast.literal_eval(question) if isinstance(question, str) else question
    signed_count = len(signers)
    template = f""" Your task is to determine if the signed names meets the condition.
    You'll be given a condition, and a list of signed committee.
    You'll also be given an action that the signing is about.
    ###You can ignore the extra names if the condition is already met
    ###Never assume that there's more committee signed than given.
    Consider action and check if there's any special conditions for that action
    Condition:{context}
    Number of signed signatures: {signed_count}
    List of comittees that signed(signed signatures) {question}
    action: {legal_act}
    #You MUST End the Answer with True if any one of the condition is met, False otherwise"""

    # Order matters: longer phrases are rewritten before the shorter phrases
    # they contain. "สามารถ" is temporarily masked because it contains "สาม"
    # (three), which would otherwise be turned into a digit below.
    phrase_rewrites = (
        ("และประทับตราสำคัญของบริษัท", " "),
        ("ประทับตรา", ""),
        ("และ", " and "),
        ("หรือ", " or "),
        ("คนใดคนหนึ่ง", " 1 คน "),
        ("สามารถ", "####CAN####"),
    )
    for thai_phrase, replacement in phrase_rewrites:
        template = template.replace(thai_phrase, replacement)

    # Thai number words -> digits. 'หก' (six) was missing from the original
    # mapping, so "six" was the only numeral from one to ten left unconverted.
    # NOTE(review): these are plain substring replacements, so a number word
    # embedded in an unrelated longer word is also rewritten.
    num_dict = {'หนึ่ง': ' 1 ',
                'สอง': ' 2 ',
                'สาม': ' 3 ',
                'สี่': ' 4 ',
                'ห้า': ' 5 ',
                'หก': ' 6 ',
                'เจ็ด': ' 7 ',
                'แปด': ' 8 ',
                'เก้า': ' 9 ',
                'สิบ': ' 10 '}
    for thai_number, digit in num_dict.items():
        template = template.replace(thai_number, digit)

    # Restore the masked word now that the numeral pass is done.
    template = template.replace("####CAN####", "สามารถ")

    return template

In [None]:
# Load the evaluation set and preview its first rows.
test_csv_path = '/content/test_John_1.csv'
df = pd.read_csv(test_csv_path)
df.head()

In [None]:
# Build one prompt per row from the condition, signed-name list and action columns.
prompt_list = [
    format_prompt(context, question, legal_act)
    for context, question, legal_act in zip(
        df['new_context'], df['new_question'], df['legal_act']
    )
]

In [None]:
# Store the generated prompts alongside the source rows.
df['prompt'] = prompt_list
# Spot-check the prompt generated for the row labelled 10.
df.loc[10, 'prompt']

In [None]:
# Number of contiguous chunks to split the DataFrame rows into.
num_parts = 8

# Whole rows per chunk; any remainder ends up in the final chunk.
rows_per_part = len(df) // num_parts

# Chunk boundaries: the start offset of each chunk, then the end of the frame.
indices = [rows_per_part * part for part in range(num_parts)]
indices.append(len(df))

# Show the boundaries so a row range can be picked for a run.
print(indices)

In [None]:
from tqdm import tqdm

# Positional row range processed in this run.
# NOTE(review): presumably one of the chunks from the printed split indices - confirm.
index1 = 4374
index2 = 5103

system_prompt = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."

# Raw model replies, one per prompt in the selected range.
ans_list = []
for prompt_text in tqdm(df['prompt'][index1:index2]):
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt_text},
    ]
    # temperature=0.0 is passed to the sampler for every request.
    completion = llm.create_chat_completion(
        messages=chat_messages,
        temperature=0.0,
    )
    reply_text = completion['choices'][0]['message']['content']
    ans_list.append(reply_text)
ans_list

In [None]:
def check_ans(ans):
    """Turn a free-text model reply into a binary verdict.

    The reply is upper-cased and searched for positive markers ('TRUE', 'YES',
    'CONDITION IS MET') and negative markers ('FALSE', 'CONDITION IS NOT MET').
    Whichever group has the marker that starts latest in the text wins.

    Returns:
        1 if the last positive marker starts at or after the last negative one,
        0 if a negative marker starts later, and None if no marker is present.
    """
    text = ans.upper()
    positive_markers = ('TRUE', 'YES', 'CONDITION IS MET')
    negative_markers = ('FALSE', 'CONDITION IS NOT MET')

    # rfind gives -1 for a missing marker, so max() is -1 only if none matched.
    last_positive = max(text.rfind(marker) for marker in positive_markers)
    last_negative = max(text.rfind(marker) for marker in negative_markers)

    if last_positive == -1 and last_negative == -1:
        return None
    return 1 if last_positive >= last_negative else 0

In [None]:
# Convert every raw model reply into 1 / 0 / None via check_ans.
final_ans_list = [check_ans(reply) for reply in ans_list]
final_ans_list

In [None]:
# Re-read the source CSV and keep only the rows processed in this run.
# .copy() makes df_100 an independent frame: new columns are assigned to it
# later, and assigning to a bare slice of another DataFrame triggers pandas'
# SettingWithCopyWarning. .iloc makes the positional slicing explicit.
df_100 = pd.read_csv('/content/test_John_1.csv').iloc[index1:index2].copy()

In [None]:
# Add the parsed verdicts and the raw model replies as new columns.
model_outputs = {'model_ans': final_ans_list, 'model_text': ans_list}
for column_name, column_values in model_outputs.items():
    df_100[column_name] = column_values

In [None]:
# Write this run's rows to "<index1>_<index2>.csv" without the index column.
output_csv = f'{index1}_{index2}.csv'
df_100.to_csv(output_csv, index=False)