In [1]:
!pip install pdfplumber
!pip install transformers
!pip install huggingface_hub



In [None]:
import pdfplumber
from transformers import pipeline
from huggingface_hub import hf_hub_download
import json
import pandas as pd

# Step 1: Extract Text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, one page per newline-terminated chunk.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        A single string: each page's extracted text followed by "\\n", in
        page order. Pages that yield no text contribute just the newline.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may hand back None for a page without a text layer
        # (e.g. a scanned image); fall back to "" so concatenation cannot
        # raise TypeError. join() also avoids repeated string re-allocation.
        return "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)

# Step 2: Load the LLM (Llama 2 chat model) for text generation, with error handling
#def load_llm_model():
#    try:
#        return pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1", token="")
#    except Exception as e:
#        print(f"Error loading LLM model: {e}")
#        return None

def load_llm_model(model_name="meta-llama/Llama-2-7b-chat-hf", token=None, device="cpu"):
    """Create the Hugging Face text-generation pipeline.

    Args:
        model_name: Hub id of the model to load. Another causal LM such as
            "EleutherAI/gpt-neo-2.7B" can be passed instead of the default.
        token: Hugging Face access token. When omitted or empty, the
            HF_TOKEN environment variable is used; if that is unset too,
            None is passed so the library applies its own credential lookup.
        device: Device the pipeline runs on (defaults to "cpu").

    Returns:
        The pipeline object, or None when loading failed (the error is
        printed rather than raised).
    """
    import os  # local import so this block does not depend on file-level imports

    # Never forward an empty string as a credential: resolve a real token
    # or pass None.
    resolved_token = token or os.environ.get("HF_TOKEN") or None

    try:
        return pipeline(
            "text-generation",
            model=model_name,
            token=resolved_token,
            device=device,
        )
    except Exception as e:
        print(f"Error loading LLM model: {e}")
        return None

# Load the model once at module level; `llm` is None if loading failed,
# which generate_validation_functions() checks before using it.
llm = load_llm_model()

def generate_validation_functions(rules_text, max_new_tokens=1024):
    """Ask the loaded LLM to write Python validation functions for the given rules.

    Args:
        rules_text: Regulatory rules (plain text) to embed in the prompt.
        max_new_tokens: Upper bound on the number of tokens generated *in
            addition to* the prompt.

    Returns:
        The text produced by the model (without the prompt), or "" when
        there is no input, the model is unavailable, or generation fails.
    """
    # Nothing to base the functions on: skip the (expensive) model call.
    if not rules_text or not rules_text.strip():
        print("No rules text was provided. Skipping code generation.")
        return ""

    if llm is None:
        print("LLM model could not be loaded. Exiting.")
        return ""

    prompt = f"""
    Based on the following regulatory risk rules, generate Python validation functions:
    {rules_text}
    Each function should check a condition and return True/False with an appropriate message.
    """

    try:
        # max_new_tokens budgets only the completion; max_length would also
        # count the prompt, leaving no room to generate for a long rules text.
        # return_full_text=False keeps the echoed prompt out of the result,
        # which callers treat as source code.
        outputs = llm(
            prompt,
            max_new_tokens=max_new_tokens,
            return_full_text=False,
        )
        return outputs[0]['generated_text']
    except Exception as e:
        print(f"Error generating validation functions: {e}")
        return ""

# Step 3: Process the PDF and Generate Code
# NOTE(review): "Repoting" looks like a typo for "Reporting" — confirm the
# actual file name on disk before changing this path.
pdf_path = "Regulatory_Repoting_Instructions.pdf"
rules_text = extract_text_from_pdf(pdf_path)
generated_code = generate_validation_functions(rules_text)

# Step 4: Save and Execute the Generated Code
if generated_code:
    generated_script_path = "validation_rules.py"
    # Explicit UTF-8: model output may contain non-ASCII characters that the
    # platform default encoding cannot represent.
    with open(generated_script_path, "w", encoding="utf-8") as file:
        file.write(generated_code)
    try:
        # SECURITY: this runs unreviewed, model-generated code with full
        # privileges in the current namespace. Review validation_rules.py
        # (or run it in a sandbox) before trusting this step.
        exec(generated_code)  # Dynamically define validation functions
    except Exception as e:
        print(f"Error executing generated code: {e}")

# Step 5: Example Transaction Validation (Assuming LLM generates functions like validate_transaction_amount)
def validate_transaction(transaction):
    """Run whichever generated validators exist against one transaction.

    Each validator is looked up by name in this module's globals and is
    skipped when it has not been defined.

    Args:
        transaction: Mapping holding the transaction's fields.

    Returns:
        Dict mapping a check label to whatever the matching validator
        returned; empty when no validator is defined.
    """
    namespace = globals()
    outcome = {}

    if 'validate_transaction_amount' in namespace:
        check_amount = namespace['validate_transaction_amount']
        outcome['Transaction Amount'] = check_amount(
            transaction['Transaction_Amount'],
            transaction['Reported_Amount'],
            transaction.get('Is_Cross_Currency', False),
        )

    if 'validate_account_balance' in namespace:
        check_balance = namespace['validate_account_balance']
        outcome['Account Balance'] = check_balance(
            transaction['Account_Balance'],
            transaction.get('OD_Flag', False),
        )

    return outcome

# Step 6: Read Transactions from CSV and Validate
csv_path = "customer_transactions.csv"
try:
    df = pd.read_csv(csv_path)
except Exception as e:
    # Only the read itself is covered here, so this message is accurate.
    print(f"Error reading CSV file: {e}")
else:
    try:
        validation_results = []
        for _, row in df.iterrows():
            transaction = row.to_dict()
            validation_results.append(validate_transaction(transaction))

        # Print results
        print(json.dumps(validation_results, indent=4))
    except Exception as e:
        # Failures in the validators or in JSON serialization are reported
        # separately instead of being mislabeled as a CSV read error.
        print(f"Error validating transactions: {e}")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]