In [None]:
# Define the custom dataset class
class ExplanationDataset(Dataset):
    """Explanation texts paired with confidence scores, for regression training.

    Each item is a dict holding the tokenizer's outputs converted to tensors
    plus a float ``labels`` tensor.

    Args:
        dataframe: Frame with an ``explanation`` column (the texts) and a
            ``confidence_score`` column (the regression targets).
        tokenizer_name: Name or path handed to ``BertTokenizer.from_pretrained``.
            Defaults to the checkpoint that used to be hard-coded.
        max_length: Length every encoded text is truncated/padded to.
            Defaults to the previously hard-coded 512.
    """

    def __init__(self, dataframe, tokenizer_name='bert-base-uncased', max_length=512):
        self.labels = dataframe['confidence_score'].values
        self.texts = dataframe['explanation'].values
        self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
        self.max_length = max_length

    def __len__(self):
        """Return the number of (text, label) pairs."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it together with its label."""
        # Padding to a fixed length gives every item the same tensor shape.
        encoding = self.tokenizer(
            self.texts[idx], truncation=True, padding='max_length',
            max_length=self.max_length)
        item = {key: torch.tensor(val) for key, val in encoding.items()}
        # Regression requires float labels
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

# Function to train the model on the entire dataset
def train_model(df, model_name):
    """Fine-tune a BERT regression model on all of ``df`` and save the result.

    Writes the model weights to ``bert-finetuned-{model_name}.pt`` and the
    tokenizer files to the ``bert-finetuned-{model_name}`` directory. Trainer
    checkpoints and logs go to ``./results_{model_name}`` and
    ``./logs_{model_name}``.

    Args:
        df: Frame accepted by ``ExplanationDataset`` (``explanation`` and
            ``confidence_score`` columns).
        model_name: Suffix used in every output path (e.g. ``'layperson'``).
    """
    # Use the entire dataset for training (no held-out split)
    train_dataset = ExplanationDataset(df)

    # Custom BERT regression model; BertForRegression is defined outside this section
    model = BertForRegression()

    # No evaluation strategy is passed: the Trainer default is "no" evaluation,
    # which is what training on the full dataset needs, and leaving the keyword
    # out avoids the `evaluation_strategy` -> `eval_strategy` rename in newer
    # transformers releases.
    training_args = TrainingArguments(
        output_dir=f'./results_{model_name}',
        num_train_epochs=10,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        # NOTE(review): if the dataset yields fewer than 500 optimizer steps in
        # total, warmup never completes — confirm against the dataset size.
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir=f'./logs_{model_name}',
        logging_steps=10,
        save_steps=100,
        save_total_limit=2,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )

    # Train the model
    trainer.train()

    # Save the weights, and the very tokenizer the training data was encoded
    # with (instead of instantiating a second copy just to save it).
    torch.save(model.state_dict(), f'bert-finetuned-{model_name}.pt')
    train_dataset.tokenizer.save_pretrained(f'bert-finetuned-{model_name}')

# Read both explanation datasets from their CSV files up front
df_layperson = pd.read_csv('layperson_explanation_dataset.csv')
df_expert = pd.read_csv('expert_explanation_dataset.csv')

# Fine-tune one model per audience: layperson first, then expert
for audience_name, audience_df in (('layperson', df_layperson), ('expert', df_expert)):
    train_model(audience_df, audience_name)


In [None]:
import os

import openai
import torch
from transformers import BertTokenizer, BertForSequenceClassification, TextClassificationPipeline

# Read the API key from the environment so no credential is stored in the
# notebook; a missing OPENAI_API_KEY raises KeyError here, before any request.
# NOTE(review): the key that used to be hardcoded on this line has been exposed
# and should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Generating enriched explanation from mathematical sentence
# `domain` is the negotiation scenario embedded in the enrichment prompt; the
# sentences below are joined with single spaces into one paragraph.
domain = " ".join([
    "two agents representing two people living together while organizing a party negotiate over 6 issues:",
    "the food type, drinks type, location, type of invitations, music, and the clean-up service.",
    "Each issue further consists of 3 to 5 values, resulting in a domain with 3072 total possible outcomes.",
])

def enrich_explanation(sentence):
    """Ask the chat model for a one-to-two line explanation of ``sentence``.

    The module-level ``domain`` description is embedded in the request as
    context.

    Args:
        sentence: The statement to explain.

    Returns:
        The content of the first returned choice, with surrounding whitespace
        stripped.
    """
    instructions = "Provide a clear and concise explanation of the following statement in just 1 or 2 lines. Consider the domain context:"
    user_prompt = f"{instructions}\n\n{domain}\n\nStatement: {sentence}\n\nExplanation:"

    conversation = [
        {"role": "system", "content": "You're an expert assistant who provides clear and concise explanation"},
        {"role": "user", "content": user_prompt},
    ]
    sampling_options = {
        "max_tokens": 400,
        "temperature": 0.7,
        "top_p": 0.9,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }

    completion = openai.ChatCompletion.create(
        model="gpt-4", messages=conversation, **sampling_options)

    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()


# Example statement to enrich; the raw string keeps the LaTeX backslashes intact.
sentence = r"Ensures \(U_u(\omega_t^o) \) meets either a calculated statistical value or a specified minimum utility requirement in the initial interval \( [0.000, 0.0361) \)"

# `enriched_sentence` is kept at module level because later cells read it.
enriched_sentence = enrich_explanation(sentence)
print("Enriched Explanation:\n{}\n".format(enriched_sentence))

In [None]:
def prompt_layperson(enriched_sentence, max_words=30):
    """Build the prompt asking for a layperson-level explanation.

    The prompt used to state two different limits ("within 30 words" in the
    instructions, "within 50 words" in the closing task). Both places now use
    the single ``max_words`` value so the model gets one unambiguous limit.

    Args:
        enriched_sentence: The statement (already enriched with domain
            context) that the model should explain.
        max_words: Word limit quoted in the prompt. Defaults to 30, the limit
            given in the prompt's opening instructions.

    Returns:
        The complete prompt text, including the style examples.
    """
    return (
        f"Your task is to explain the following mathematical statement in very simple terms, suitable for someone without any technical background. The explanation should be clear, concise, and within {max_words} words. Avoid using any jargon or complex terms. Refer to the examples below for the style of explanation:\n\n"
        f"**Mathematical Statement:**\n{enriched_sentence}\n\n"
        "**Examples of Clear Explanations for a Layperson:**\n"
        "1. The final price should match the average market price or include a discount, ensuring it is fair and competitive.\n"
        "2. In the first phase, the plan should improve basic features to be at least as good as a standard option.\n"
        "3. The service package should meet a basic quality level or reach a specific customer satisfaction score to ensure a good experience.\n"
        "4. The initial budget must be large enough to cover all estimated costs and any additional expenses.\n\n"
        "**Your Task:**\n"
        f"Based on the mathematical statement provided, generate a clear and simple explanation suitable for a layperson, within {max_words} words."
    )


# Prompt for expert explanation


def prompt_expert(enriched_sentence):
    """Build the prompt asking for an expert-level explanation.

    Args:
        enriched_sentence: The statement (already enriched with domain
            context) that the model should explain.

    Returns:
        The complete prompt text, including three style examples written with
        LaTeX-style ``\\( ... \\)`` math.
    """
    # The example lines contain LaTeX backslashes (\(, \omega, \Omega, ...).
    # In a normal string those are invalid escape sequences (a SyntaxWarning on
    # recent Python versions), so each example is a raw string with its
    # trailing newline(s) supplied by a separate normal literal. The resulting
    # text is identical to the previous version.
    return (
        "Provide a detailed and technical explanation of the following mathematical statement for a domain expert. The explanation should be within 50 words. Refer to the examples below for the style of explanation:\n\n"
        f"**Mathematical Statement:**\n{enriched_sentence}\n\n"
        "**Explanation for Domain Expert:**\n"
        r"1. During the second interval [0.0361, 1.000], the utility of the opponent's offer \( U_u(\omega_t^o) \) must exceed the higher of a predefined threshold \( u \) or the quantile function \( U_{\Omega^o_t} \) at a specific time-dependent point." "\n"
        r"2. The initial evaluation phase requires the service package value \( V_s \) to surpass the minimum quality benchmark or meet a defined satisfaction threshold to ensure compliance with service standards." "\n"
        r"3. The order quantity \( Q_s \) must align with the highest value between the minimum stock level and a demand forecast quantile to optimize inventory management during the initial stocking phase." "\n\n"
        "**Your Task:**\n"
        "Provide a similar style explanation suitable for an expert, within 50 words."
    )

In [None]:
def custom_explanation(sentence, target_audience, prompt_func, confidence_score=None, max_tokens=400, temperature=0.6, top_p=0.7, frequency_penalty=0.0, presence_penalty=0.0):
    """Generate an audience-specific explanation of ``sentence`` via the chat API.

    Args:
        sentence: The statement to explain; passed to ``prompt_func``.
        target_audience: Audience label forwarded to ``generate_feedback``
            when a score is supplied.
        prompt_func: Callable that turns ``sentence`` into the prompt text.
        confidence_score: Score of a previous attempt. When given, feedback
            derived from it is appended to the prompt together with a request
            to refine the explanation.
        max_tokens, temperature, top_p, frequency_penalty, presence_penalty:
            Forwarded unchanged to ``openai.ChatCompletion.create``.

    Returns:
        The stripped content of the first choice. If the API reports that the
        completion stopped because of the token limit, a note saying so is
        appended.
    """
    # Build the prompt from the argument, not from the notebook-global
    # `enriched_sentence`, so the function explains whatever it is given.
    prompt = prompt_func(sentence)

    # If confidence_score is provided, append feedback for refinement
    if confidence_score is not None:
        feedback = generate_feedback(sentence, confidence_score, target_audience)
        prompt += f"\n\nFeedback for Improvement:\n{feedback}\n\nRefine the explanation based on the feedback."

    # Use OpenAI's API to get the explanation
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an expert assistant. Your task is to provide clear and concise explanations for the specified audience."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
    )

    choice = response['choices'][0]
    # Named `explanation` rather than `custom_explanation` to avoid shadowing
    # this function's own name.
    explanation = choice['message']['content'].strip()

    # The API reports truncation via finish_reason == 'length'. Comparing the
    # character count against the token budget (as before) flagged complete
    # answers and could miss truncated ones.
    if choice.get('finish_reason') == 'length':
        explanation += " (response cut off, please refine or increase token limit)"

    return explanation



In [None]:
# Load the fine-tuned models after additional training.
# map_location='cpu' lets a checkpoint that was saved from a GPU run be loaded
# on a machine without CUDA; load_state_dict then copies the weights into the
# freshly constructed model.
# NOTE(review): torch.load unpickles the file — only load checkpoints produced
# by this notebook or another trusted source.
layperson_model = BertForRegression()
layperson_model.load_state_dict(
    torch.load('bert-finetuned-layperson.pt', map_location='cpu'))
layperson_tokenizer = BertTokenizer.from_pretrained('bert-finetuned-layperson')

expert_model = BertForRegression()
expert_model.load_state_dict(
    torch.load('bert-finetuned-expert.pt', map_location='cpu'))
expert_tokenizer = BertTokenizer.from_pretrained('bert-finetuned-expert')

# Function to validate an explanation using the appropriate model and tokenizer
def validate_explanation(explanation, target_audience, max_length=512):
    """Score ``explanation`` with the fine-tuned model of the given audience.

    Args:
        explanation: Text to score.
        target_audience: ``'layperson'`` or ``'expert'``; selects which
            module-level model/tokenizer pair is used.
        max_length: Length the tokenized text is truncated/padded to.

    Returns:
        The model output converted to a Python number via ``.item()``.
        NOTE(review): the original comment states the output is already in
        [0, 1] because of a sigmoid — confirm against ``BertForRegression``.

    Raises:
        ValueError: If ``target_audience`` is neither supported value.
    """
    if target_audience == 'layperson':
        scorer, encoder = layperson_model, layperson_tokenizer
    elif target_audience == 'expert':
        scorer, encoder = expert_model, expert_tokenizer
    else:
        raise ValueError("Invalid target audience. Choose either 'layperson' or 'expert'.")

    # Switch to inference mode before scoring
    scorer.eval()
    encoded = encoder(
        explanation,
        return_tensors="pt",
        truncation=True,
        padding='max_length',
        max_length=max_length,
    )

    # No gradients are needed for a forward-only pass
    with torch.no_grad():
        prediction = scorer(**encoded)

    return prediction.item()

# Function to generate feedback based on the confidence score and target audience
def generate_feedback(explanation, confidence_score, target_audience):
    """Pick a canned improvement hint from the score band and the audience.

    Bands: below 0.4, from 0.4 up to (not including) 0.7, and 0.7 or above.

    Args:
        explanation: Accepted for interface symmetry; not used in the body.
        confidence_score: Score to classify into a band.
        target_audience: ``'layperson'`` or ``'expert'``.

    Returns:
        The feedback sentence for the matching audience and band, or an empty
        string when the audience is unrecognised or the score falls in no
        band.
    """
    if target_audience == 'layperson':
        band_messages = (
            "The explanation is too complex and difficult for a layperson to understand. Simplify the language, remove technical terms, and use more relatable examples.",
            "The explanation is somewhat clear but could be improved. Consider simplifying the language further and ensuring it is more engaging for a layperson.",
            "The explanation is clear and easy to understand. It's well-suited for a layperson, but consider making it even more engaging or concise.",
        )
    elif target_audience == 'expert':
        band_messages = (
            "The explanation lacks the necessary technical depth and detail for an expert. Include more precise terms, context, and relevant details to improve it.",
            "The explanation is somewhat detailed but could benefit from additional technical depth. Ensure all relevant information is included and accurately presented.",
            "The explanation is detailed and technically sound, making it well-suited for an expert audience. You might consider adding even more technical depth if appropriate.",
        )
    else:
        return ""

    weak, fair, strong = band_messages
    if confidence_score < 0.4:
        return weak
    if 0.4 <= confidence_score < 0.7:
        return fair
    if confidence_score >= 0.7:
        return strong
    # Reached only when no comparison holds (e.g. a NaN score)
    return ""



In [None]:
# Function to get user choice
def get_user_choice():
    """Prompt on stdin until the user names a supported target audience.

    The answer is stripped and lower-cased before it is checked.

    Returns:
        ``'layperson'`` or ``'expert'``.
    """
    accepted = ('layperson', 'expert')
    question = "Choose the target audience (layperson/expert): "

    answer = input(question).strip().lower()
    while answer not in accepted:
        print("Invalid choice. Please enter 'layperson' or 'expert'.")
        answer = input(question).strip().lower()
    return answer


# Minimum confidence score an explanation must reach to be accepted. The same
# value is applied to both audiences (adjust it if expert explanations need a
# different bar).
threshold = 0.7

# Upper bound on generate/score rounds, so a scorer that never reaches the
# threshold cannot keep the loop (and the API calls) running forever.
MAX_ATTEMPTS = 5

# Get the user's choice for target audience
target_audience = get_user_choice()
prompt_func = prompt_layperson if target_audience == 'layperson' else prompt_expert

explanation = ""
feedback = ""
score = None  # no previous attempt yet, so round 1 is generated without feedback

for attempt in range(1, MAX_ATTEMPTS + 1):
    # Passing the previous round's score makes custom_explanation append
    # improvement feedback to the prompt; before, every round re-sent the same
    # prompt and nothing was actually refined.
    explanation = custom_explanation(
        enriched_sentence, target_audience, prompt_func,
        confidence_score=score, max_tokens=400, temperature=0.7, top_p=1.0)

    # Output the generated explanation
    print(f"Generated Explanation for {target_audience}:\n{explanation}\n")

    score = validate_explanation(explanation, target_audience)

    # Output the confidence score
    print(f"Confidence Score: {score}\n")

    feedback = generate_feedback(explanation, score, target_audience)

    if score >= threshold:
        print(
            f"Final Explanation for {target_audience} based on enriched sentence:\n\n{explanation}\n")
        break

    print(f"{target_audience.capitalize()} explanation below threshold with score {score}, refining... Feedback: {feedback}")
else:
    # Runs only when the loop finished without hitting `break`
    print(
        f"No explanation reached the threshold of {threshold} after {MAX_ATTEMPTS} attempts; "
        f"last score was {score}.")