## Convert to prompt-based format: fly ash + slag

In [3]:
import pandas as pd
import json
import os
def format_value(value):
    """Render a single cell value as text for use in a prompt.

    Args:
        value: A scalar taken from a DataFrame row.

    Returns:
        ``"unknown"`` when ``pd.isna(value)`` is true, the number formatted
        with one decimal place when ``value`` is a real number (including
        NumPy scalars such as ``numpy.int64``), and ``str(value)`` otherwise.
    """
    # Local import keeps this function self-contained.
    import numbers

    if pd.isna(value):
        return "unknown"
    # numbers.Real also matches NumPy integer scalars, which a plain
    # (int, float) check misses, so every numeric value gets one decimal.
    if isinstance(value, numbers.Real):
        return f"{float(value):.1f}"
    return str(value)

def create_messages(row):
    """Build the system/user/assistant chat messages for one mixture record.

    Args:
        row: Mapping-like record (e.g. a pandas Series produced by
            ``DataFrame.iterrows``) keyed by the spreadsheet column names
            read below.

    Returns:
        A list of three ``{"role": ..., "content": ...}`` dicts, in the order
        system, user, assistant.

    Raises:
        KeyError: If a dict-like ``row`` lacks one of the columns read below.
    """
    system_message = (
        "You are an advanced concrete mixture analyst AI assistant, specializing in predicting the compressive strength "
        "of concrete based on detailed mixture composition, curing conditions, and testing parameters. Your goal is to "
        "use the provided parameters to determine or estimate the expected compressive strength of the mixture."
    )

    # Every field goes through format_value so that a missing cell reads
    # "unknown" rather than "nan" -- including the specimen shape.
    user_message = (
        "I am working with a specific concrete mixture with the following properties. Please analyze the mixture and "
        "provide the estimated compressive strength based on its composition, curing conditions, and testing parameters.\n\n"
        f"Fly Ash (FA) content: {format_value(row['FA - content in mix -  (kg/m3)'])} kg/m3\n"
        f"Ground Granulated Blast Furnace Slag (GGBFS) content: {format_value(row['GGBFS - content in mix -  (kg/m3)'])} kg/m3\n"
        f"Total aggregates content: {format_value(row['Total aggregates - content - (kg/m3)'])} kg/m3\n"
        f"Water content (from all sources): {format_value(row['Water - content from all sources in mix - (kg/m3)'])} kg/m3\n"
        f"Sodium Hydroxide (NaOH) solution: {format_value(row['NaOH solution - content in mix - (kg/m3)'])} kg/m3 at "
        f"{format_value(row['NaOH solution - concentration - molar'])} molarity\n\n"
        f"The mixture underwent an initial curing process for {format_value(row['Initial Curing Process - duration - (day)'])} days at "
        f"{format_value(row['Initial Curing Process - Temperature - (ºC)'])}°C.\n"
        f"Testing was conducted on a {format_value(row['Specimen shape'])} specimen after {format_value(row['Test age'])} days."
    )

    # NOTE(review): a missing 'Compressive test' value produces the target
    # "is unknown MPa." -- consider filtering such rows before conversion.
    assistant_message = (
        "Given the composition and curing conditions you provided, the estimated compressive strength for this concrete mixture "
        f"is {format_value(row['Compressive test'])} MPa."
    )

    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": assistant_message},
    ]


def process_excel_to_gpt4_format(file_path, output_file):
    """Convert an Excel sheet into a JSONL file of chat-format examples.

    Each row of the sheet becomes one output line: a JSON object whose
    ``"messages"`` key holds the result of ``create_messages(row)``.

    Args:
        file_path: Path of the Excel workbook, read with ``pd.read_excel``.
        output_file: Path of the JSONL file to write. It is opened in ``'w'``
            mode, so an existing file is overwritten.
    """
    output_dir = os.path.dirname(output_file)
    # dirname is '' for a bare filename, and os.makedirs('') raises, so only
    # create a directory when the path actually has one.
    if output_dir:
        # exist_ok avoids the race of a separate exists() check.
        os.makedirs(output_dir, exist_ok=True)

    df = pd.read_excel(file_path)

    # json.dumps escapes non-ASCII by default, so the explicit encoding only
    # makes the file independent of the platform's default codec.
    with open(output_file, 'w', encoding='utf-8') as f:
        for _, row in df.iterrows():
            json_object = {"messages": create_messages(row)}
            f.write(json.dumps(json_object) + '\n')

    print(f"JSONL file '{output_file}' has been created.")


# Convert the "Slag + Fly Ash" training spreadsheet into a JSONL prompt file.
file_path = '../filter_based_on_binder/interpolation_train/fly_ash_and_slag_interpolation_train.xlsx'
process_excel_to_gpt4_format(
    file_path=file_path,
    output_file='interpolation_train_prompt/fly_ash_and_slag_interpolation_train.jsonl',
)


JSONL file 'interpolation_train_prompt/fly_ash_and_slag_interpolation_train.jsonl' has been created.


## Only slag prompt conversion

In [6]:
import pandas as pd
import json
import os

def format_value(value):
    """Render a single cell value as text for use in a prompt.

    Args:
        value: A scalar taken from a DataFrame row.

    Returns:
        ``"unknown"`` when ``pd.isna(value)`` is true, the number formatted
        with one decimal place when ``value`` is a real number (including
        NumPy scalars such as ``numpy.int64``), and ``str(value)`` otherwise.
    """
    # Local import keeps this function self-contained.
    import numbers

    if pd.isna(value):
        return "unknown"
    # numbers.Real also matches NumPy integer scalars, which a plain
    # (int, float) check misses, so every numeric value gets one decimal.
    if isinstance(value, numbers.Real):
        return f"{float(value):.1f}"
    return str(value)

def create_messages(row):
    """Build the chat messages for one GGBFS-only mixture record.

    Args:
        row: Mapping-like record (e.g. a pandas Series produced by
            ``DataFrame.iterrows``) keyed by the spreadsheet column names
            read below.

    Returns:
        A list of three ``{"role": ..., "content": ...}`` dicts, in the order
        system, user, assistant.

    Raises:
        KeyError: If a dict-like ``row`` lacks one of the columns read below.
    """
    system_message = (
        "You are an advanced concrete mixture analyst AI assistant, specializing in predicting the compressive strength "
        "of concrete based on detailed mixture composition, curing conditions, and testing parameters. Your goal is to "
        "use the provided parameters to determine or estimate the expected compressive strength of the mixture."
    )

    # Every field goes through format_value so that a missing cell reads
    # "unknown" rather than "nan" -- including the specimen shape.
    user_message = (
        "I am working with a specific concrete mixture containing only Ground Granulated Blast Furnace Slag (GGBFS) as the binder. "
        "Please analyze the mixture composition, curing conditions, and testing parameters, and provide an estimated compressive strength.\n\n"
        f"GGBFS content: {format_value(row['GGBFS - content in mix -  (kg/m3)'])} kg/m3\n"
        f"Total aggregates content: {format_value(row['Total aggregates - content - (kg/m3)'])} kg/m3\n"
        f"Water content (from all sources): {format_value(row['Water - content from all sources in mix - (kg/m3)'])} kg/m3\n"
        f"Sodium Hydroxide (NaOH) solution: {format_value(row['NaOH solution - content in mix - (kg/m3)'])} kg/m3 at "
        f"{format_value(row['NaOH solution - concentration - molar'])} molarity\n\n"
        f"The mixture was initially cured for {format_value(row['Initial Curing Process - duration - (day)'])} days at "
        f"{format_value(row['Initial Curing Process - Temperature - (ºC)'])}°C.\n"
        f"Testing was conducted on a {format_value(row['Specimen shape'])} specimen after {format_value(row['Test age'])} days."
    )

    # NOTE(review): a missing 'Compressive test' value produces the target
    # "is unknown MPa." -- consider filtering such rows before conversion.
    assistant_message = (
        "Based on the composition and curing conditions provided, the estimated compressive strength for this concrete mixture "
        f"is {format_value(row['Compressive test'])} MPa."
    )

    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": assistant_message},
    ]


def process_excel_to_gpt4_format(file_path, output_file):
    """Convert an Excel sheet into a JSONL file of chat-format examples.

    Each row of the sheet becomes one output line: a JSON object whose
    ``"messages"`` key holds the result of ``create_messages(row)``.

    Args:
        file_path: Path of the Excel workbook, read with ``pd.read_excel``.
        output_file: Path of the JSONL file to write. It is opened in ``'w'``
            mode, so an existing file is overwritten.
    """
    output_dir = os.path.dirname(output_file)
    # dirname is '' for a bare filename, and os.makedirs('') raises, so only
    # create a directory when the path actually has one.
    if output_dir:
        # exist_ok avoids the race of a separate exists() check.
        os.makedirs(output_dir, exist_ok=True)

    df = pd.read_excel(file_path)

    # json.dumps escapes non-ASCII by default, so the explicit encoding only
    # makes the file independent of the platform's default codec.
    with open(output_file, 'w', encoding='utf-8') as f:
        for _, row in df.iterrows():
            json_object = {"messages": create_messages(row)}
            f.write(json.dumps(json_object) + '\n')

    print(f"JSONL file '{output_file}' has been created.")

# Convert the "Only Slag" extrapolation-test spreadsheet into a JSONL prompt file.
file_path = '../filter_based_on_binder/extrapolation_test/only_slag_extrapolation_test.xlsx'
process_excel_to_gpt4_format(
    file_path=file_path,
    output_file='extrapolation_test_prompt/only_slag_extrapolation_test.jsonl',
)


JSONL file 'extrapolation_test_prompt/only_slag_extrapolation_test.jsonl' has been created.


## Only fly ash prompt conversion

In [9]:
import json
import os

import pandas as pd

def format_value(value):
    """Render a single cell value as text for use in a prompt.

    Args:
        value: A scalar taken from a DataFrame row.

    Returns:
        ``"unknown"`` when ``pd.isna(value)`` is true, the number formatted
        with one decimal place when ``value`` is a real number (including
        NumPy scalars such as ``numpy.int64``), and ``str(value)`` otherwise.
    """
    # Local import keeps this function self-contained.
    import numbers

    if pd.isna(value):
        return "unknown"
    # numbers.Real also matches NumPy integer scalars, which a plain
    # (int, float) check misses, so every numeric value gets one decimal.
    if isinstance(value, numbers.Real):
        return f"{float(value):.1f}"
    return str(value)

def create_messages(row):
    """Build the chat messages for one Fly-Ash-only mixture record.

    Args:
        row: Mapping-like record (e.g. a pandas Series produced by
            ``DataFrame.iterrows``) keyed by the spreadsheet column names
            read below.

    Returns:
        A list of three ``{"role": ..., "content": ...}`` dicts, in the order
        system, user, assistant.

    Raises:
        KeyError: If a dict-like ``row`` lacks one of the columns read below.
    """
    system_message = (
        "You are an advanced AI assistant specialized in analyzing concrete mixtures and predicting their compressive strength. "
        "Your role is to accurately estimate compressive strength based on specific concrete compositions, especially mixtures "
        "containing Fly Ash (FA) as the sole binder, and their curing and testing conditions."
    )

    # Every field goes through format_value so that a missing cell reads
    # "unknown" rather than "nan" -- including the specimen shape.
    user_message = (
        "I am working with a concrete mixture containing only Fly Ash (FA) as the binder. "
        "Please analyze the composition, curing conditions, and testing parameters, and provide an estimated compressive strength.\n\n"
        f"Fly Ash (FA) content: {format_value(row['FA - content in mix -  (kg/m3)'])} kg/m3\n"
        f"Total aggregates content: {format_value(row['Total aggregates - content - (kg/m3)'])} kg/m3\n"
        f"Water content (from all sources): {format_value(row['Water - content from all sources in mix - (kg/m3)'])} kg/m3\n"
        f"Sodium Hydroxide (NaOH) solution: {format_value(row['NaOH solution - content in mix - (kg/m3)'])} kg/m3 at "
        f"{format_value(row['NaOH solution - concentration - molar'])} molarity\n\n"
        f"The mixture was initially cured for {format_value(row['Initial Curing Process - duration - (day)'])} days at "
        f"{format_value(row['Initial Curing Process - Temperature - (ºC)'])}°C.\n"
        f"Testing was conducted on a {format_value(row['Specimen shape'])} specimen after {format_value(row['Test age'])} days."
    )

    # NOTE(review): a missing 'Compressive test' value produces the target
    # "is unknown MPa." -- consider filtering such rows before conversion.
    assistant_message = (
        "Based on the Fly Ash-based composition and the curing conditions, the estimated compressive strength for this concrete mixture "
        f"is {format_value(row['Compressive test'])} MPa."
    )

    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": assistant_message},
    ]

def process_excel_to_gpt4_format(file_path, output_file):
    """Convert an Excel sheet into a JSONL file of chat-format examples.

    Each row of the sheet becomes one output line: a JSON object whose
    ``"messages"`` key holds the result of ``create_messages(row)``.

    Args:
        file_path: Path of the Excel workbook, read with ``pd.read_excel``.
        output_file: Path of the JSONL file to write. It is opened in ``'w'``
            mode, so an existing file is overwritten.
    """
    # The workbook is read first, so a bad input path fails before any
    # output directory is created.
    df = pd.read_excel(file_path)

    output_dir = os.path.dirname(output_file)
    # dirname is '' for a bare filename, and os.makedirs('') raises, so only
    # create a directory when the path actually has one.
    if output_dir:
        # exist_ok avoids the race of a separate exists() check.
        os.makedirs(output_dir, exist_ok=True)

    # json.dumps escapes non-ASCII by default, so the explicit encoding only
    # makes the file independent of the platform's default codec.
    with open(output_file, 'w', encoding='utf-8') as f:
        for _, row in df.iterrows():
            json_object = {"messages": create_messages(row)}
            f.write(json.dumps(json_object) + '\n')

    print(f"JSONL file '{output_file}' has been created.")

# Convert the "Only Fly Ash" training spreadsheet into a JSONL prompt file.
file_path = '../filter_based_on_binder/interpolation_train/only_fly_ash_interpolation_train.xlsx'
process_excel_to_gpt4_format(
    file_path=file_path,
    output_file='interpolation_train_prompt/only_fly_ash_interpolation_train.jsonl',
)


JSONL file 'interpolation_train_prompt/only_fly_ash_interpolation_train.jsonl' has been created.
