### This notebook grades data descriptions against a set of provided guidelines. It has been refined a number of times; the iterative process of writing new guidelines can be seen in the multiple versions of the guidelines and of the few-shot prompts kept below.

In [None]:
#Importing necessary libraries
import os
import openai
import json
import credentials

# Sampling parameters shared by every LLM call in this notebook.
# Change them if you wish; otherwise 0.1 for each is recommended.
TEMP = 0.1
TOP_P = 0.1

# Provide your SambaNova credentials through the local `credentials` module
SAMBASTUDIO_MODEL = credentials.CHATBOT_MODEL
SAMBASTUDIO_BASE_URL = credentials.CHATBOT_URL
SAMBASTUDIO_API_KEY = credentials.CHATBOT_API_KEY

# Client used by grade() and breakdown_score()
client = openai.OpenAI(base_url=SAMBASTUDIO_BASE_URL, api_key=SAMBASTUDIO_API_KEY)

# Optional smoke test: uncomment the lines below to check that your client is working
#
# response = client.chat.completions.create(
#     model=SAMBASTUDIO_MODEL,
#     messages=[{"role":"system","content":"You are a helpful assistant"},{"role":"user","content":"Hello"}],
#     temperature=TEMP,
#     top_p=TOP_P,
# )
#
# print(response.choices[0].message.content)
      

In [None]:
# Open the file containing all of the descriptions, as created by get_descriptions.py.
# review() looks entries up with descriptions.get(key) and review_all() iterates over the keys,
# so the JSON content is expected to be an object (loaded as a dict).
# The encoding is stated explicitly so the file is read the same way on every platform.

filepath = 'put the file path here'
with open(filepath, 'r', encoding='utf-8') as file:
    descriptions = json.load(file)

In [None]:
# Guidelines for writing data descriptions for the Digital Porous Media Portal.
# They were developed by Maria Esteva and Masa Prodanovic, refined and condensed by Zach Nowacek,
# and refined a number of times by Dr. Esteva for clarity.
# marias_guidelines2 is the version that review() places at the start of every grading prompt.
marias_guidelines2='''
1.	Focus on describing the dataset so it can be understood independently from related research products such as a published paper. 
2.	Describe the context in which the dataset was created (study goals).
3.	Mention the type(s) of porous media being investigated.
4.	Address the research problem that the data is helping to solve (high level research question).
5.	Address how (reproducibility, generating new studies, validation, Machine Learning, understanding porous flow, etc.) and who will benefit from reusing the data (geoscientists, water resources managers, petroleum engineers, etc.).
6.	Describe the methodology for data collection (imaging, experimental, simulation, ML methodology that produced the data.)
7.	Provide an overview of the contents and organization of the dataset (types of files, documentation material, structure of the folder.)
8.	Indicate if the data was quality controlled and how. 
9.	Keep descriptions clear and accessible for experts as well as general audiences. Avoid acronyms or spell them out.
10.	Include keywords that will help others search for the data. 
'''

# NOTE(review): this looks like an earlier wording of the guidelines, kept to show how they evolved - confirm.
# review() builds its prompt from marias_guidelines2, not from this string.
marias_guidelines='''
1. Focus on describing the dataset so it can be understood independently from related research products such as a published paper. 
2. Describe the context in which the dataset was created (type of research method and goals).
3. Mention the type(s) of porous media being investigated.
4. Address the research problem that the data is helping to solve.
5. Address how (reproducibility, generating new studies, validation, Machine Learning, understanding porous flow, etc.) and who will benefit from reusing the data (geoscientists, water resources managers, petroleum engineers, etc.).
6. Describe the methodology for data collection (e.g. the imaging, experiment or simulation method that produced the data.)
7. Provide an overview of the contents and organization of the dataset.
8. Indicate if the data was quality controlled and how. 
9. Avoid acronyms or spell them out and keep descriptions accessible for experts as well as general audiences.
10. Include keywords that will help others search for the data. 
'''

# NOTE(review): this looks like another earlier wording of the guidelines, kept to show how they evolved - confirm.
# It is the only version that opens with an instruction line addressed to the LLM.
# review() builds its prompt from marias_guidelines2, not from this string.
guidelines='''
These are your guidelines to follow
1. Focus on describing the dataset so they can be understood independently from related research products such as a published paper or research code. 
2. Include a general statement that provides context to the study by which the dataset was created (e.g., The system under investigation…). 
3. Mention the type(s) of porous media being investigated.
4. Address the research problem that the data is helping to solve.
5. Address who will benefit from reusing the data and how (reproducibility, generating new studies, validation, Machine Learning, etc.)?
6. Describe methodology for data collection (e.g. the type of imaging or specific simulation method that produced the data)
7. Provide a very brief overview of the organization, scope, and contents of the dataset
8. Indicate if the data was quality controlled and how so. 
9. Keep descriptions concise, engaging, and accessible for experts as well as general audiences (avoid acronyms in particular)
10. Include key words
'''

# First iteration of the system instructions fed to the LLM. They define the role of the LLM and
# ask it for a final score from 1 to 10. This is not the default used by grade(); pass it explicitly
# through the which_instructions argument if you want it.
instructions = "You are an expert data curator attempting to rate data descriptions for the Digital Porous Media Portal. You are provided 10 guidelines, each of which is worth one point. Descriptions only get the point if the guideline is addressed explicitly. You must score each data description from 1 to 10, 10 being the best. Follow the examples provided."

# Second iteration of the system instructions, and the default for grade(). They had to change when
# we moved to few_shot3: instead of asking for a final score, as the previous instructions did, we now
# only want the LLM to evaluate each guideline, with no final score.
instructions2 = "You are an expert data curator for the Digital Porous Media Portal. You are provided 10 guidelines, each of which is worth one point. Descriptions only get the point if the guideline is addressed explicitly. You are to evaluate the description for each guideline. Follow the examples provided. Only evaluate the 10 guidelines, do not try to sum everything at the end."

# Used when writing the prompt to mark the beginning of the description that must be graded;
# review() inserts it between the few-shot examples and the description.
preface = "\nNow follows the description you must rate. Do not round.\n"

In [None]:
# Third and final iteration of the few-shot prompt (as of 7/21/25), and the one used by review().
# It takes the job of giving a final score to the description away from the LLM: the LLM is only
# asked to evaluate whether each guideline was met. Turning that evaluation into usable numbers is
# done by passing it to another LLM call (breakdown_score), which returns a list of tuples with the
# relevant information.

few_shot3='''
EXAMPLE 1
Description: Physical processes in porous materials are of research interest due to their inherent geometric complexity. The equations that describe these processes are well-understood; however, approximating them numerically often requires extensive computational resources. Here we present transport simulation results and geometric features carried-out on samples hosted on the Digital Rocks Portal.  We hope that the scale and diversity of this dataset can offer unparalleled opportunities to researchers who are benchmarking simulators, training machine learning models, and developing correlations to acquire new insights into physical processes in porous media
Explanation: 
1. 0.5 points for partially explaining the data, independent from the project itself.
2. 0 points for lacking context to the study.
3. 0 points for failing to mention the porous media involved.
4. 0 points for not addressing the research problem that would be solved.
5. 1 point for addressing who may benefit from reuse.
6. 0.5 points for covering some of the methodological practices.
7. 0 points for not providing an overview of organization, scope, and context
8. 0 points for failing to indicate if the data was quality controlled.
9. 1 point for clear use of language.
10. 1 point for use of keywords.

EXAMPLE 2
Description: These data sets contain reconstructed micro-CT images of trapped nitrogen gas in porous media during the dissolution process and the formation of dissolution fingering. 
In total, eight data sets are available. Each data set contains the reconstructed micro-CT images obtained in different observation time during dissolution process. Seven of the data sets were performed by packing various granular particle sizes and ranges inside a 10-mm-diameter-cylindrical-container. Another data set was performed by packing granular particles inside a 20-mm-diameter-cylindrical-container. The dissolution fingering formation can be observed in some of the data sets in the packing inside a 10-mm-diameter-cylindrical-container. Clearer dissolution fingering formation can be observed inside a 20-mm-diameter-cylindrical-container.
These datasets were presented and utilized in the paper of "Effects of Dissolution Fingering on Mass Transfer Rate in Three-Dimensional Porous Media" (see Related Publications) in Water Resources Research in 2021. Full details of the experimental setup, procedure, and imaging analysis can be found in the paper and supporting information.
Explanation: 
1. 1 point for focusing on the dataset and not the research project.
2. 0 points as it lacks context to the study.
3. 0.5 points for some mention (though limited) of the porous media involved.
4. 0 points for not addressing the research problem that would be solved.
5. 0 points for not acknowledging who would benefit from reuse.
6. 1 point for sufficient description of methodology.
7. 1 point for description of how data is organized.
8. 0 points because there is no indication if the data was quality controlled
9. 1 point for clear use of language, including not using acronyms
10. 1 point for using keywords.

EXAMPLE 3
Description: Pore-scale digital images are usually obtained from micro computed tomography (micro-CT) data that has been segmented into void and grain space. Image segmentation is a critical process for digital rock analyses that can influence pore-scale characterisation studies and/or the numerical simulation of petrophysical properties. This project is to study on the sandstone micro-CT image segmentation by using convolutional neural network.
Explanation: 
1. 0 points for failing to describe the dataset
2. 0 points as it lacks context to the study.
3. 1 point for mentioning the materials involved.
4. 0 points as it does not address the research problem
5. 0 points for not identifying who could reuse the data.
6. 0.5 points for an attempt (though insufficient) to explain the methodology.
7. 0.5 points for beginning to describe the data's contents.
8. 0 points because there is no indication if the data was quality controlled.
9. 0 points for unclear language.
10. 0 points for failing to use keywords.
'''

In [None]:
# Second iteration of the few-shot prompt, which fixed a few things. Putting the score after the
# explanation encourages the LLM to justify its answer first, instead of jumping to a score and then
# justifying it later in a way that may lead to inconsistencies. The formatting is also more regular,
# and no guideline is left without an evaluation, even when it is not met.
# Remaining weakness: the LLM is still expected to do the summation for us, and it frequently gets
# that step wrong, which leads to improper final scores.
# NOTE(review): example 1 labels its total "Final Score:" while examples 2 and 3 use "Score:".
# The text is left untouched here because it is prompt content.
few_shot2 = '''
EXAMPLE 1
Description: Physical processes in porous materials are of research interest due to their inherent geometric complexity. The equations that describe these processes are well-understood; however, approximating them numerically often requires extensive computational resources. Here we present transport simulation results and geometric features carried-out on samples hosted on the Digital Rocks Portal.  We hope that the scale and diversity of this dataset can offer unparalleled opportunities to researchers who are benchmarking simulators, training machine learning models, and developing correlations to acquire new insights into physical processes in porous media
Explanation: 
1. 0.5 points for partially explaining the data, independent from the project itself.
2. 0 points for lacking context to the study.
3. 0 points for failing to mention the porous media involved.
4. 0 points for not addressing the research problem that would be solved.
5. 1 point for addressing who may benefit from reuse.
6. 0.5 points for covering some of the methodological practices.
7. 0 points for not providing an overview of organization, scope, and context
8. 0 points for failing to indicate if the data was quality controlled.
9. 1 point for clear use of language.
10. 1 point for use of keywords.
Sum = 0.5+0+0+0+1+0.5+0+0+1+1=4
Final Score: 4

EXAMPLE 2
Description: These data sets contain reconstructed micro-CT images of trapped nitrogen gas in porous media during the dissolution process and the formation of dissolution fingering. 
In total, eight data sets are available. Each data set contains the reconstructed micro-CT images obtained in different observation time during dissolution process. Seven of the data sets were performed by packing various granular particle sizes and ranges inside a 10-mm-diameter-cylindrical-container. Another data set was performed by packing granular particles inside a 20-mm-diameter-cylindrical-container. The dissolution fingering formation can be observed in some of the data sets in the packing inside a 10-mm-diameter-cylindrical-container. Clearer dissolution fingering formation can be observed inside a 20-mm-diameter-cylindrical-container.
These datasets were presented and utilized in the paper of "Effects of Dissolution Fingering on Mass Transfer Rate in Three-Dimensional Porous Media" (see Related Publications) in Water Resources Research in 2021. Full details of the experimental setup, procedure, and imaging analysis can be found in the paper and supporting information.
Explanation: 
1. 1 point for focusing on the dataset and not the research project.
2. 0 points as it lacks context to the study.
3. 0.5 points for some mention (though limited) of the porous media involved.
4. 0 points for not addressing the research problem that would be solved.
5. 0 points for not acknowledging who would benefit from reuse.
6. 1 point for sufficient description of methodology.
7. 1 point for description of how data is organized.
8. 0 points because there is no indication if the data was quality controlled
9. 1 point for clear use of language, including not using acronyms
10. 1 point for using keywords.
Sum = 1+0+0.5+0+0+1+1+0+1+1=5.5
Score: 5.5

EXAMPLE 3
Description: Pore-scale digital images are usually obtained from micro computed tomography (micro-CT) data that has been segmented into void and grain space. Image segmentation is a critical process for digital rock analyses that can influence pore-scale characterisation studies and/or the numerical simulation of petrophysical properties. This project is to study on the sandstone micro-CT image segmentation by using convolutional neural network.
Explanation: 
1. 0 points for failing to describe the dataset
2. 0 points as it lacks context to the study.
3. 1 point for mentioning the materials involved.
4. 0 points as it does not address the research problem
5. 0 points for not identifying who could reuse the data.
6. 0.5 points for an attempt (though insufficient) to explain the methodology.
7. 0.5 points for beginning to describe the data's contents.
8. 0 points because there is no indication if the data was quality controlled.
9. 0 points for unclear language.
10. 0 points for failing to use keywords.
Sum = 0+0+1+0+0+0.5+0.5+0+0+0=2
Score: 2
'''

In [None]:
# First iteration of the few-shot prompt. While a fine starting point, it has formatting and
# logical-order problems that make it a poor way to do the prompting: it encourages premature grading
# (the score comes before the evaluation), and it does not go through each guideline to evaluate
# whether it was met or not.
# NOTE(review): examples 2 and 3 spell the label "Explaination:" while example 1 uses "Explanation:".
# The text is left untouched here because it is prompt content.

few_shot = '''
EXAMPLE 1
Score: 4
Description: Physical processes in porous materials are of research interest due to their inherent geometric complexity. The equations that describe these processes are well-understood; however, approximating them numerically often requires extensive computational resources. Here we present transport simulation results and geometric features carried-out on samples hosted on the Digital Rocks Portal.  We hope that the scale and diversity of this dataset can offer unparalleled opportunities to researchers who are benchmarking simulators, training machine learning models, and developing correlations to acquire new insights into physical processes in porous media
Explanation: 1 point for clear use of language. 1 point for use of keywords. 1 point for addressing who may benefit from reuse. A half a point for covering some of the methodological practices. And a final half point for explaining the data, independent from the project itself.

EXAMPLE 2
Score: 6.5
Description: These data sets contain reconstructed micro-CT images of trapped nitrogen gas in porous media during the dissolution process and the formation of dissolution fingering. 
In total, eight data sets are available. Each data set contains the reconstructed micro-CT images obtained in different observation time during dissolution process. Seven of the data sets were performed by packing various granular particle sizes and ranges inside a 10-mm-diameter-cylindrical-container. Another data set was performed by packing granular particles inside a 20-mm-diameter-cylindrical-container. The dissolution fingering formation can be observed in some of the data sets in the packing inside a 10-mm-diameter-cylindrical-container. Clearer dissolution fingering formation can be observed inside a 20-mm-diameter-cylindrical-container.
These datasets were presented and utilized in the paper of "Effects of Dissolution Fingering on Mass Transfer Rate in Three-Dimensional Porous Media" (see Related Publications) in Water Resources Research in 2021. Full details of the experimental setup, procedure, and imaging analysis can be found in the paper and supporting information.
Explaination: 1 point for clear use of language. 1 point for sufficient description of methodology. 1 point for description of how data is organized. 1 point for using keywords. 1 point for not using acronyms. 1 point for focusing on the dataset and not the research project. And a final half point for some mention (though limited) of the porous media involved.

EXAMPLE 3
Score: 2
Description: Pore-scale digital images are usually obtained from micro computed tomography (micro-CT) data that has been segmented into void and grain space. Image segmentation is a critical process for digital rock analyses that can influence pore-scale characterisation studies and/or the numerical simulation of petrophysical properties. This project is to study on the sandstone micro-CT image segmentation by using convolutional neural network.
Explaination: 1 point for mentioning the materials involved. A half point for an attempt (though insufficient) to explain the methodology. And one more half point for beginning to describe the data's contents
'''

In [None]:
def grade(prompt, which_instructions=instructions2):
    """Send a prompt to the LLM and return the text of its reply.

    Args:
        prompt: Text sent as the user message.
        which_instructions: Text sent as the system message. Optional; defaults to
            ``instructions2``.

    Returns:
        The message content of the first choice in the chat completion response.
    """
    conversation = [
        {"role": "system", "content": which_instructions},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model=SAMBASTUDIO_MODEL,
        messages=conversation,
        temperature=TEMP,
        top_p=TOP_P,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def breakdown_score(evaluation):
    """Ask the LLM to turn a written evaluation into per-guideline scores.

    The model is asked for a list of 10 tuples of the form (guideline #, score) and nothing
    else. Structured generation could be used here in the future.

    Args:
        evaluation: The evaluation text, e.g. the value returned by grade().

    Returns:
        The raw message content of the first choice. This is a string, not a list; the caller
        has to parse it. NOTE(review): because the text comes from a model, prefer
        ast.literal_eval() over eval() when parsing it.
    """
    system_message = "You will be given an evaluation of a data description. We need to know the individualized score breakdown. Please provide a list of 10 tuples, each tuple should have two values (the guideline #, the score). Do not return any other text"
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": evaluation},
    ]
    completion = client.chat.completions.create(
        model=SAMBASTUDIO_MODEL,
        messages=conversation,
        temperature=TEMP,
        top_p=TOP_P,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def review(desc,full_text=False):
    """Grade a single description and return the original text with its evaluation.

    The prompt is built as marias_guidelines2 + few_shot3 + preface + the description text.

    Args:
        desc: The description text itself when ``full_text`` is True; otherwise a key of the
            module-level ``descriptions`` mapping whose value is the text to grade.
        full_text: Set to True to grade ``desc`` directly instead of looking it up.

    Returns:
        A dictionary of three items:
        'orig'      - the original description that was graded,
        'eval'      - the guideline-by-guideline evaluation generated by grade(),
        'breakdown' - the string returned by breakdown_score() (a list of tuples in text
                      form), so the evaluations can be used for graphs and summary statistics.

    Raises:
        KeyError: If ``full_text`` is False and ``desc`` is not a key of ``descriptions``.
    """
    if full_text:
        text = desc
    else:
        # A missing key used to surface as an obscure TypeError when None was concatenated
        # to the prompt; report the actual problem instead.
        if desc not in descriptions:
            raise KeyError("No description found for key {!r}".format(desc))
        text = descriptions[desc]

    prompt = marias_guidelines2 + few_shot3 + preface + text
    evaluation = grade(prompt)
    # Key order is kept as 'orig', 'eval', 'breakdown' so the JSON output layout is unchanged.
    return {
        'orig': text,
        'eval': evaluation,
        'breakdown': breakdown_score(evaluation),
    }

In [None]:
def review_all(output_path):
    """Grade every loaded description and write all the reviews to a JSON file.

    Args:
        output_path: Name of the file to write to, without extension; '.json' is appended.

    Returns:
        None. The reviews are written to ``output_path + '.json'`` as a JSON object that maps
        each key of ``descriptions`` to the dictionary returned by review().
    """
    all_reviews = {}
    for desc in descriptions:
        all_reviews[desc] = review(desc)

    # Serialize before opening the file, so a serialization error cannot leave a truncated file.
    serialized = json.dumps(all_reviews, indent=4)
    filename = output_path + '.json'
    # The context manager closes the file even if the write fails.
    with open(filename, "w", encoding="utf-8") as text_file:
        text_file.write(serialized)

In [None]:
# Example run: grades every loaded description and writes the results to ./make_reviews.json
review_all(output_path='./make_reviews')