### These are all the functions used to create the graphs, specifically those found in the abstract written for the Data Curation Conference

In [None]:
# Necessary libraries for this notebook
import os
import json
import regex as re
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Open and load the JSON file containing all the evaluations as provided by the LlamaReviews script.
# The encoding is given explicitly: without it open() falls back to the platform's
# locale encoding, which is not UTF-8 everywhere and can garble or reject
# non-ASCII text in the descriptions.
filename = './ReviewsExample.json'
with open(filename, 'r', encoding='utf-8') as file:
    scoring = json.load(file)

In [None]:
# This function returns a list containing one total score for each of the data descriptions
def get_total_scores():
    """Return one total score per data description.

    Walks the module-level ``scoring`` mapping, parses each entry's
    ``'breakdown'`` string and adds up the second element of every item
    found in it.

    Returns:
        list: The summed scores, in the iteration order of ``scoring``.
    """
    totals = []
    for review in scoring.values():
        # NOTE(review): eval() executes whatever expression the JSON file
        # contains. If the breakdown is always a plain literal,
        # ast.literal_eval would be a safer parser -- confirm the data format
        # before switching.
        running_total = 0
        for entry in eval(review.get('breakdown')):
            running_total += entry[1]
        totals.append(running_total)
    return totals

In [None]:
# Returns the average of the scores
def get_average():
    """Return the arithmetic mean of the totals from ``get_total_scores()``.

    Raises:
        ZeroDivisionError: If ``get_total_scores()`` returns no scores.
    """
    totals = get_total_scores()
    count = len(totals)
    return sum(totals) / count

# Evaluate the average; as the last expression of the cell the notebook echoes the value.
get_average()

In [None]:
# Provides the correctly formatted distribution of scores for the guideline-by-guideline breakdown graph
def get_guideline_distribution():
    """Count, per guideline, how many descriptions earned 0, 0.5 or 1 point.

    Every item of an entry's parsed ``'breakdown'`` is read as
    ``item[0]`` = 1-based guideline number and ``item[1]`` = points awarded.
    Items whose points are not exactly 0, 0.5 or 1 are not counted.

    Returns:
        tuple: ``(zeros, halves, fulls)`` -- three lists of 10 counts each,
        where position ``i`` holds the count for guideline ``i + 1``.
    """
    # One [zero, half, full] tally row for each of the 10 guidelines.
    tallies = [[0, 0, 0] for _ in range(10)]
    point_values = (0, 0.5, 1)
    for review in scoring.values():
        # NOTE(review): eval() executes whatever expression the JSON file
        # contains. If the breakdown is always a plain literal,
        # ast.literal_eval would be a safer parser -- confirm the data format
        # before switching.
        for guide in eval(review.get('breakdown')):
            for column, value in enumerate(point_values):
                if guide[1] == value:
                    tallies[guide[0] - 1][column] += 1

    # Turn the per-guideline rows into one list per point value.
    zeros, halves, fulls = (list(column) for column in zip(*tallies))
    # If you want to add values for guideline 11 (word count) you can do that here
    # zeros.append(164)
    # halves.append(0)
    # fulls.append(12)
    return (zeros, halves, fulls)

In [None]:
# Histogram of the total score of every data description.
# Bin edges run from 0 to 10 in steps of 0.5 (21 edges).
plt.hist(
    get_total_scores(),
    bins=[half_points / 2 for half_points in range(21)],
    color='skyblue',
    edgecolor='black',
)
plt.title("Graph of Data Description Scores for DPMP")
plt.xlabel("Data Description Score")
plt.ylabel("Dataset Description Count")
plt.xlim(0, 10)
plt.xticks(range(11))
# To write the figure to disk, uncomment the next line. It is placed before
# plt.show() because saving after show() can produce an empty image.
# plt.savefig('score_distribution.png',dpi=300)
plt.show()

In [None]:
# Grouped bar chart: for each guideline, how many descriptions earned no, half or full points.
categories = list(map(str, range(1, 11)))

distribution = get_guideline_distribution()

bar_width = 0.25
index = np.arange(10)

# Three bars per guideline around each tick: no point (left), half point (centre), full point (right).
plt.bar(index - bar_width, distribution[0], width=bar_width, color='#e57373', label='No Point')
plt.bar(index, distribution[1], width=bar_width, color='#fdd835', label='Half Point')
plt.bar(index + bar_width, distribution[2], width=bar_width, color='#81c784', label='Full Point')

plt.title('Score Breakdown by Guideline')
plt.xlabel('Guideline Number')
plt.ylabel('Dataset Description Count')
plt.xticks(index, categories)
plt.legend()
plt.tight_layout()
# To write the figure to disk, uncomment the next line. It is placed before
# plt.show() because saving after show() can produce an empty image.
# plt.savefig('guideline_breakdown_final.png',dpi=300)
plt.show()

In [None]:
# Creates the presentation of a data description and its evaluation as seen in the abstract
def make_evaluation(DRP):
    """Print a data description together with its LLM evaluation and total score.

    Args:
        DRP: Key of the entry to look up in the module-level ``scoring``
            mapping (for example ``"DRP-333.json"``).

    The total is the sum of the second element of every item in the entry's
    parsed ``'breakdown'``. Nothing is returned; the text is printed.
    """
    record = scoring.get(DRP)
    evaluation = record.get('eval')
    desc = record.get('orig')
    # NOTE(review): eval() executes whatever expression the JSON file
    # contains. If the breakdown is always a plain literal,
    # ast.literal_eval would be a safer parser -- confirm the data format
    # before switching.
    score = 0
    for item in eval(record.get('breakdown')):
        score += item[1]
    print(f"#### {DRP}\n\nOriginal Description: {desc}\n\nLLM Evaluation: {evaluation}\n\nFinal Score: {score}")

# make_evaluation("DRP-333.json")

In [None]:
# Check what fraction (0 to 1) of the descriptions scored the provided threshold or higher (so it is inclusive)
def check_percentage_above(threshold):
    """Return the share of descriptions whose total score is >= ``threshold``.

    Args:
        threshold: Minimum total score (inclusive) a description must reach.

    Returns:
        float: A fraction between 0 and 1, not a value scaled to 100.

    Raises:
        ZeroDivisionError: If ``get_total_scores()`` returns no scores.
    """
    totals = get_total_scores()
    passing = sum(1 for total in totals if total >= threshold)
    return passing / len(totals)

# check_percentage_above(8)