<a href="https://colab.research.google.com/github/drahmel/Advanced-Joomla/blob/master/CGP_Scoring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CGP Runnable Evaluation Model

Scoring / Emissions / Evaluation algorithm for the Conversation Genome Project


In [40]:
import json
import os
import random
from datetime import datetime, timezone
from traceback import print_exception

import torch

# spaCy is an optional dependency: both names stay None when it cannot be
# imported, and code that needs them must check before use.
spacy = None
Matcher = None
try:
    import spacy
    from spacy.matcher import Matcher
except Exception:
    # `Exception` (instead of a bare `except:`) still reports any failure raised
    # while importing, but no longer swallows KeyboardInterrupt / SystemExit.
    # Reset both names so a partial import never leaves `spacy` set while
    # `Matcher` is still None.
    spacy = None
    Matcher = None
    print("Please install spacy to run locally")


import numpy as np
from scipy.stats import skew

class logging:
    """Minimal console logger exposing ``debug``, ``info`` and ``error``.

    The level functions are declared without an explicit ``self``. When they
    are called through an instance, the instance arrives as ``args[0]`` and is
    discarded, so only the remaining positional arguments are printed.
    Keyword arguments are accepted and ignored.
    """

    time_format = '%Y-%m-%d %H:%M:%S'

    @staticmethod
    def _emit(level, parts):
        # One line per call: UTC timestamp, level name, " | ", then every
        # message part, all separated by two spaces.
        stamp = datetime.now(timezone.utc).strftime(logging.time_format)
        print(stamp, level, " | ", *parts, sep="  ")

    def debug(*args, **kwargs):
        logging._emit("DEBUG", args[1:])

    def info(*args, **kwargs):
        logging._emit("INFO", args[1:])

    def error(*args, **kwargs):
        logging._emit("ERROR", args[1:])

class llm_spacy:
    """spaCy-backed helper that extracts tags from text and compares vectors.

    Relies on the module-level names ``spacy`` and ``Matcher`` (None when the
    optional import failed) and on ``np``.

    Attributes:
        nlp: Loaded spaCy pipeline, cached by ``get_nlp`` after the first load.
        verbose: When True, intermediate documents/spans/vectors are printed.
        dataset: Name of the spaCy model package that ``get_nlp`` loads.
    """

    nlp = None
    verbose = False
    # Faster small and medium models: en_core_web_sm and en_core_web_md.
    # Manual download: python -m spacy download <model name>
    dataset = "en_core_web_lg"  # ~600mb

    def get_nlp(self):
        """Return the spaCy pipeline, downloading and loading it on first use.

        Raises:
            RuntimeError: If spaCy could not be imported.
        """
        nlp = self.nlp
        if nlp is None:
            if spacy is None:
                raise RuntimeError("spacy is not installed; install spacy to run locally")
            dataset = self.dataset

            if not spacy.util.is_package(dataset):
                print(f"Downloading spacy model {dataset}...")
                spacy.cli.download(dataset)
                # Fixed: this message lacked the f-prefix and printed "{dataset}" literally.
                print(f"Model {dataset} downloaded successfully!")

            nlp = spacy.load(dataset)
            print(f"Spacy {dataset} Vector dimensionality: {nlp.vocab.vectors_length}")
            self.nlp = nlp
        return nlp

    async def simple_text_to_tags(self, body, min_tokens=5):
        """Extract lemmatized tags from ``body`` together with their vectors.

        Matches adjective+noun pairs, pronouns, and non-stop-word
        nouns/verbs/adjectives.

        Args:
            body: Text to analyse.
            min_tokens: A match is kept only when the *character* length of its
                lemma is greater than this value (despite the name, it is not
                a token count).

        Returns:
            Dict keyed by lemma; each value is
            ``{"tag": lemma, "count": occurrences, "vectors": list of floats}``.
        """
        nlp = self.get_nlp()

        # Define patterns
        adj_noun_pattern = [{"POS": "ADJ"}, {"POS": "NOUN"}]
        pronoun_pattern = [{"POS": "PRON"}]
        unique_word_pattern = [{"POS": {"IN": ["NOUN", "VERB", "ADJ"]}, "IS_STOP": False}]

        # Initialize the Matcher with the shared vocabulary
        matcher = Matcher(nlp.vocab)
        matcher.add("ADJ_NOUN_PATTERN", [adj_noun_pattern])
        matcher.add("PRONOUN_PATTERN", [pronoun_pattern])
        matcher.add("UNIQUE_WORD_PATTERN", [unique_word_pattern])

        doc = nlp(body)
        if self.verbose:
            print("DOC", doc)
        matches_dict = {}
        for _match_id, start, end in matcher(doc):
            span = doc[start:end]
            if self.verbose:
                print("Span text", span.text)
            matchPhrase = span.lemma_
            if len(matchPhrase) > min_tokens:
                if self.verbose:
                    print(f"Original: {span.text}, Lemma: {span.lemma_} Vectors: {span.vector.tolist()}")
                if matchPhrase not in matches_dict:
                    matches_dict[matchPhrase] = {"tag": matchPhrase, "count": 0, "vectors": span.vector.tolist()}
                matches_dict[matchPhrase]['count'] += 1

        return matches_dict

    async def get_neighborhood(self, response, tag_count_ceiling=None):
        """Average the tag vectors in ``response`` into a single vector.

        Args:
            response: Dict whose values each carry a ``'vectors'`` entry (the
                shape produced by ``simple_text_to_tags``).
            tag_count_ceiling: When truthy, at most this many vectors (in dict
                iteration order) are averaged.

        Returns:
            The element-wise mean vector, or None when ``response`` is empty.
        """
        all_vectors = []
        for val in response.values():
            # Check the ceiling before appending so that exactly
            # `tag_count_ceiling` vectors are used (the previous post-increment
            # check let one extra vector through).
            if tag_count_ceiling and len(all_vectors) >= tag_count_ceiling:
                break
            all_vectors.append(val['vectors'])
        if self.verbose:
            print("all_vectors",all_vectors )
        # Create a vector representing the entire content by averaging the vectors of all tokens
        if len(all_vectors) > 0:
            return np.mean(all_vectors, axis=0)
        return None

    def score_vector_similarity(self, neighborhood_vectors, individual_vectors):
        """Return the cosine similarity of two vectors.

        Args:
            neighborhood_vectors: Reference vector (array-like) or None.
            individual_vectors: Vector to compare (array-like) or None.

        Returns:
            The cosine similarity, or 0 when either input is None or has zero
            norm. An all-zero vector means no embedding was found, and it
            would otherwise cause a division by zero.
        """
        if neighborhood_vectors is None or individual_vectors is None:
            return 0
        # asarray lets plain lists work for both the zero check and the dot product.
        neighborhood_vectors = np.asarray(neighborhood_vectors)
        individual_vectors = np.asarray(individual_vectors)
        denominator = np.linalg.norm(neighborhood_vectors) * np.linalg.norm(individual_vectors)
        if denominator == 0:
            return 0
        return np.dot(neighborhood_vectors, individual_vectors) / denominator



class MockBt:
    """Small container exposing a ``logging`` attribute.

    NOTE(review): presumably a stand-in for a real ``bt`` module's
    ``bt.logging`` interface; confirm against the production code.
    """

    def __init__(self):
        # Uses this file's own `logging` class, not the standard-library module.
        console_logger = logging()
        self.logging = console_logger

# Module-level instance; the Evaluator in this file logs through
# bt.logging.debug / bt.logging.info / bt.logging.error.
bt = MockBt()

def compare_arrays(arr1, arr2):
    """Split the distinct items of two iterables into shared and exclusive groups.

    Returns a dict with three lists (duplicates removed; element order inside
    each list is whatever set iteration yields):
        "both":     items present in both inputs
        "unique_1": items only in ``arr1``
        "unique_2": items only in ``arr2``
    """
    first, second = set(arr1), set(arr2)
    return {
        "both": list(first & second),
        "unique_1": list(first - second),
        "unique_2": list(second - first),
    }

class Evaluator:
    """Scores miner tag lists against the tags of a full conversation.

    Relies on module-level names defined elsewhere in this file: ``llm``
    (tagging / vector helper), ``bt`` (logger holder) and ``compare_arrays``.

    Attributes:
        min_tags: A response with fewer tags than this is not scored.
    """

    min_tags = 3

    async def evaluate(self, full_convo_tags=None, miner_responses=None, body=None, exampleList=None):
        """Score every miner response and return one score per response.

        Args:
            full_convo_tags: Tags of the full conversation. Derived from
                ``body`` when omitted.
            miner_responses: List of ``{"uid": ..., "tags": [...]}`` dicts.
            body: Conversation text. When given, the conversation vector is
                built from it; otherwise it is built from ``full_convo_tags``.
            exampleList: List of tag lists; used to build ``miner_responses``
                (uid = list index) when ``miner_responses`` is empty.

        Returns:
            List of scores aligned index-for-index with ``miner_responses``.
            A response that is empty, has fewer than ``min_tags`` tags, or
            fails the integrity check gets ``0.0``.

        Raises:
            ValueError: If neither ``full_convo_tags`` nor ``body`` is given.
        """
        if not miner_responses and exampleList:
            miner_responses = [
                {"uid": idx, "tags": examples}
                for idx, examples in enumerate(exampleList)
            ]
        miner_responses = miner_responses or []

        if not full_convo_tags and not body:
            raise ValueError("evaluate() needs either full_convo_tags or body")

        # Always build the conversation vector locally. Previously it was only
        # defined when the tags were derived from `body`, so passing
        # `full_convo_tags` hit an undefined (or stale global) name.
        if body:
            response = await llm.simple_text_to_tags(body, min_tokens=0)
            if not full_convo_tags:
                print(f"Found tags for main convesation: {list(response.keys())}")
                full_convo_tags = list(response.keys())
        else:
            # No text available: vectorize each supplied tag, the same way
            # miner tags are vectorized.
            response = {}
            for tag in full_convo_tags:
                response.update(await llm.simple_text_to_tags(tag, min_tokens=0))
        neighborhood_vector = await llm.get_neighborhood(response)

        final_scores = []
        num_responses = len(miner_responses)
        bt.logging.debug(f"Starting eval of {num_responses} responses ...")

        for idx, miner_response in enumerate(miner_responses):
            # Quick integrity check; any failure yields a 0 score for this
            # response and moves on, keeping final_scores aligned with the input.
            try:
                if not miner_response:
                    bt.logging.info(f"Empty response at index {idx}. Skipping.")
                    final_scores.append(0.0)
                    continue
                # Make sure there are enough tags to make processing worthwhile
                if len(miner_response['tags']) < self.min_tags:
                    bt.logging.info(f"Only {len(miner_response['tags'])} tag(s) found for miner {miner_response['uid']}. Skipping.")
                    final_scores.append(0.0)
                    continue
                diff = compare_arrays(full_convo_tags, miner_response['tags'])
                bt.logging.debug(f"uid: {miner_response['uid']} Both tag(s) count:{len(diff['both'])} / Miner unique: {diff['unique_2']} ")
            except Exception as e:
                bt.logging.error(f"Error while intitial checking {idx}-th response: {e}, 0 score")
                # print_exception writes the traceback itself and returns None,
                # so it must not be wrapped in a logging call.
                print_exception(type(e), e, e.__traceback__)
                final_scores.append(0.0)
                continue

            # Tags that match the full convo are scored by vector similarity.
            # These are de-emphasized -- they are more for validation
            both_tag_scores = await self._score_tags(neighborhood_vector, diff['both'])
            both_tag_scores_avg = np.mean(both_tag_scores) if both_tag_scores else 0.0

            # Tags only the miner produced; guard the empty case so the
            # average is 0.0 rather than nan.
            unique_tag_scores = [self.get_full_convo_tag_score(tag) for tag in diff['unique_2']]
            unique_tag_scores_avg = np.mean(unique_tag_scores) if unique_tag_scores else 0.0

            # TODO: Take full convo tags and generate semantic neighborhood
            # Figure out standard deviation for vectors in neighboardhood
            #       Test each unique term against neighboard -- how many SDs does term similarity score?
            # Weight score on SD similarity scores

            final_score = (both_tag_scores_avg * 0.3) + (unique_tag_scores_avg * 0.7)
            bt.logging.debug(f"Final score: {final_score} Both score avg: {both_tag_scores_avg} Unique score avg: {unique_tag_scores_avg}")
            final_scores.append(final_score)

        bt.logging.debug("Complete eval.", final_scores)

        return final_scores

    def get_full_convo_tag_score(self, tag):
        """Return the score for a miner-only tag (currently a constant 0.9)."""
        return 0.9

    async def _score_tags(self, neighborhood_vector, tags, tag_count_ceiling=5):
        """Return one similarity score per tag, in the order of ``tags``.

        A tag with no extractable vectors scores 0; when
        ``neighborhood_vector`` is None every tag scores 0.
        """
        tags = list(tags)
        if neighborhood_vector is None:
            return [0] * len(tags)
        scores = []
        for tag in tags:
            tag_response = await llm.simple_text_to_tags(tag, min_tokens=0)
            if not tag_response:
                print(f"No vectors found for tag '{tag}'. Score of 0.")
                scores.append(0)
                continue
            tag_vector = await llm.get_neighborhood(tag_response, tag_count_ceiling=tag_count_ceiling)
            score = llm.score_vector_similarity(neighborhood_vector, tag_vector)
            scores.append(score)
            print("Score", tag, score)
        return scores

    async def calc_scores(self, neighborhood_vector, examples):
        """Score the distinct tags in ``examples`` against ``neighborhood_vector``.

        Returns:
            Tuple ``(median_score, skewness, mean_score)``, or
            ``(0.0, 0.0, 0.0)`` when ``examples`` is empty.
        """
        # Remove duplicate tags, keeping first-seen order so output is deterministic
        tag_set = list(dict.fromkeys(examples))
        scores = await self._score_tags(neighborhood_vector, tag_set, tag_count_ceiling=5)
        if not scores:
            return (0.0, 0.0, 0.0)
        mean_score = np.mean(scores)
        median_score = np.median(scores)
        skewness = skew(scores)
        return (median_score, skewness, mean_score)


# Testing Framework

Run the cell below to test scores against various keywords and content.

In [42]:
print("Setting up test data...")

# Reference conversation text: the tags and vectors extracted from it are what
# every example tag list below is scored against.
body = """Today for lunch, I decided to have a colorful and healthy meal. I started off with a bowl of mixed greens, topped with some cherry tomatoes, cucumbers, and sliced avocado. I love incorporating fruits and vegetables into my meals as they are packed with vitamins and minerals that are essential for our bodies. The fresh and crisp vegetables added a nice crunch to my salad, making it a refreshing and satisfying choice.
Next, I had a grilled chicken wrap with a side of steamed broccoli. The wrap was filled with tender and juicy chicken, lettuce, tomatoes, and a drizzle of ranch dressing. It was a perfect balance of protein and veggies, making it a well-rounded meal. The steamed broccoli was a great addition as it provided a good source of fiber and other nutrients.
To satisfy my sweet tooth, I had a bowl of mixed fruit for dessert. It had a variety of fruits such as strawberries, blueberries, and grapes. Not only did it add some natural sweetness to my meal, but it also provided me with a boost of antioxidants and other beneficial nutrients.
Eating a nutritious and balanced lunch not only keeps me physically healthy but also helps me stay focused and energized for the rest of the day. It's important to make conscious choices and incorporate fruits and vegetables into our meals to maintain a healthy diet. After finishing my lunch, I felt satisfied and ready to tackle the rest of my day with a renewed sense of energy."""

# One tag list per simulated miner; the list index is used as the miner uid.
# The lists mix food-related tags, unrelated tags (aviation, politics),
# nonsense strings, and heavy duplication.
exampleList = [
    ["apple", "lunch", "automobile", "banana", "pear", "dinner", "meal", "beef", "akjsdkajsdlkajl", "political party", "airliner"],
    ["apple", "lunch", "banana", "pear", "dinner", "meal", "beef"],
    ["akjsdkajsdlkajl", "political party", "airliner"],
    ["aircraft", "aviation", "flight", "passengers", "pilots", "cockpit", "air traffic control", "takeoff", "landing", "jet engines", "altitude", "airlines", "airports", "flight attendants", "airplane mode", "airworthiness", "boarding", "turbulence", "emergency exits", "cabin crew"],
    ["fruit", "apple", "orange", "banana", "grape", "strawberry", "mango", "watermelon", "pineapple", "kiwi", "peach", "plum", "cherry", "pear", "blueberry", "raspberry", "lemon", "lime", "fig", "coconut"],
    ["lunch", "food", "meal", "dining", "restaurant", "sandwich", "salad", "soup", "fast food", "takeout", "brunch", "picnic", "cafeteria", "lunch break", "healthy", "comfort food", "bag lunch", "leftovers", "vegetarian", "gluten-free"],
    ["apple", "apple", "apple", "apple", "apple", "apple", "apple", "apple", "apple", "apple", "apple", "apple", "apple", "apple", "akjsdkajsdlkajl"],
    ["apple", "akjsdkajsdlkajl1", "akjsdkajsdlkajl2", "akjsdkajsdlkajl3", "akjsdkajsdlkajl4", "akjsdkajsdlkajl5", "akjsdkajsdlkajl6", "akjsdkajsdlkajl7", "akjsdkajsdlkajl8"],
]

print("Running a basic spacy keyword test...")

# `llm` is a module-level global that the Evaluator methods also use, so it
# must exist before any Evaluator call. Top-level `await` works here because
# this is a notebook cell.
llm = llm_spacy()
# Tag the reference text and average the tag vectors into one vector.
response = await llm.simple_text_to_tags(body, min_tokens=0)
print(f"Found tags for main convesation: {list(response.keys())}")
neighborhood_vector = await llm.get_neighborhood(response)
#print("neighborhood_vector", neighborhood_vector)
#examples = ["apple", "lunch", "automobile", "banana", "pear", "dinner", "meal", "beef", "akjsdkajsdlkajl", "political party", "airliner"]

# Score each example tag list against the reference vector; uid is the list index.
scoreData = []
e = Evaluator()
for idx, examples in enumerate(exampleList):
    # NOTE(review): this initial value is overwritten by the unpacking below.
    skewness = 0
    results = await e.calc_scores(neighborhood_vector, examples)
    (median_score, skewness, mean_score) = results
    # NOTE(review): the weights already sum to 1.0, so the extra "/ 2" halves
    # the blended score -- confirm this is intended.
    adjustedScore = ( (0.7 * median_score) + (0.3 * mean_score) ) / 2
    scoreData.append({"uid": idx, "adjustedScore":adjustedScore})

    print(f"__________Num: {len(examples)} Median score: {median_score} Mean score: {mean_score} Skewness: {skewness}" )
print(scoreData)


Setting up test data...
Running a basic spacy keyword test...
Spacy en_core_web_lg Vector dimensionality: 300
Found tags for main convesation: ['today', 'lunch', 'I', 'decide', 'colorful', 'healthy', 'healthy meal', 'meal', 'start', 'bowl', 'mixed', 'mixed green', 'green', 'top', 'cherry', 'tomato', 'cucumber', 'slice', 'avocado', 'love', 'incorporate', 'fruit', 'vegetable', 'my', 'they', 'pack', 'vitamin', 'mineral', 'that', 'essential', 'our', 'body', 'fresh', 'crisp', 'crisp vegetable', 'add', 'nice', 'nice crunch', 'crunch', 'salad', 'make', 'it', 'refreshing', 'satisfying', 'satisfying choice', 'choice', 'grill', 'chicken', 'wrap', 'steamed', 'steamed broccoli', 'broccoli', 'fill', 'tender', 'juicy', 'lettuce', 'drizzle', 'ranch', 'dressing', 'perfect', 'perfect balance', 'balance', 'protein', 'veggie', 'rounded', 'rounded meal', 'great', 'great addition', 'addition', 'provide', 'good', 'good source', 'source', 'fiber', 'other nutrient', 'nutrient', 'satisfy', 'sweet', 'sweet toot

# Full Evaluator run

Run this to execute the full validator routine as if miners had returned their valuations.

In [36]:

# Conversation text handed to Evaluator.evaluate() below as `body`.
body = """Today for lunch, I decided to have a colorful and healthy meal. I started off with a bowl of mixed greens, topped with some cherry tomatoes, cucumbers, and sliced avocado. I love incorporating fruits and vegetables into my meals as they are packed with vitamins and minerals that are essential for our bodies. The fresh and crisp vegetables added a nice crunch to my salad, making it a refreshing and satisfying choice.
Next, I had a grilled chicken wrap with a side of steamed broccoli. The wrap was filled with tender and juicy chicken, lettuce, tomatoes, and a drizzle of ranch dressing. It was a perfect balance of protein and veggies, making it a well-rounded meal. The steamed broccoli was a great addition as it provided a good source of fiber and other nutrients.
To satisfy my sweet tooth, I had a bowl of mixed fruit for dessert. It had a variety of fruits such as strawberries, blueberries, and grapes. Not only did it add some natural sweetness to my meal, but it also provided me with a boost of antioxidants and other beneficial nutrients.
Eating a nutritious and balanced lunch not only keeps me physically healthy but also helps me stay focused and energized for the rest of the day. It's important to make conscious choices and incorporate fruits and vegetables into our meals to maintain a healthy diet. After finishing my lunch, I felt satisfied and ready to tackle the rest of my day with a renewed sense of energy."""

# Example of miner responses structure:
# NOTE(review): this list only illustrates the structure; it is not passed to
# evaluate() below, which receives exampleList instead.
miner_responses = [
    {"uid":1, "tags":["food", "banana", "pear"]},
]

# `exampleList` is not defined in this cell; it must already exist as a
# notebook global. evaluate() also uses the module-level `llm`.
e = Evaluator()
bt.logging.info("SCORES", await e.evaluate(body=body, exampleList=exampleList))


Found tags for main convesation: ['today', 'lunch', 'I', 'decide', 'colorful', 'healthy', 'healthy meal', 'meal', 'start', 'bowl', 'mixed', 'mixed green', 'green', 'top', 'cherry', 'tomato', 'cucumber', 'slice', 'avocado', 'love', 'incorporate', 'fruit', 'vegetable', 'my', 'they', 'pack', 'vitamin', 'mineral', 'that', 'essential', 'our', 'body', 'fresh', 'crisp', 'crisp vegetable', 'add', 'nice', 'nice crunch', 'crunch', 'salad', 'make', 'it', 'refreshing', 'satisfying', 'satisfying choice', 'choice', 'grill', 'chicken', 'wrap', 'steamed', 'steamed broccoli', 'broccoli', 'fill', 'tender', 'juicy', 'lettuce', 'drizzle', 'ranch', 'dressing', 'perfect', 'perfect balance', 'balance', 'protein', 'veggie', 'rounded', 'rounded meal', 'great', 'great addition', 'addition', 'provide', 'good', 'good source', 'source', 'fiber', 'other nutrient', 'nutrient', 'satisfy', 'sweet', 'sweet tooth', 'tooth', 'mixed fruit', 'dessert', 'variety', 'strawberry', 'blueberry', 'grape', 'natural', 'natural swee