In [None]:
!pip install datasets anthropic openai

In [None]:
import os
import json
import random
import logging
import anthropic
import openai
import numpy as np

from getpass import getpass
from datasets import load_dataset
from scipy.spatial.distance import cdist

In [None]:
# Show INFO-level and higher log records, so the logging.error calls made by the
# helper functions in this notebook are visible in the cell output.
logging.basicConfig(level=logging.INFO)

In [None]:
# Use the key from the environment when it is set; otherwise prompt for it without
# echoing, so no secret is ever written into the notebook.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") or getpass("ANTHROPIC_API_KEY: ")
anthropic_client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

# The prompt names the OpenAI key explicitly so the two prompts cannot be confused.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or getpass("OPENAI_API_KEY: ")
openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)

In [None]:
def load_data(split_percentage='validation[:1%]') -> list:
    """Load a slice of the raw WikiText-103 dataset and return its non-empty lines.

    Args:
        split_percentage: Split expression handed to ``load_dataset`` as ``split``.

    Returns:
        A list with every line of every record's ``'text'`` field, stripped of
        surrounding whitespace; lines that are empty after stripping are dropped.
        The order follows the dataset.
    """
    records = load_dataset("wikitext", "wikitext-103-raw-v1", split=split_percentage)
    lines = []
    for record in records:
        for raw_line in record['text'].split('\n'):
            cleaned = raw_line.strip()
            if cleaned:
                lines.append(cleaned)
    return lines

In [None]:
def classify_sentence(sentence: str, model_description: str, model: str = "claude-3-haiku-20240307") -> str:
    """Classify a sentence relative to the described reward model.

    The model is asked to answer with a single token (+, 0 or -). That answer is
    translated into the label names used as bucket keys elsewhere in this notebook.

    Args:
        sentence: The text to classify.
        model_description: Completes the phrase "Given that a reward model ..." in the prompt.
        model: Name of the Anthropic model to query.

    Returns:
        'positive', 'neutral' or 'negative'. Returns "Error" when the request fails,
        the response has no content, or the answer is not one of the expected tokens.
    """
    # Adjacent string literals are used instead of a backslash continuation, which
    # would embed the next line's indentation in the prompt.
    prompt = (
        f"Given that a reward model {model_description}, classify the following sentence as 'positive' (+), "
        f"'neutral' (0), or 'negative' (-):\n\n{sentence}\n\nOutput only a single token (+, 0 or -) "
        "based on the relation of the sentence to the reward model."
    )
    label_by_token = {
        '+': 'positive', '0': 'neutral', '-': 'negative',
        'positive': 'positive', 'neutral': 'neutral', 'negative': 'negative',
    }
    try:
        response = anthropic_client.messages.create(
            model=model,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}]
        )
        if not response.content:
            return "Error"
        raw_answer = response.content[0].text
    except Exception as e:
        logging.error("Error in classify_sentence: %s", e)
        return "Error"

    # Tolerate surrounding whitespace, quotes, parentheses and a trailing period.
    token = raw_answer.strip().strip("'\"().").lower()
    label = label_by_token.get(token)
    if label is None:
        logging.warning("Unrecognised classification output: %r", raw_answer)
        return "Error"
    return label

In [None]:
def get_embeddings(sentences: list, model: str = "text-embedding-3-small") -> "np.ndarray | None":
    """Embed the given sentences with the OpenAI embeddings endpoint.

    Args:
        sentences: The texts to embed.
        model: Name of the embedding model to use.

    Returns:
        An array with one row per embedding in the response, or None if the
        request (or reading its result) fails; the failure is logged.
    """
    try:
        # openai.OpenAI clients expose `embeddings.create`; the response is an object
        # whose `data` items carry the vector in their `embedding` attribute.
        response = openai_client.embeddings.create(input=sentences, model=model)
        return np.array([item.embedding for item in response.data])
    except Exception as e:
        logging.error("Embedding error: %s", e)
        return None

In [None]:
def generate_triples(sentences: list, description: str, embed: bool = False) -> list:
    """Bucket sentences by sentiment and zip the buckets into triples.

    Args:
        sentences: Texts to classify with ``classify_sentence``.
        description: Reward-model description forwarded to ``classify_sentence``.
        embed: When True, every sentence in a bucket is replaced by the sentence of
            the same bucket whose embedding is closest to it (Euclidean distance).

    Returns:
        A list of ``(negative, neutral, positive)`` tuples, one per position up to
        the length of the smallest bucket. Sentences whose classification is not
        recognised are ignored.
    """
    # The classifier is prompted to answer with a single token (+, 0 or -), so both
    # the tokens and the spelled-out labels are mapped onto the bucket names.
    label_by_token = {
        '+': 'positive', '0': 'neutral', '-': 'negative',
        'positive': 'positive', 'neutral': 'neutral', 'negative': 'negative',
    }
    sentiment_dict = {'positive': [], 'neutral': [], 'negative': []}
    for sentence in sentences:
        answer = classify_sentence(sentence, description)
        label = label_by_token.get(str(answer).strip().lower())
        if label is not None:
            sentiment_dict[label].append(sentence)

    if embed:
        for sentiment, sents in list(sentiment_dict.items()):
            # With fewer than two sentences there is no neighbour to pick, so the
            # bucket is left unchanged and the embedding request is skipped.
            if len(sents) < 2:
                continue
            embeddings = get_embeddings(sents)
            if embeddings is None or len(embeddings) != len(sents):
                continue
            distance_matrix = cdist(embeddings, embeddings, 'euclidean')
            # Exclude each sentence from being its own nearest neighbour.
            np.fill_diagonal(distance_matrix, np.inf)
            indices = np.argmin(distance_matrix, axis=1)
            # NOTE(review): this substitutes each sentence with its nearest neighbour,
            # so a sentence can appear more than once; confirm that is intended.
            sentiment_dict[sentiment] = [sents[i] for i in indices]

    # zip stops at the shortest bucket.
    return list(zip(sentiment_dict['negative'], sentiment_dict['neutral'], sentiment_dict['positive']))

In [None]:
def store_triples(triples: list, output_path='triples.json'):
    """Write the triples to ``output_path`` as UTF-8 encoded JSON.

    Args:
        triples: The data to serialise.
        output_path: Destination file; it is created or overwritten.

    The output is indented by four spaces and non-ASCII characters are written
    as-is rather than escaped.
    """
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(output_path, mode='w', encoding='utf-8') as handle:
        json.dump(triples, handle, **dump_options)

In [None]:
def main(embed: bool = False):
    """Run the whole pipeline: load sentences, build triples, store them.

    Args:
        embed: Forwarded to ``generate_triples`` as its ``embed`` flag.
    """
    corpus = load_data()
    store_triples(generate_triples(corpus, REWARD_MODEL_DESCRIPTION, embed=embed))

In [None]:
# Description handed to generate_triples by main(); it is interpolated into the
# classification prompt right after the words "Given that a reward model".
REWARD_MODEL_DESCRIPTION = 'calculates reward based on how positive the sentiment of the input is'

In [None]:
# Run the pipeline with the embedding-based reordering switched on. The guard keeps
# the pipeline from running when this code is imported as a module.
if __name__ == "__main__":
    main(embed=True)

### If you're planning on making this an installable package, the class-based version below might be more useful:

In [None]:
class SentenceClassifier:
    """Classify sentences against a reward-model description and build sentiment triples.

    Sentences are sorted into the 'positive', 'neutral' and 'negative' lists of
    ``sentiment_dict``; ``generate_triples`` then combines the lists position by
    position. The lists keep accumulating across calls to ``categorize_sentences``.
    """

    # Answers accepted from the model, mapped onto the keys of ``sentiment_dict``.
    # The prompt asks for a single token (+, 0 or -); spelled-out labels are accepted too.
    _LABEL_BY_TOKEN = {
        '+': 'positive', '0': 'neutral', '-': 'negative',
        'positive': 'positive', 'neutral': 'neutral', 'negative': 'negative',
    }

    def __init__(self, description: str, api_clients: dict):
        """Store the description and the API clients.

        Args:
            description: Completes the phrase "Given that a reward model ..." in the prompt.
            api_clients: Mapping with an 'anthropic' entry (used for classification)
                and an 'openai' entry (used for embeddings).
        """
        self.description = description
        self.anthropic_client = api_clients['anthropic']
        self.openai_client = api_clients['openai']
        self.sentiment_dict = {'positive': [], 'neutral': [], 'negative': []}

    def load_data(self, split_percentage='validation[:1%]') -> list:
        """Return the stripped, non-empty lines of a slice of raw WikiText-103.

        Args:
            split_percentage: Split expression handed to ``load_dataset`` as ``split``.
        """
        dataset = load_dataset("wikitext", "wikitext-103-raw-v1", split=split_percentage)
        return [sentence.strip() for item in dataset for sentence in item['text'].split('\n') if sentence.strip()]

    def classify_sentence(self, sentence: str) -> str:
        """Classify one sentence relative to ``self.description``.

        Returns:
            'positive', 'neutral' or 'negative'; "Error" when the response has no
            content or the answer is not one of the expected tokens. Exceptions
            raised by the API client propagate to the caller.
        """
        # Adjacent string literals are used instead of a backslash continuation, which
        # would embed the next line's indentation in the prompt.
        prompt = (
            f"Given that a reward model {self.description}, classify the following sentence as 'positive' (+), "
            f"'neutral' (0), or 'negative' (-):\n\n{sentence}\n\nOutput only a single token (+, 0 or -) "
            "based on the relation of the sentence to the reward model."
        )
        response = self.anthropic_client.messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}],
        )
        if not response.content:
            return "Error"
        # Tolerate surrounding whitespace, quotes, parentheses and a trailing period.
        answer = response.content[0].text.strip().strip("'\"().").lower()
        return self._LABEL_BY_TOKEN.get(answer, "Error")

    def get_embeddings(self, sentences: list) -> np.ndarray:
        """Embed the sentences and return one row per embedding in the response."""
        # openai.OpenAI clients expose `embeddings.create`; the response is an object
        # whose `data` items carry the vector in their `embedding` attribute.
        response = self.openai_client.embeddings.create(input=sentences, model="text-embedding-3-small")
        return np.array([item.embedding for item in response.data])

    def categorize_sentences(self, sentences: list):
        """Classify each sentence and append it to the matching list in ``sentiment_dict``.

        Sentences whose classification is not a key of ``sentiment_dict`` are skipped.
        """
        for sentence in sentences:
            sentiment = self.classify_sentence(sentence)
            if sentiment in self.sentiment_dict:
                self.sentiment_dict[sentiment].append(sentence)

    def embed_and_sort_sentences(self):
        """Replace every sentence by its nearest neighbour within the same bucket.

        NOTE(review): a sentence can end up in a bucket more than once after this
        substitution; confirm that is intended.
        """
        for sentiment, sents in list(self.sentiment_dict.items()):
            # With fewer than two sentences there is no neighbour to pick, so the
            # bucket is left unchanged and the embedding request is skipped.
            if len(sents) < 2:
                continue
            embeddings = self.get_embeddings(sents)
            indices = self.get_closest_indices(embeddings)
            self.sentiment_dict[sentiment] = [sents[i] for i in indices]

    def get_closest_indices(self, embeddings: np.ndarray) -> np.ndarray:
        """For each row, return the index of the other row at the smallest Euclidean distance."""
        distance_matrix = cdist(embeddings, embeddings, 'euclidean')
        # Exclude each row from being its own nearest neighbour.
        np.fill_diagonal(distance_matrix, np.inf)
        return np.argmin(distance_matrix, axis=1)

    def generate_triples(self, embed: bool = False) -> list:
        """Combine the buckets into ``(negative, neutral, positive)`` tuples.

        Args:
            embed: When True, ``embed_and_sort_sentences`` is applied to the buckets first.

        Returns:
            One tuple per position, up to the length of the smallest bucket.
        """
        if embed:
            self.embed_and_sort_sentences()

        # zip stops at the shortest bucket.
        return list(zip(
            self.sentiment_dict['negative'],
            self.sentiment_dict['neutral'],
            self.sentiment_dict['positive'],
        ))

In [None]:
# Take the keys from the environment when they are exported; otherwise prompt for them,
# so this cell also works when the variables are not set in the kernel's environment.
api_clients = {
    'anthropic': anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY") or getpass("ANTHROPIC_API_KEY: ")),
    'openai': openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY") or getpass("OPENAI_API_KEY: "))
}
classifier = SentenceClassifier(description='calculates reward based on how positive the sentiment of the input is', api_clients=api_clients)
# Classify the loaded sentences into the classifier's buckets, then build the
# triples with the embedding-based reordering enabled.
sentences = classifier.load_data()
classifier.categorize_sentences(sentences)
triples = classifier.generate_triples(embed=True)