In [None]:
from dotenv import load_dotenv

# Load settings from a .env file (python-dotenv) before any client is created.
# The TruLens cell further down reads TRULENS_DB_CONN_STRING via os.getenv, so
# this cell has to run first.
load_dotenv()

## LangSmith Analysis

In [None]:
# Set up the LangSmith client shared by the cells below.
# Client() is constructed with no arguments — presumably it picks up its API
# key and endpoint from the environment loaded by load_dotenv(); TODO confirm.
from langsmith import Client
client = Client()

In [None]:
# Print the name of every dataset returned by client.list_datasets().
for dataset in client.list_datasets():
    print(dataset.name)

In [None]:
# Print each project returned for the "blockchain_solana" reference dataset.
# NOTE: the loop variable `project` stays bound after the loop, and the next
# cells (feedback_stats, id, results) read it. They therefore operate on the
# LAST project listed here, and on a fresh kernel they fail with NameError if
# no project matched.
for project in client.list_projects(reference_dataset_name="blockchain_solana"):
    print(project)

In [None]:
# Display the feedback statistics of `project`, the variable left bound by the
# project-listing loop in the previous cell.
project.feedback_stats

In [None]:
# Display the id of `project`, the variable left bound by the project-listing
# loop above.
project.id

In [None]:
# Flatten `project` (left bound by the project-listing cell above) into one
# summary record: identifiers, evaluation metadata, timing and token
# statistics, and the "n" / "avg" entries of each feedback score.
project_meta = project.extra["metadata"]
project_scores = project.feedback_stats

results = {
    # Identification
    "id": str(project.id),
    "name": project.name,
    "url": project.url,
    # Evaluation metadata stored on the project
    "dataset": project_meta["dataset"],
    "collection": project_meta["collection"],
    "eval-model": project_meta["eval-model"],
    "eval-run": project_meta["run"],
    # Run timing and volume; latencies are exported as seconds
    "start_time": project.start_time.isoformat(),
    "last_run_start_time": project.last_run_start_time.isoformat(),
    "run_count": project.run_count,
    "latency_p50": project.latency_p50.total_seconds(),
    "latency_p99": project.latency_p99.total_seconds(),
    # Token usage
    "prompt_tokens": project.prompt_tokens,
    "completion_tokens": project.completion_tokens,
    "total_tokens": project.total_tokens,
    # Feedback scores
    "answer_correctness_n": project_scores["answer_correctness_score"]["n"],
    "answer_correctness_avg": project_scores["answer_correctness_score"]["avg"],
    "answer_relevancy_n": project_scores["answer_relevancy_score"]["n"],
    "answer_relevancy_avg": project_scores["answer_relevancy_score"]["avg"],
    "context_recall_n": project_scores["context_recall_score"]["n"],
    "context_recall_avg": project_scores["context_recall_score"]["avg"],
    "context_relevancy_n": project_scores["context_relevancy_score"]["n"],
    "context_relevancy_avg": project_scores["context_relevancy_score"]["avg"],
    "faithfulness_n": project_scores["faithfulness_score"]["n"],
    "faithfulness_avg": project_scores["faithfulness_score"]["avg"],
}
results

In [None]:
import json

# Feedback metrics exported per project. Each one is read from
# feedback_stats["<metric>_score"] and contributes the columns
# "<metric>_n" and "<metric>_avg", in this order.
FEEDBACK_METRICS = (
    "answer_correctness",
    "answer_relevancy",
    "context_recall",
    "context_relevancy",
    "faithfulness",
)


def _iso_or_none(moment):
    """Return moment.isoformat(), or None when moment is None."""
    return None if moment is None else moment.isoformat()


def _seconds_or_none(duration):
    """Return duration.total_seconds(), or None when duration is None."""
    return None if duration is None else duration.total_seconds()


def _project_record(project):
    """Flatten one LangSmith project into a JSON-serialisable summary dict.

    The keys and their order are the same for every project, so each line of
    the export has an identical schema.

    Projects whose ``total_tokens`` is falsy get ``None`` for the last-run
    time, the latencies and every feedback column (the same gate the export
    has always used). For the remaining projects each value is looked up
    defensively: missing metadata keys, a missing latency / last-run time or
    an absent feedback metric produce ``None`` instead of raising, so a single
    incomplete project no longer aborts the export and truncates the file.

    Args:
        project: an item yielded by ``client.list_projects(...)``.

    Returns:
        dict: the summary record for ``project``.
    """
    metadata = (project.extra or {}).get("metadata") or {}
    has_usage = bool(project.total_tokens)
    feedback = (project.feedback_stats or {}) if has_usage else {}

    record = {
        "id": f"{project.id}",
        "name": project.name,
        "url": project.url,
        "dataset": metadata.get("dataset"),
        "collection": metadata.get("collection"),
        "eval-model": metadata.get("eval-model"),
        "eval-run": metadata.get("run"),
        "start_time": _iso_or_none(project.start_time),
        "last_run_start_time": (
            _iso_or_none(project.last_run_start_time) if has_usage else None
        ),
        "run_count": project.run_count,
        "latency_p50": _seconds_or_none(project.latency_p50) if has_usage else None,
        "latency_p99": _seconds_or_none(project.latency_p99) if has_usage else None,
        "prompt_tokens": project.prompt_tokens,
        "completion_tokens": project.completion_tokens,
        "total_tokens": project.total_tokens,
    }
    for metric in FEEDBACK_METRICS:
        # `or {}` also covers a metric that is present but stored as None.
        stats = feedback.get(f"{metric}_score") or {}
        record[f"{metric}_n"] = stats.get("n")
        record[f"{metric}_avg"] = stats.get("avg")
    return record


# Write one JSON object per line (JSON Lines) for every project of every
# dataset. json.dumps emits ASCII-only text by default, so pinning the file
# encoding does not change the bytes written; it only removes the dependency
# on the platform default.
with open("results.jsonl", "w", encoding="utf-8") as out_file:
    for dataset in client.list_datasets():
        for project in client.list_projects(reference_dataset_id=dataset.id):
            results = _project_record(project)
            out_file.write(json.dumps(results) + "\n")

In [None]:
import pandas

# Read the JSON Lines export back into a DataFrame (one JSON object per line)
# and save it as CSV with a header row.
# NOTE(review): to_csv is called without index=, so pandas' default applies and
# the row index is presumably written as an unnamed first column — confirm that
# is wanted.
df = pandas.read_json("results.jsonl", orient="records", lines=True)
df.to_csv("results.csv", header=True)

## TruLens Analysis

In [None]:
# Install the psycopg2 PostgreSQL driver into the kernel's environment.
# NOTE(review): presumably needed because TRULENS_DB_CONN_STRING points at a
# PostgreSQL database — confirm. The version is not pinned.
%pip install psycopg2-binary


In [None]:
from trulens_eval import Tru
import os

# Create the TruLens handle used by the cells below, pointing it at the
# database named by the TRULENS_DB_CONN_STRING environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, so Tru would
# receive database_url=None — presumably falling back to its default database
# instead of failing. Confirm the variable is set before trusting the
# leaderboard or running the reset cell.
tru = Tru(database_url=os.getenv("TRULENS_DB_CONN_STRING"))

In [None]:
# Keep only the ids of the TruLens apps whose app_id ends with "_512"
# (presumably the 512 variant of each app — confirm what the suffix encodes).
app_ids = [
    app["app_id"]
    for app in tru.get_apps()
    if app["app_id"].endswith("_512")
]


In [None]:
# Show the TruLens leaderboard restricted to the app ids collected in the
# previous cell.
tru.get_leaderboard(app_ids=app_ids)

In [None]:
# WARNING: resets the TruLens database that `tru` is connected to. This cell
# also executes on "Run All"; presumably the reset discards the stored
# evaluation records — confirm, and run it only deliberately.
tru.reset_database()