diff --git a/docs/howtos/customizations/metrics/write_your_own_metric.ipynb b/docs/howtos/customizations/metrics/write_your_own_metric.ipynb
index bb47013e2..66407cfe6 100644
--- a/docs/howtos/customizations/metrics/write_your_own_metric.ipynb
+++ b/docs/howtos/customizations/metrics/write_your_own_metric.ipynb
@@ -62,7 +62,7 @@
    "source": [
     "from ragas.llms import llm_factory\n",
     "\n",
-    "evaluator_llm = llm_factory('gpt-4o')"
+    "evaluator_llm = llm_factory(\"gpt-4o\")"
    ]
   },
   {
@@ -104,7 +104,7 @@
     "hallucinations_binary = AspectCritic(\n",
     "    name=\"hallucinations_binary\",\n",
     "    definition=\"Did the model hallucinate or add any information that was not present in the retrieved context?\",\n",
-    "    llm=evaluator_llm\n",
+    "    llm=evaluator_llm,\n",
     ")\n",
     "\n",
     "await hallucinations_binary.single_turn_ascore(eval_dataset[0])"
@@ -163,9 +163,7 @@
     "from ragas.metrics import RubricsScoreWithoutReference\n",
     "\n",
     "hallucinations_rubric = RubricsScoreWithoutReference(\n",
-    "    name=\"hallucinations_rubric\",\n",
-    "    llm=evaluator_llm,\n",
-    "    rubrics=rubric\n",
+    "    name=\"hallucinations_rubric\", llm=evaluator_llm, rubrics=rubric\n",
     ")\n",
     "\n",
     "await hallucinations_rubric.single_turn_ascore(eval_dataset[0])"
@@ -215,19 +213,28 @@
     "from ragas.callbacks import Callbacks\n",
     "from ragas.dataset_schema import SingleTurnSample\n",
     "\n",
+    "\n",
     "@dataclass\n",
     "class HallucinationsMetric(MetricWithLLM, SingleTurnMetric):\n",
     "    # name of the metric\n",
     "    name: str = \"hallucinations_metric\"\n",
     "    # we need to define the required columns for the metric\n",
-    "    _required_columns: t.Dict[MetricType, t.Set[str]] = field(default_factory=lambda: {MetricType.SINGLE_TURN: {\"user_input\", \"response\", \"retrieved_contexts\"}})\n",
+    "    _required_columns: t.Dict[MetricType, t.Set[str]] = field(\n",
+    "        default_factory=lambda: {\n",
+    "            MetricType.SINGLE_TURN: {\"user_input\", \"response\", \"retrieved_contexts\"}\n",
+    "        }\n",
+    "    )\n",
     "\n",
     "    def __post_init__(self):\n",
     "        # init the faithfulness metric\n",
     "        self.faithfulness_metric = Faithfulness(llm=self.llm)\n",
     "\n",
-    "    async def _single_turn_ascore(self, sample: SingleTurnSample, callbacks: Callbacks) -> float:\n",
-    "        faithfulness_score = await self.faithfulness_metric.single_turn_ascore(sample, callbacks)\n",
+    "    async def _single_turn_ascore(\n",
+    "        self, sample: SingleTurnSample, callbacks: Callbacks\n",
+    "    ) -> float:\n",
+    "        faithfulness_score = await self.faithfulness_metric.single_turn_ascore(\n",
+    "            sample, callbacks\n",
+    "        )\n",
     "        return 1 - faithfulness_score"
    ]
   },
@@ -269,12 +276,8 @@
     "from ragas import evaluate\n",
     "\n",
     "results = evaluate(\n",
-    "    eval_dataset, \n",
-    "    metrics=[\n",
-    "        hallucinations_metric,\n",
-    "        hallucinations_rubric,\n",
-    "        hallucinations_binary\n",
-    "    ], \n",
+    "    eval_dataset,\n",
+    "    metrics=[hallucinations_metric, hallucinations_rubric, hallucinations_binary],\n",
     ")"
    ]
   },
diff --git a/src/ragas/callbacks.py b/src/ragas/callbacks.py
index 014eabb23..7b16059f5 100644
--- a/src/ragas/callbacks.py
+++ b/src/ragas/callbacks.py
@@ -57,13 +57,13 @@ class ChainType(Enum):
 
 
 class ChainRun(BaseModel):
-    run_id: uuid.UUID
-    parent_run_id: t.Optional[uuid.UUID]
+    run_id: str
+    parent_run_id: t.Optional[str]
     name: str
     inputs: t.Dict[str, t.Any]
     metadata: t.Dict[str, t.Any]
     outputs: t.Dict[str, t.Any] = Field(default_factory=dict)
-    children: t.List[uuid.UUID] = Field(default_factory=list)
+    children: t.List[str] = Field(default_factory=list)
 
 
 class ChainRunEncoder(json.JSONEncoder):
@@ -72,12 +72,14 @@ def default(self, o):
             return str(o)
         if isinstance(o, ChainType):
             return o.value
+        # if isinstance(o, EvaluationResult):
+        #     return ""
        return json.JSONEncoder.default(self, o)
 
 
 @dataclass
 class RagasTracer(BaseCallbackHandler):
-    traces: t.Dict[uuid.UUID, ChainRun] = field(default_factory=dict)
+    traces: t.Dict[str, ChainRun] = field(default_factory=dict)
 
     def on_chain_start(
         self,
@@ -90,17 +92,17 @@ def on_chain_start(
         serialized: t.Dict[str, t.Any],
         inputs: t.Dict[str, t.Any],
         *,
         run_id: uuid.UUID,
         parent_run_id: t.Optional[uuid.UUID] = None,
         tags: t.Optional[t.List[str]] = None,
         metadata: t.Optional[t.Dict[str, t.Any]] = None,
         **kwargs: t.Any,
     ) -> t.Any:
-        self.traces[run_id] = ChainRun(
-            run_id=run_id,
-            parent_run_id=parent_run_id,
+        self.traces[str(run_id)] = ChainRun(
+            run_id=str(run_id),
+            parent_run_id=str(parent_run_id) if parent_run_id else None,
             name=serialized["name"],
             inputs=inputs,
             metadata=metadata or {},
             children=[],
         )
 
-        if parent_run_id and parent_run_id in self.traces:
-            self.traces[parent_run_id].children.append(run_id)
+        if parent_run_id and str(parent_run_id) in self.traces:
+            self.traces[str(parent_run_id)].children.append(str(run_id))
 
     def on_chain_end(
         self,
         outputs: t.Dict[str, t.Any],
         *,
         run_id: uuid.UUID,
         **kwargs: t.Any,
     ) -> t.Any:
-        self.traces[run_id].outputs = outputs
+        self.traces[str(run_id)].outputs = outputs
 
     def to_jsons(self) -> str:
         return json.dumps(
             [t.model_dump() for t in self.traces.values()],
-            indent=4,
             cls=ChainRunEncoder,
         )
@@ -131,7 +132,7 @@ def __str__(self):
 
 
 def parse_run_traces(
-    traces: t.Dict[uuid.UUID, ChainRun],
+    traces: t.Dict[str, ChainRun],
 ) -> t.List[t.Dict[str, t.Any]]:
     root_traces = [
         chain_trace
diff --git a/src/ragas/dataset_schema.py b/src/ragas/dataset_schema.py
index 0a59c2485..d3b4978d7 100644
--- a/src/ragas/dataset_schema.py
+++ b/src/ragas/dataset_schema.py
@@ -8,13 +8,12 @@
 from datasets import Dataset as HFDataset
 from pydantic import BaseModel, field_validator
 
-from ragas.callbacks import parse_run_traces
+from ragas.callbacks import ChainRunEncoder, parse_run_traces
 from ragas.cost import CostCallbackHandler
 from ragas.messages import AIMessage, HumanMessage, ToolCall, ToolMessage
-from ragas.utils import safe_nanmean
+from ragas.utils import RAGAS_API_URL, safe_nanmean
 
 if t.TYPE_CHECKING:
-    import uuid
     from pathlib import Path
 
     from datasets import Dataset as HFDataset
@@ -375,7 +374,7 @@ class EvaluationResult:
     binary_columns: t.List[str] = field(default_factory=list)
     cost_cb: t.Optional[CostCallbackHandler] = None
     traces: t.List[t.Dict[str, t.Any]] = field(default_factory=list)
-    ragas_traces: t.Dict[uuid.UUID, ChainRun] = field(default_factory=dict, repr=False)
+    ragas_traces: t.Dict[str, ChainRun] = field(default_factory=dict, repr=False)
 
     def __post_init__(self):
         # transform scores from list of dicts to dict of lists
@@ -395,6 +394,13 @@ def __post_init__(self):
         # parse the traces
         self.traces = parse_run_traces(self.ragas_traces)
 
+    def __repr__(self) -> str:
+        score_strs = [f"'{k}': {v:0.4f}" for k, v in self._repr_dict.items()]
+        return "{" + ", ".join(score_strs) + "}"
+
+    def __getitem__(self, key: str) -> t.List[float]:
+        return self._scores_dict[key]
+
     def to_pandas(self, batch_size: int | None = None, batched: bool = False):
         """
         Convert the result to a pandas DataFrame.
@@ -487,9 +493,36 @@ def total_cost(
             cost_per_input_token, cost_per_output_token, per_model_costs
         )
 
-    def __repr__(self) -> str:
-        score_strs = [f"'{k}': {v:0.4f}" for k, v in self._repr_dict.items()]
-        return "{" + ", ".join(score_strs) + "}"
+    def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
+        from datetime import datetime, timezone
+
+        import requests
+
+        timestamp = datetime.now(timezone.utc).isoformat()
+        root_trace = [
+            trace for trace in self.ragas_traces.values() if trace.parent_run_id is None
+        ][0]
+        packet = json.dumps(
+            {
+                "run_id": str(root_trace.run_id),
+                "created_at": timestamp,
+                "evaluation_run": [t.model_dump() for t in self.ragas_traces.values()],
+            },
+            cls=ChainRunEncoder,
+        )
 
-    def __getitem__(self, key: str) -> t.List[float]:
-        return self._scores_dict[key]
+        response = requests.post(
+            f"{base_url}/alignment/evaluation",
+            data=packet,
+            headers={"Content-Type": "application/json"},
+        )
+
+        if response.status_code != 200:
+            raise Exception(f"Failed to upload results: {response.text}")
+
+        evaluation_endpoint = (
+            f"https://app.ragas.io/alignment/evaluation/{root_trace.run_id}"
+        )
+        if verbose:
+            print(f"Evaluation results uploaded! View at {evaluation_endpoint}")
+        return evaluation_endpoint
diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py
index 81fa62cae..94faa877a 100644
--- a/src/ragas/evaluation.py
+++ b/src/ragas/evaluation.py
@@ -344,7 +344,7 @@ def evaluate(
             ragas_traces=tracer.traces,
         )
         if not evaluation_group_cm.ended:
-            evaluation_rm.on_chain_end(result)
+            evaluation_rm.on_chain_end({"scores": result.scores})
     finally:
         # reset llms and embeddings if changed
         for i in llm_changed:
diff --git a/src/ragas/metrics/_context_entities_recall.py b/src/ragas/metrics/_context_entities_recall.py
index 6976d69ce..2a40c2cf8 100644
--- a/src/ragas/metrics/_context_entities_recall.py
+++ b/src/ragas/metrics/_context_entities_recall.py
@@ -23,7 +23,9 @@ class EntitiesList(BaseModel):
 
 class ExtractEntitiesPrompt(PydanticPrompt[StringIO, EntitiesList]):
     name: str = "text_entity_extraction"
-    instruction: str = "Given a text, extract unique entities without repetition. Ensure you consider different forms or mentions of the same entity as a single entity."
+    instruction: str = (
+        "Given a text, extract unique entities without repetition. Ensure you consider different forms or mentions of the same entity as a single entity."
+    )
     input_model = StringIO
     output_model = EntitiesList
     examples = [
diff --git a/src/ragas/metrics/_context_precision.py b/src/ragas/metrics/_context_precision.py
index f8b02a200..e935e8f02 100644
--- a/src/ragas/metrics/_context_precision.py
+++ b/src/ragas/metrics/_context_precision.py
@@ -33,7 +33,9 @@ class Verification(BaseModel):
 
 class ContextPrecisionPrompt(PydanticPrompt[QAC, Verification]):
     name: str = "context_precision"
-    instruction: str = 'Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output.'
+    instruction: str = (
+        'Given question, answer and context verify if the context was useful in arriving at the given answer. Give verdict as "1" if useful and "0" if not with json output.'
+    )
     input_model = QAC
     output_model = Verification
     examples = [
@@ -157,17 +159,17 @@ async def _ascore(
         user_input, retrieved_contexts, reference = self._get_row_attributes(row)
         responses = []
         for context in retrieved_contexts:
-            verdicts: t.List[
-                Verification
-            ] = await self.context_precision_prompt.generate_multiple(
-                data=QAC(
-                    question=user_input,
-                    context=context,
-                    answer=reference,
-                ),
-                n=self.reproducibility,
-                llm=self.llm,
-                callbacks=callbacks,
+            verdicts: t.List[Verification] = (
+                await self.context_precision_prompt.generate_multiple(
+                    data=QAC(
+                        question=user_input,
+                        context=context,
+                        answer=reference,
+                    ),
+                    n=self.reproducibility,
+                    llm=self.llm,
+                    callbacks=callbacks,
+                )
             )
             responses.append([result.model_dump() for result in verdicts])
diff --git a/src/ragas/metrics/_context_recall.py b/src/ragas/metrics/_context_recall.py
index d685deddc..e655957e4 100644
--- a/src/ragas/metrics/_context_recall.py
+++ b/src/ragas/metrics/_context_recall.py
@@ -41,7 +41,9 @@ class ContextRecallClassificationPrompt(
     PydanticPrompt[QCA, ContextRecallClassifications]
 ):
     name: str = "context_recall_classification"
-    instruction: str = "Given a context, and an answer, analyze each sentence in the answer and classify if the sentence can be attributed to the given context or not. Use only 'Yes' (1) or 'No' (0) as a binary classification. Output json with reason."
+    instruction: str = (
+        "Given a context, and an answer, analyze each sentence in the answer and classify if the sentence can be attributed to the given context or not. Use only 'Yes' (1) or 'No' (0) as a binary classification. Output json with reason."
+    )
     input_model = QCA
     output_model = ContextRecallClassifications
     examples = [
@@ -148,17 +150,17 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         assert self.llm is not None, "set LLM before use"
 
         # run classification
-        classifications_list: t.List[
-            ContextRecallClassifications
-        ] = await self.context_recall_prompt.generate_multiple(
-            data=QCA(
-                question=row["user_input"],
-                context="\n".join(row["retrieved_contexts"]),
-                answer=row["reference"],
-            ),
-            llm=self.llm,
-            callbacks=callbacks,
-            n=self.reproducibility,
+        classifications_list: t.List[ContextRecallClassifications] = (
+            await self.context_recall_prompt.generate_multiple(
+                data=QCA(
+                    question=row["user_input"],
+                    context="\n".join(row["retrieved_contexts"]),
+                    answer=row["reference"],
+                ),
+                llm=self.llm,
+                callbacks=callbacks,
+                n=self.reproducibility,
+            )
         )
         classification_dicts = []
         for classification in classifications_list:
diff --git a/src/ragas/metrics/_summarization.py b/src/ragas/metrics/_summarization.py
index ce9843110..0cdb09d63 100644
--- a/src/ragas/metrics/_summarization.py
+++ b/src/ragas/metrics/_summarization.py
@@ -31,7 +31,9 @@ class AnswersGenerated(BaseModel):
 
 class ExtractKeyphrasePrompt(PydanticPrompt[StringIO, ExtractedKeyphrases]):
     name: str = "extract_keyphrases"
-    instruction: str = "Extract keyphrases of type: Person, Organization, Location, Date/Time, Monetary Values, and Percentages."
+    instruction: str = (
+        "Extract keyphrases of type: Person, Organization, Location, Date/Time, Monetary Values, and Percentages."
+    )
     input_model = StringIO
     output_model = ExtractedKeyphrases
     examples: t.List[t.Tuple[StringIO, ExtractedKeyphrases]] = [
@@ -62,7 +64,9 @@ class GenerateQuestionsPrompt(
     PydanticPrompt[GenerateQuestionsPromptInput, QuestionsGenerated]
 ):
     name: str = "generate_questions"
-    instruction: str = "Based on the given text and keyphrases, generate closed-ended questions that can be answered with '1' if the question can be answered using the text, or '0' if it cannot. The questions should ALWAYS result in a '1' based on the given text."
+    instruction: str = (
+        "Based on the given text and keyphrases, generate closed-ended questions that can be answered with '1' if the question can be answered using the text, or '0' if it cannot. The questions should ALWAYS result in a '1' based on the given text."
+    )
     input_model = GenerateQuestionsPromptInput
     output_model = QuestionsGenerated
     examples: t.List[t.Tuple[GenerateQuestionsPromptInput, QuestionsGenerated]] = [
@@ -99,7 +103,9 @@ class SummaryAndQuestions(BaseModel):
 
 class GenerateAnswersPrompt(PydanticPrompt[SummaryAndQuestions, AnswersGenerated]):
     name: str = "generate_answers"
-    instruction: str = "Based on the list of close-ended '1' or '0' questions, generate a JSON with key 'answers', which is a list of strings that determines whether the provided summary contains sufficient information to answer EACH question. Answers should STRICTLY be either '1' or '0'. Answer '0' if the provided summary does not contain enough information to answer the question and answer '1' if the provided summary can answer the question."
+    instruction: str = (
+        "Based on the list of close-ended '1' or '0' questions, generate a JSON with key 'answers', which is a list of strings that determines whether the provided summary contains sufficient information to answer EACH question. Answers should STRICTLY be either '1' or '0'. Answer '0' if the provided summary does not contain enough information to answer the question and answer '1' if the provided summary can answer the question."
+    )
     input_model = SummaryAndQuestions
     output_model = AnswersGenerated
     examples: t.List[t.Tuple[SummaryAndQuestions, AnswersGenerated]] = [
diff --git a/src/ragas/metrics/_topic_adherence.py b/src/ragas/metrics/_topic_adherence.py
index 140962fa9..ae55ddffb 100644
--- a/src/ragas/metrics/_topic_adherence.py
+++ b/src/ragas/metrics/_topic_adherence.py
@@ -97,7 +97,9 @@ class TopicRefusedPrompt(PydanticPrompt[TopicRefusedInput, TopicRefusedOutput]):
     ]
 
 
-class TopicExtractionPrompt(PydanticPrompt[TopicExtractionInput, TopicExtractionOutput]):
+class TopicExtractionPrompt(
+    PydanticPrompt[TopicExtractionInput, TopicExtractionOutput]
+):
     instruction: str = (
         "Given an interaction between Human, Tool and AI, extract the topics from Human's input."
     )
diff --git a/src/ragas/utils.py b/src/ragas/utils.py
index a9bc34390..7f1a42037 100644
--- a/src/ragas/utils.py
+++ b/src/ragas/utils.py
@@ -19,6 +19,8 @@
 RAGAS_SUPPORTED_LANGUAGE_CODES = {
     v.__name__.lower(): k for k, v in LANGUAGE_CODES.items()
 }
+# endpoint for uploading results
+RAGAS_API_URL = "https://api.ragas.io"
 
 
 @lru_cache(maxsize=1)
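Usage note (not part of the patch): a minimal sketch of how the EvaluationResult additions above might be exercised once the patch is applied. The eval_dataset and hallucinations_metric objects are assumed to be the ones built in the notebook cells in this diff; the call shapes come straight from the changed code, everything else is illustrative.

from ragas import evaluate

# assumes `eval_dataset` and `hallucinations_metric` from the notebook above
results = evaluate(eval_dataset, metrics=[hallucinations_metric])

# __repr__ now renders a compact {'metric_name': score} summary
print(results)

# __getitem__ exposes the per-row scores of a single metric as a list of floats
hallucination_scores = results["hallucinations_metric"]

# upload() JSON-encodes results.ragas_traces with ChainRunEncoder, POSTs the
# packet to f"{RAGAS_API_URL}/alignment/evaluation" and returns the
# app.ragas.io URL for the run (a non-200 response raises an Exception)
evaluation_url = results.upload(verbose=True)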
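A second sketch, assuming only the patched ragas.callbacks module: ChainRun now stores run_id, parent_run_id and children as plain strings (RagasTracer applies str(run_id) at insertion time), so a trace entry round-trips through ChainRunEncoder without any UUID handling. The field values below are made up for illustration.

import json

from ragas.callbacks import ChainRun, ChainRunEncoder

# IDs and dict keys are plain strings after this patch
run = ChainRun(
    run_id="11111111-2222-3333-4444-555555555555",
    parent_run_id=None,
    name="ragas evaluation",
    inputs={"rows": 1},
    metadata={},
)
traces = {run.run_id: run}

# mirrors RagasTracer.to_jsons(): dump every ChainRun with the custom encoder
print(json.dumps([t.model_dump() for t in traces.values()], cls=ChainRunEncoder))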