diff --git a/src/ragas/evaluation.py b/src/ragas/evaluation.py
index b8f5e03bc..169fe441c 100644
--- a/src/ragas/evaluation.py
+++ b/src/ragas/evaluation.py
@@ -37,7 +37,6 @@ def evaluate(
     dataset : Dataset[question: list[str], contexts: list[list[str]], answer: list[str]]
         The dataset in the format of ragas which the metrics will use to score the RAG
         pipeline with
-
     metrics : list[Metric] , optional
         List of metrics to use for evaluation. If not provided then ragas will run the
         evaluation on the best set of metrics to give a complete view.
diff --git a/src/ragas/metrics/context_relevance.py b/src/ragas/metrics/context_relevance.py
index a0dd0aa0d..8560ed55d 100644
--- a/src/ragas/metrics/context_relevance.py
+++ b/src/ragas/metrics/context_relevance.py
@@ -81,22 +81,37 @@ def evaluate(self, answers: List[List[str]]) -> np.float_:
 
 @dataclass
 class ContextRelevancy(Metric):
-    """
-    params
-        strictness: Integer, controls the number of times sentence extraction is
-            performed to quantify uncertainty from the LLM. Defaults to 2.
-        agreement_metric: bert_score or jaccard_score, used to measure agreement
-            between multiple samples.
-        model_name: any encoder model. Used for calculating bert_score.
+    """
+    Extracts sentences from the context that are relevant to the question with
+    self-consistency checks. The number of relevant sentences is used as the score.
+
+    Attributes
+    ----------
+    name : str
+    batch_size : int
+        Batch size for OpenAI completions.
+    strictness : int
+        Controls the number of times sentence extraction is performed to quantify
+        uncertainty from the LLM. Defaults to 2.
+    agreement_metric : str
+        "bert_score" or "jaccard_score", used to measure agreement between multiple
+        samples.
+    model_name : str
+        Any encoder model. Used for calculating bert_score.
     """
 
     name: str = "context_relavency"
     batch_size: int = 15
-    agreement_metric: str = "bert_score"
     strictness: int = 2
+    agreement_metric: str = "bert_score"
     model_name: str = "cross-encoder/stsb-TinyBERT-L-4"
 
+    def __post_init__(self: t.Self):
+        if self.agreement_metric == "bert_score" and self.model_name is None:
+            raise ValueError(
+                "model_name must be provided when agreement_metric is bert_score"
+            )
+
     def init_model(self: t.Self):
         self.sent_agreement = SentenceAgreement(
             model_name=self.model_name, metric=self.agreement_metric
@@ -104,7 +119,14 @@ def init_model(self: t.Self):
 
     def score(self: t.Self, dataset: Dataset) -> Dataset:
         """
+        Parameters
+        ----------
         dataset: Dataset[question: list[str], contexts: list[list[str]]]
+
+        Returns
+        -------
+        Dataset[question: list[str], contexts: list[list[str]], scores: list[float]]
+            Dataset with the scores for each row.
         """
         prompts = []
         questions, contexts = dataset["question"], dataset["contexts"]
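
Usage sketch (not part of the patch): a minimal example of how the interface documented above might be exercised. It assumes the module path shown in the diff (ragas.metrics.context_relevance), a Hugging Face datasets.Dataset with "question" and "contexts" columns as described in the score() docstring, and OpenAI credentials in the environment, since the metric issues completion calls; the sample data and the jaccard_score agreement choice are illustrative only.

from datasets import Dataset

from ragas.metrics.context_relevance import ContextRelevancy

# Toy dataset in the shape score() documents above:
# question: list[str], contexts: list[list[str]].
ds = Dataset.from_dict(
    {
        "question": ["When was the first Super Bowl played?"],
        "contexts": [
            [
                "The first AFL-NFL World Championship Game was played on "
                "January 15, 1967, at the Los Angeles Memorial Coliseum."
            ]
        ],
    }
)

# jaccard_score avoids loading the bert_score encoder; with
# agreement_metric="bert_score", model_name must be set, which the
# new __post_init__ check enforces.
metric = ContextRelevancy(strictness=2, agreement_metric="jaccard_score")
metric.init_model()

scored = metric.score(ds)  # dataset with a per-row relevance score added
print(scored.to_pandas())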