Merge branch 'main' of https://github.com/iamgroot42/mimir

iamgroot42 · Mar 27, 2024 · b4a9223 · b4a9223
2 parents 576339b + 2a95067
commit b4a9223
Show file tree

Hide file tree

Showing 7 changed files with 147 additions and 132 deletions.
diff --git a/docs/attacks/loss.html b/docs/attacks/loss.html
@@ -46,7 +46,7 @@ <h1 class="title">Module <code>mimir.attacks.loss</code></h1>
         &#34;&#34;&#34;
             LOSS-score. Use log-likelihood from model.
         &#34;&#34;&#34;
-        return self.model.get_ll(document, probs=probs, tokens=tokens)</code></pre>
+        return self.target_model.get_ll(document, probs=probs, tokens=tokens)</code></pre>
 </details>
 </section>
 <section>
@@ -78,7 +78,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         &#34;&#34;&#34;
             LOSS-score. Use log-likelihood from model.
         &#34;&#34;&#34;
-        return self.model.get_ll(document, probs=probs, tokens=tokens)</code></pre>
+        return self.target_model.get_ll(document, probs=probs, tokens=tokens)</code></pre>
 </details>
 <h3>Ancestors</h3>
 <ul class="hlist">

diff --git a/docs/attacks/min_k.html b/docs/attacks/min_k.html
@@ -45,7 +45,7 @@ <h1 class="title">Module <code>mimir.attacks.min_k</code></h1>
     @ch.no_grad()
     def _attack(self, document, probs, tokens=None, **kwargs):
         &#34;&#34;&#34;
-        Min-k % Prob Attack. Gets model praobbilities and returns likelihood when computed over top k% of ngrams.
+        Min-k % Prob Attack. Gets model probabilities and returns likelihood when computed over top k% of ngrams.
         &#34;&#34;&#34;
         # Hyper-params specific to min-k attack
         k: float = kwargs.get(&#34;k&#34;, 0.2)
@@ -55,7 +55,7 @@ <h1 class="title">Module <code>mimir.attacks.min_k</code></h1>
         all_prob = (
             probs
             if probs is not None
-            else self.model.get_probabilities(document, tokens=tokens)
+            else self.target_model.get_probabilities(document, tokens=tokens)
         )
         # iterate through probabilities by ngram defined by window size at given stride
         ngram_probs = []
@@ -94,7 +94,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
     @ch.no_grad()
     def _attack(self, document, probs, tokens=None, **kwargs):
         &#34;&#34;&#34;
-        Min-k % Prob Attack. Gets model praobbilities and returns likelihood when computed over top k% of ngrams.
+        Min-k % Prob Attack. Gets model probabilities and returns likelihood when computed over top k% of ngrams.
         &#34;&#34;&#34;
         # Hyper-params specific to min-k attack
         k: float = kwargs.get(&#34;k&#34;, 0.2)
@@ -104,7 +104,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         all_prob = (
             probs
             if probs is not None
-            else self.model.get_probabilities(document, tokens=tokens)
+            else self.target_model.get_probabilities(document, tokens=tokens)
         )
         # iterate through probabilities by ngram defined by window size at given stride
         ngram_probs = []

diff --git a/docs/attacks/neighborhood.html b/docs/attacks/neighborhood.html
@@ -619,7 +619,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
 </code></dt>
 <dd>
 <div class="desc"><p>Base class (for LLMs).</p>
-<p>Initializes internal Module state, shared by both nn.Module and ScriptModule.</p></div>
+<p>Initialize internal Module state, shared by both nn.Module and ScriptModule.</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>
@@ -1074,7 +1074,7 @@ <h3>Inherited members</h3>
 </code></dt>
 <dd>
 <div class="desc"><p>Base class (for LLMs).</p>
-<p>Initializes internal Module state, shared by both nn.Module and ScriptModule.</p></div>
+<p>Initialize internal Module state, shared by both nn.Module and ScriptModule.</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>
@@ -1367,7 +1367,7 @@ <h3>Inherited members</h3>
 </code></dt>
 <dd>
 <div class="desc"><p>Base class (for LLMs).</p>
-<p>Initializes internal Module state, shared by both nn.Module and ScriptModule.</p></div>
+<p>Initialize internal Module state, shared by both nn.Module and ScriptModule.</p></div>
 <details class="source">
 <summary>
 <span>Expand source code</span>

diff --git a/docs/attacks/quantile.html b/docs/attacks/quantile.html
@@ -121,7 +121,7 @@ <h1 class="title">Module <code>mimir.attacks.quantile</code></h1>
         # Step 1: Use non-member dataset, collect confidence scores for correct label.
         # Get likelihood scores from target model for known_non_members
         # Note that these non-members should be different from the ones in testing
-        scores = [self.model.get_ll(x) for x in known_non_members]
+        scores = [self.target_model.get_ll(x) for x in known_non_members]
         # Construct a dataset out of this to be used in Huggingface, with
         # &#34;text&#34; containing the actual data, and &#34;labels&#34; containing the scores
         dataset = Dataset.from_dict({&#34;text&#34;: known_non_members, &#34;labels&#34;: scores})
@@ -133,7 +133,7 @@ <h1 class="title">Module <code>mimir.attacks.quantile</code></h1>
         # Step 3: Test by checking if member: score is higher than output of quantile regression model.
 
         # Get likelihood score from target model for doc
-        ll = self.model.get_ll(document)
+        ll = self.target_model.get_ll(document)
 
         # Return ll - quantile_model(doc)
         tokenized = self.ref_model.tokenizer(document, return_tensors=&#34;pt&#34;)
@@ -361,7 +361,7 @@ <h3>Methods</h3>
         # Step 1: Use non-member dataset, collect confidence scores for correct label.
         # Get likelihood scores from target model for known_non_members
         # Note that these non-members should be different from the ones in testing
-        scores = [self.model.get_ll(x) for x in known_non_members]
+        scores = [self.target_model.get_ll(x) for x in known_non_members]
         # Construct a dataset out of this to be used in Huggingface, with
         # &#34;text&#34; containing the actual data, and &#34;labels&#34; containing the scores
         dataset = Dataset.from_dict({&#34;text&#34;: known_non_members, &#34;labels&#34;: scores})
@@ -373,7 +373,7 @@ <h3>Methods</h3>
         # Step 3: Test by checking if member: score is higher than output of quantile regression model.
 
         # Get likelihood score from target model for doc
-        ll = self.model.get_ll(document)
+        ll = self.target_model.get_ll(document)
 
         # Return ll - quantile_model(doc)
         tokenized = self.ref_model.tokenizer(document, return_tensors=&#34;pt&#34;)
@@ -413,7 +413,7 @@ <h3>Methods</h3>
     # Step 1: Use non-member dataset, collect confidence scores for correct label.
     # Get likelihood scores from target model for known_non_members
     # Note that these non-members should be different from the ones in testing
-    scores = [self.model.get_ll(x) for x in known_non_members]
+    scores = [self.target_model.get_ll(x) for x in known_non_members]
     # Construct a dataset out of this to be used in Huggingface, with
     # &#34;text&#34; containing the actual data, and &#34;labels&#34; containing the scores
     dataset = Dataset.from_dict({&#34;text&#34;: known_non_members, &#34;labels&#34;: scores})

diff --git a/docs/attacks/reference.html b/docs/attacks/reference.html
@@ -50,7 +50,7 @@ <h1 class="title">Module <code>mimir.attacks.reference</code></h1>
         &#34;&#34;&#34;
         loss = kwargs.get(&#39;loss&#39;, None)
         if loss is None:
-            loss = self.model.get_ll(document, probs=probs, tokens=tokens)
+            loss = self.target_model.get_ll(document, probs=probs, tokens=tokens)
         ref_loss = self.ref_model.get_ll(document, probs=probs, tokens=tokens)
         return loss - ref_loss</code></pre>
 </details>
@@ -89,7 +89,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         &#34;&#34;&#34;
         loss = kwargs.get(&#39;loss&#39;, None)
         if loss is None:
-            loss = self.model.get_ll(document, probs=probs, tokens=tokens)
+            loss = self.target_model.get_ll(document, probs=probs, tokens=tokens)
         ref_loss = self.ref_model.get_ll(document, probs=probs, tokens=tokens)
         return loss - ref_loss</code></pre>
 </details>

diff --git a/docs/attacks/zlib.html b/docs/attacks/zlib.html
@@ -59,7 +59,7 @@ <h1 class="title">Module <code>mimir.attacks.zlib</code></h1>
         &#34;&#34;&#34;
         loss = kwargs.get(&#34;loss&#34;, None)
         if loss is None:
-            loss = self.model.get_ll(document, probs=probs, tokens=tokens)
+            loss = self.target_model.get_ll(document, probs=probs, tokens=tokens)
         zlib_entropy = len(zlib.compress(bytes(document, &#34;utf-8&#34;)))
         return loss / zlib_entropy</code></pre>
 </details>
@@ -103,7 +103,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
         &#34;&#34;&#34;
         loss = kwargs.get(&#34;loss&#34;, None)
         if loss is None:
-            loss = self.model.get_ll(document, probs=probs, tokens=tokens)
+            loss = self.target_model.get_ll(document, probs=probs, tokens=tokens)
         zlib_entropy = len(zlib.compress(bytes(document, &#34;utf-8&#34;)))
         return loss / zlib_entropy</code></pre>
 </details>