In [None]:
import { useCallback, useMemo } from "react";

function ProductPage({ productId, referrer, theme }) {
	// Every time the theme changes, this will be a different function...
	function handleSubmit(orderDetails) {
		post("/product/" + productId + "/buy", {
			referrer,
			orderDetails,
		});
	}

	return (
		<div className={theme}>
			{/* ... so ShippingForm's props will never be the same, and it will re-render every time */}
			<ShippingForm onSubmit={handleSubmit} />
		</div>
	);
}

function ProductPage({ productId, referrer, theme }) {
	// Tell React to cache your function between re-renders...
	const handleSubmit = useCallback(
		(orderDetails) => {
			post("/product/" + productId + "/buy", {
				referrer,
				orderDetails,
			});
		},
		[productId, referrer]
	); // ...so as long as these dependencies don't change...

	return (
		<div className={theme}>
			{/* ...ShippingForm will receive the same props and can skip re-rendering */}
			<ShippingForm onSubmit={handleSubmit} />
		</div>
	);
}

// Factory: binds a referrer and a product id, and returns the submit handler
// that posts the order details for that product.
function handleSubmit(referrer, productId) {
	return (orderDetails) => {
		const url = "/product/" + productId + "/buy";
		post(url, { referrer, orderDetails });
	};
}
function ProductPage({ productId, referrer, theme }) {
	const handleSubmit = useMemo(
		() => handleSubmit(productId, referrer),
		[productId, referrer]
	);
	return (
		<div className={theme}>
			<ShippingForm onSubmit={handleSubmit} />
		</div>
	);
}


In [9]:
# Sample markdown document; used as the text to search when exercising
# getSubstringIndices.
stri1 = """# Project Beta Review

This is the secondary analysis for Beta.
While promising, there are overlaps.

## Shared Observations

1. Velocity increased by 20% in Q3.
2. API rate limits are being hit frequently.

## Beta Specifics

- The new UI framework is causing blocking.
- Mobile adoption is up 40%."""


def getSubstringIndices(string, substring):
	"""Locate the first occurrence of ``substring`` inside ``string``.

	Args:
		string (str): Text to search.
		substring (str): Text to look for.

	Returns:
		tuple: ``(start, end)`` such that ``string[start:end] == substring``,
		or ``(-1, -1)`` when ``substring`` does not occur in ``string``.
	"""
	start_index = string.find(substring)
	if start_index == -1:
		# str.find reports "not found" as -1; adding len(substring) to it would
		# yield an end index that looks like a valid position.
		return (-1, -1)
	return (start_index, start_index + len(substring))

In [12]:
# Find the (start, end) span of the UI-framework bullet inside the sample document.
getSubstringIndices(stri1, "The new UI framework is causing blocking.")

(229, 270)

In [None]:
from analysis_processor import AnalysisProcessor

# Privacy-policy URLs to analyse, keyed by provider name. Entries that could
# not be fetched are kept commented out together with the failure observed.
data_source = {
	"openai": "https://openai.com/policies/privacy-policy/",
	"anthropic": "https://www.anthropic.com/legal/privacy",
	# "perplexity": "https://www.perplexity.ai/hub/legal/privacy-policy",	# HTTPError: 403 Client Error: Forbidden for url: https://www.perplexity.ai/hub/legal/privacy-policy
	# "deepseek": "https://cdn.deepseek.com/policies/en-US/deepseek-privacy-policy.html", # Returns None
}
# NOTE(review): the single-letter name `a` is reused by later cells, so it is kept as-is.
a = AnalysisProcessor(data_source, debug=False)
a.runAnalyses()

# NOTE(review): the recorded output of this cell shows a 429 RESOURCE_EXHAUSTED
# ClientError followed by a KeyboardInterrupt — presumably a rate limit on the
# backing API; confirm before running both passes back to back.
a.runSubstringAnalysis("FACT_EMBEDDINGS")
a.runSubstringAnalysis("CROSS_FACTS")

ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'Resource exhausted. Please try again later. Please refer to https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429 for more details.', 'status': 'RESOURCE_EXHAUSTED'}}

Found High Likelihood affirmation: similarity 0.8004843377655538 
Question: Does the privacy policy affirm that the company is the controller responsible for the processing of Personal Data for users living in the European Economic Area (EEA)?
Substring: If you live in the European Economic Area (EEA), UK or Switzerland (the “European Region”), the data controller responsible for your personal data is Anthropic Ireland, Limited. If you live outside the European Region, the data controller responsible for your personal data is Anthropic PBC.


KeyboardInterrupt: 

In [None]:
# Narrow the run to a single policy; every other source is left commented out.
data_source = {
	"openai": "https://openai.com/policies/privacy-policy/",
	# "anthropic": "https://www.anthropic.com/legal/privacy",
	# "perplexity": "https://www.perplexity.ai/hub/legal/privacy-policy",	# HTTPError: 403 Client Error: Forbidden for url: https://www.perplexity.ai/hub/legal/privacy-policy
	# "deepseek": "https://cdn.deepseek.com/policies/en-US/deepseek-privacy-policy.html", # Returns None
}

In [None]:
# Show how many questions are cached, then display the keys as the cell output.
print(len(temp_cache_questions))
temp_cache_questions.keys()

In [None]:
# Re-create the processor with default options for whichever data_source is
# currently defined, then run its analyses.
a = AnalysisProcessor(data_source)
a.runAnalyses()

In [None]:
# NOTE(review): the meaning of mode=1 is not explained anywhere in this
# notebook — confirm against AnalysisProcessor before relying on it.
a = AnalysisProcessor(data_source, mode=1)

a.runSubstringAnalysis("FACT_EMBEDDINGS")

In [None]:
import numpy as np

# 1. Setup: dummy embeddings for demonstration.
# Rows 0 and 1 are the originals; rows 2 and 4 repeat row 0, row 3 repeats row 1.
embeddings = np.array(
	[
		[1.0, 0.0],  # 0: Original A
		[0.0, 1.0],  # 1: Original B
		[1.0, 0.0],  # 2: Copy of A
		[0.0, 1.0],  # 3: Copy of B
		[1.0, 0.0],  # 4: Copy of A
	]
)

# Cosine similarity above which two rows are treated as duplicates.
SIMILARITY_THRESHOLD = 0.98

# 2. Normalize each row to unit length so a dot product equals cosine similarity.
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
normalized_emb = embeddings / norms

# 3. Pairwise cosine similarity between all rows.
sim_matrix = normalized_emb @ normalized_emb.T

# 4. Keep only the strict upper triangle (k=1): entry [i, j] is True when
#    i < j and rows i and j are similar, so each pair is considered once and
#    a row is never compared with itself.
is_duplicate_matrix = np.triu(sim_matrix > SIMILARITY_THRESHOLD, k=1)

# 5. Column j contains a True exactly when row j matches some earlier row.
duplicates_mask = np.any(is_duplicate_matrix, axis=0)

# 6. argmax over a boolean column gives the first True, i.e. the earliest
#    matching row. Columns with no True yield 0 and are filtered out by the
#    mask below.
masters_all = np.argmax(is_duplicate_matrix, axis=0)

indices_to_drop = np.where(duplicates_mask)[0]
associated_masters = masters_all[duplicates_mask]

# Output
print(f"Indices to drop:    {indices_to_drop}")
print(f"Associated Masters: {associated_masters}")

# Verification loop: spell out each (duplicate, master) pairing.
for drop, master in zip(indices_to_drop, associated_masters):
	print(f"Item {drop} is a duplicate of Item {master}")

In [None]:
# NOTE(review): loadQuestions is not imported or defined in any visible cell —
# presumably it comes from earlier kernel state; import it explicitly so the
# notebook survives Restart & Run All.
qs = loadQuestions(debug=False)
# Print one entry per line (the keys, if qs is a dict).
for k in qs:
	print(k)

In [None]:
# Relies on the processor `a` created in an earlier cell.
a.runSubstringAnalysis("TESTING")

In [None]:
# Work on a copy so the cache object itself is not rebound.
# NOTE(review): temp_cache_questions is not defined in any visible cell —
# presumably left over from earlier kernel state; confirm where it comes from.
question_data = temp_cache_questions.copy()
import numpy as np
from scipy.spatial.distance import pdist, squareform


def get_most_similar_mahalanobis(question_data):
	"""
	Identifies the two most semantically similar strings using Mahalanobis distance.

	The covariance of all embedding vectors is estimated, pseudo-inverted and
	used as the metric for every pairwise distance; the pair with the smallest
	distance is returned.

	NOTE(review): when the number of strings is not larger than the embedding
	dimension plus one, the covariance is rank-deficient and the pseudo-inverse
	whitening makes all pairwise distances (nearly) equal, so the "closest"
	pair is decided by floating-point noise. Confirm the input is large enough
	for this metric to be meaningful.

	Args:
		question_data (dict): Dictionary of form {string: {"embedding_vector": [list]}}

	Returns:
		tuple: (string_A, string_B, distance_score), where string_A precedes
		string_B in the dictionary's iteration order; (None, None, 0.0) when
		fewer than two strings are supplied.
	"""
	keys = list(question_data)

	if len(keys) < 2:
		return None, None, 0.0

	# One row per string, one column per embedding dimension.
	X = np.array([question_data[k]["embedding_vector"] for k in keys])

	# np.cov squeezes its result to a 0-d array for one-dimensional embeddings,
	# which pinv rejects; force the covariance back to a 2-D matrix.
	cov_matrix = np.atleast_2d(np.cov(X, rowvar=False))

	# Pseudo-inverse rather than inverse: the covariance may be singular.
	inv_cov_matrix = np.linalg.pinv(cov_matrix)

	dist_matrix = squareform(pdist(X, metric="mahalanobis", VI=inv_cov_matrix))

	# A string is at distance 0 from itself; exclude the diagonal from the search.
	np.fill_diagonal(dist_matrix, np.inf)

	# Row-major argmin lands on the (i, j) cell with i < j of the closest pair.
	i, j = np.unravel_index(np.argmin(dist_matrix), dist_matrix.shape)

	# Read the distance from the same cell that selected the pair, as a plain
	# float so both return paths yield the same type.
	return keys[i], keys[j], float(dist_matrix[i, j])


# Run on the copied questions; the cell output is the (string_A, string_B, distance_score) tuple.
get_most_similar_mahalanobis(question_data)