In [1]:
from pinecone import Pinecone
import json
from openai import OpenAI
import pickle
import os
from anthropic import Anthropic


# Load the fitted encoder from a file for sparse embeddings
with open('bm25_encoder.pkl', 'rb') as f:
    bm25 = pickle.load(f)

# initialize connection to pinecone (get API key at app.pinecone.io)
api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'

# configure client
pc = Pinecone(api_key=api_key)

# Connect to your Pinecone index
index = pc.Index("actualism-website")

# Initialize OpenAI
client = OpenAI()

# Initialize the Anthropic client with your API key
api_key = os.environ.get('ANTHROPIC_API_KEY')
claude_client = Anthropic(api_key=api_key)


In [2]:
# Example usage
query = "how can i get started trying to become virtually free? what is the first thing to do?"

def rephrase_query_for_similarity_search(query):
    """
    Rephrase the given query using a language model to potentially enhance the effectiveness of the similarity search.
    
    Args:
        query (str): The original query string.
        
    Returns:
        str: A rephrased query.
    """
    # Initialize the OpenAI client
    client = OpenAI()
    
    # Prepare the messages for the chat completion to rephrase the query
    messages = [
        {"role": "system", "content": "You are an assistant who responds to queries by providing a detailed hypothetical answer (even if its a guess)."},
        {"role": "user", "content": query}
    ]
    
    # Call the OpenAI API to get the rephrased query
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=messages,
        max_tokens=300,
        temperature=0
    )
    
    # Extract the rephrased query from the response
    rephrased_query = response.choices[0].message.content
    return rephrased_query

# Example usage
rephrased_query = rephrase_query_for_similarity_search(query)
print("Original Query:", query)
print("Rephrased Query:", rephrased_query)



Original Query: how can i get started trying to become virtually free? what is the first thing to do?
Rephrased Query: To begin your journey towards becoming virtually free, the first thing you can do is to assess your current situation and identify areas where you can start making changes. Here are some steps you could consider taking:

1. **Set Clear Goals**: Define what "virtually free" means to you. Is it about reducing your reliance on physical possessions, minimizing your digital footprint, or achieving financial independence through online work? Having a clear goal will help you stay focused.

2. **Declutter Your Digital Life**: Start by decluttering your digital devices and accounts. Delete unused apps, organize your files, and unsubscribe from unnecessary emails. This will help you streamline your digital presence and reduce distractions.

3. **Explore Remote Work Opportunities**: If your goal is to achieve financial independence through online work, start exploring remote job

In [3]:
def generate_embeddings(query):
    # Generate dense embedding
    dense_vector = client.embeddings.create(
        model="text-embedding-3-small",
        input=query,
        encoding_format="float",
        dimensions=1024
    ).data[0].embedding

    # Generate sparse embedding
    sparse_vector = bm25.encode_documents([query])[0]  # Ensure query is passed as a list
    return dense_vector, sparse_vector

def format_sparse_vector(sparse_vector):
    # Return the dictionary in the expected format for pinecone search
    return {
        'indices': sparse_vector['indices'],
        'values': sparse_vector['values']
    }

def hybrid_score_norm(dense, sparse, alpha: float):
    """Hybrid score using a convex combination

    alpha * dense + (1 - alpha) * sparse

    Args:
        dense: Array of floats representing
        sparse: a dict of `indices` and `values`
        alpha: scale between 0 and 1
    """
    if alpha < 0 or alpha > 1:
        raise ValueError("Alpha must be between 0 and 1")
    hs = {
        'indices': sparse['indices'],
        'values':  [v * (1 - alpha) for v in sparse['values']]
    }
    return [v * alpha for v in dense], hs



def search_query(query):
    dense_vector, sparse_vector = generate_embeddings(query)
    dense_vector, sparse_vector = hybrid_score_norm(dense_vector, sparse_vector, alpha=.9)

    # Perform the hybrid query
    query_response = index.query(
        namespace="",
        top_k=100,
        vector=dense_vector,
        sparse_vector=format_sparse_vector(sparse_vector),
        include_metadata=True
    )
    return query_response

results = search_query(query)
additional_results = search_query(rephrased_query)
results['matches'].extend(additional_results['matches'])

matches = results['matches']  # This accesses the list of matches
matches


[{'id': '20780',
  'metadata': {'chunk_end_index': 10400.0,
               'chunk_start_index': 9600.0,
               'filename': 'actualism---peter---selected-correspondence---corr-method5.md',
               'raw_string': ' human condition is to be  as happy and as '
                             'harmless as one can be in this moment and the end '
                             'goal is to become actually free form the human '
                             'condition. I never separated one from the other '
                             'at the start because for me they were '
                             'inseparable. **RESPONDENT:** *I have not fully '
                             'committed to achieving an actual freedom  yet. '
                             'But I have fully committed to achieving a virtual '
                             'freedom at least.* **PETER:** Is not a full '
                             'commitment to a partial success by definition '
                        

In [4]:
import cohere

co = cohere.Client("Cx6UeZmVWiQ5h0dLEKAXlhkMmGfoekeUoUsdj0K3")


# Extract the 'raw_string' from each match's metadata
docs = [match['metadata']['raw_string'] for match in matches]

response = co.rerank(
    model="rerank-english-v3.0",
    query=query,
    documents=docs,
    top_n=5
    )
response



In [5]:
# pull needed info from the cohere response
response_indices = [item.index for item in response.results]
relevance_scores = [item.relevance_score for item in response.results]


# Extract metadata and add relevance_score using the indices from the response
selected_metadata = []
for index, score in zip(response_indices, relevance_scores):
    metadata = matches[index]['metadata']
    metadata['relevance_score'] = score  # Add relevance score to the metadata
    selected_metadata.append(metadata)

def filter_metadata_by_relevance(metadata_list, score_threshold=0.999):
    # Filter metadata with relevance score >= score_threshold
    filtered_metadata = [metadata for metadata in metadata_list if metadata['relevance_score'] >= score_threshold]
    
    # If no metadata meets the threshold, take the one with the highest relevance score
    if not filtered_metadata:
        highest_relevance_metadata = max(metadata_list, key=lambda x: x['relevance_score'])
        filtered_metadata.append(highest_relevance_metadata)
    
    return filtered_metadata

# Example usage
filtered_metadata = filter_metadata_by_relevance(selected_metadata, score_threshold=0.1)

def calculate_confidence_score(metadata_list):
    if not metadata_list:
        return 0  # Return 0 if the list is empty

    # Calculate the sum of relevance scores
    sum_relevance = sum(item['relevance_score'] for item in metadata_list)
    
    # Calculate the average relevance score
    average_relevance = sum_relevance / len(metadata_list)
    
    # Calculate the number of documents
    num_documents = len(metadata_list)
    
    # Combine the metrics into a confidence score
    # Adjust the formula to weigh the sum of relevance scores more heavily
    # Normalize the score to be between 0 and 1
    score = (sum_relevance * average_relevance) / num_documents
    score
    
    return score

# Example usage
confidence_score = calculate_confidence_score(filtered_metadata)


In [6]:
filtered_metadata

[{'chunk_end_index': 11600.0,
  'chunk_start_index': 10800.0,
  'filename': 'richard---selectedcorrespondence---sc-virtualfreedom.md',
  'raw_string': '. One starts by dismantling the sense of social identity  that has been overlaid, from birth onward, over the innate self until one is virtually free from all the social mores and psittacisms ... those  mechanical repetitions of previously received ideas or images, reflecting neither apperception nor autonomous reasoning. One can be virtually free  from all the beliefs, ideas, values, theories, truths, customs, traditions, ideals, superstitions ... and all the other schemes and dreams. One can  become aware of all the socialisation, of all the conditioning, of all the programming, of all the methods and techniques that were used to produce  what one thinks and feels oneself to be ... a wayward social identity careering around in confusion and illusion. A \x91mature adult\x92 is actually  a lost, lonely, frightened and very cunning entit

In [7]:
import math


print(confidence_score)
def generate_chatgpt_response(query, metadata_list):
    # Initialize the OpenAI client
    client = OpenAI()
    
    # Format the context from metadata
    context = "\n\n".join([f"Document: {meta['raw_string']}" for meta in metadata_list])
    
    # Prepare the messages for the chat completion
    messages = [
        {"role": "system", "content": "You are an assistant who strictly uses source text provided to you to answer user queries."},
        {"role": "user", "content": query},
        {"role": "assistant", "content": context}
    ]
    
    # Call the OpenAI API using the updated syntax
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=2000,
        temperature=0.7,
        logprobs=True
    )
    logprobs = [token.logprob for token in response.choices[0].logprobs.content]
    # Calculate the average log probability
    average_log_prob = sum(logprobs) / len(logprobs)
    probability = math.exp(average_log_prob)
    print(probability)


    return response.choices[0].message.content

# Example usage
query = query
response_text = generate_chatgpt_response(query, filtered_metadata)
print(response_text)

0.9705557360210086
0.7199008487012213
To get started on the path to becoming virtually free, the first thing to do is to begin dismantling your sense of social identity. This involves becoming aware of and freeing yourself from the social mores, beliefs, values, traditions, and conditioning that have shaped who you think and feel yourself to be. Here are the steps to take:

1. **Awareness:** Start by becoming aware of all the socialization, conditioning, and programming that have influenced your thoughts and feelings. Recognize that much of what you consider to be your identity is a result of these external influences.

2. **Examine Beliefs:** Critically examine all your beliefs, ideas, values, and customs. Understand that many of these are mechanical repetitions of previously received ideas rather than the result of autonomous reasoning.

3. **Forsake Social Identity:** Voluntarily forsake the social identity that has been imposed on you. This can be done by observing yourself in your

In [8]:
def format_results(results_list):
    formatted_results = "\n\n".join([f"Document: {result['raw_string']}" for result in results_list])
    return formatted_results

def get_completion(prompt):
    """
    Generate a completion using the Claude model from Anthropic.
    
    Args:
        prompt (str): The input prompt for the model.
        
    Returns:
        str: The generated completion.
    """
    response = claude_client.messages.create(
        model="claude-3-sonnet-20240229",
        system="Use only the information from the search results when responding.",
        messages=[
            {"role": "user", "content": prompt}
        ],
        max_tokens=4096
    )
    return response

def generate_claude_response(query, metadata_list):
    # Format the context from metadata using the helper function
    formatted_results = format_results(metadata_list)
    
    # Create the prompt for Claude with the appropriate tags
    prompt = f"Using the search results provided within the <search_results>{formatted_results}</search_results> tags, please answer the following question <question>{query}</question>."
    
    # Call the Claude API to get the response
    response_text = get_completion(prompt)
    
    return response_text

# Example usage
response_text = generate_claude_response(query, filtered_metadata)
print(response_text)

Message(id='msg_01E5w5f9yjBANdAp6Jjc95Cq', content=[TextBlock(text='Based on the information provided, here are the key steps outlined for getting started on becoming virtually free:\n\n1. Start by dismantling your sense of social identity that has been overlaid on you since birth. Question and let go of all the social mores, beliefs, ideas, values, truths, customs, traditions, ideals, etc. that have been conditioned into you.\n\n2. Become aware of all the socialization, conditioning, programming, and techniques that were used to produce your current identity and way of thinking/feeling. Realize that your current sense of self is largely an artificial "social identity" careening around in confusion and illusion. \n\n3. Work on becoming free from insidious feelings, emotions, and passions that fuel the mind and give credence to illusions, delusions, fantasies and hallucinations masquerading as truth.\n\n4. Decide to lock onto "pure intent" - the intent to discover exactly what you actua