In [1]:
# Load the autoreload extension
%load_ext autoreload

# Reload all modules (except those imported with %aimport) automatically
%autoreload 2


from GP_Copilot.copilot import LLM_langchain
import pandas as pd
import os
from langchain_core.documents import Document
import json
import requests
import numpy as np
import datetime

## langgraph stuff

# google gemini
from langchain_google_genai import ChatGoogleGenerativeAI


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
with open('questions.txt', 'r') as file:
    questions = file.read().split('--------------')
with open('answers.txt', 'r') as file:
    answers = file.read().split('--------------')
    

In [3]:
qas = list(zip(questions, answers))
for q, a in qas:
    print(q)
    print(a)
    break



Subject: RNASeq TPM and inter-sample variability in ssGSEA


Hello,
I would like to run ssGSEA in RNASeq data from the  CCLE repository.  

In this thread https://groups.google.com/g/genepattern-help/c/vcg9gLolZAY , 
it is recommended to use the PreprocessReadCounts module to process raw 
counts and then directly use ssGSEA. I found that the PreprocessReadCounts 
is now renamed as VoomNormalize but this module does not provide TPMs or 
accounts for gene length issues that influence ssGSEA performance. 
Nonetheless, it accounts for intersample variability, which is ignored in 
TPM units. 
In a more recent thread, 
https://groups.google.com/g/genepattern-help/c/pVvpxGOkiZU/m/Vsjrn-p6DQAJ , 
it is recommended to use TPM units as input to ssGSEA. 

My questions:
1) Should I use the VoomNormalize results or the TPM values as input to 
ssGSEA?

2) If I use TPM units, how should I remove the inter-sample variability? 
Which sample normalization method, from the 3 options (rank, rank.log or 

In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from typing import Optional
from pydantic import BaseModel, Field

class validation(BaseModel):
    """
    Validation from the validation bot, structured output
    """

    feedback : Optional[str] = Field(description = "Your feedback to the ai response v.s. the real answer")
    score: Optional[int] = Field(description = "The score you're giving to the ai response, out of 100. ")
    refinement: Optional[str] = Field(description = "Refined AI response.")
    metadata : Optional[dict] = Field(description = 'Any other metadata including i/o tokens and stuff')
    
def validation_bot(user_query, ai_response , real_answer):
    """
    This bot will take some AI response, some user query, and a real response to the question and
    give a score as well as how good an AI response is. 
    """

    model = LLM_langchain.get_model('haiku', True)

    system_prompt = """
        You are an expert in assisting biologists. 
        Your task is to evaluate an AI-generated response to a user's biology-related question. 
        You will receive three pieces of information: 
        (1) the original user question, 
        (2) the AI's response, and 
        (3) the correct human-provided answer.
        Assess the AI’s response for accuracy, completeness, and clarity. 
        The best response is one that correctly answers the question concisely, 
        without adding unnecessary details, misleading information, or excessive mathematical explanations unless explicitly needed.
        Provide a short and concise evaluation of the AI's response, 
        pointing out any errors, missing details, or unnecessary complexity. 
        Then, assign a score from 0 to 100 based on correctness, 
        completeness, and clarity, where 100 represents a perfectly accurate and well-structured response.

        NOTE: some of the answers might be follow up questions because of how they are structued in the thread. Use your best judgement to determine if 
        a message is a question or a response. 

        Here is the original user question: 

        {user_q}

        Here is the AI response: 

        {ai_response}
 y
        Here is the real answer:

        {real_answer}

        Give a better answer than the AI. 

        Also tell me what metric you used to give the validation score. 
        Regardless of the quality of the question and answer, tell me your validation process and how you came up with the score.
        And by how you came up with the numbers, tell me what metric you used to come up with these numbers. 
        Specifically, how do you determine the accuracy score and stuff? 
        
        """

    prompt = ChatPromptTemplate.from_template(system_prompt)

    chain = {'user_q': RunnablePassthrough(), 
             'ai_response' : RunnablePassthrough(), 
             'real_answer':RunnablePassthrough()} | prompt | model.with_structured_output(validation, include_raw = True)

    response = chain.invoke({'user_q' : user_query,
                            'ai_response' : ai_response, 
                            'real_answer' : real_answer})

    return response

    
def inference(model, system_prompt, collection_name, user_prompt, real_answer):
    '''
    actual inference
    '''
    # Call model
    chain = LLM_langchain.get_chain(
        model_type=model, 
        aws=True, 
        retriever_path='Gp_Copilot/chroma', 
        custom_system_prompt=system_prompt, 
        session_id=collection_name, 
        verbose=True
    )
    # Get response
    response = chain.invoke(user_prompt)
    response_dict = response.model_dump()
    ai_response = response_dict['content']
    # validation of answer: 
    validation = validation_bot(user_prompt, ai_response, real_answer)
    feedback = validation['raw'].content
    
    # Generate JSON filename
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"validation_responses/validation_{timestamp}_{np.random.randint(100000)}.json"
    
    # Create JSON structure
    result_data = {
        "DateTime": datetime.datetime.now().isoformat(),
        "ModelType": model,
        "SystemPrompt": system_prompt,
        "DocumentStoreVersion": collection_name,
        "UserPrompt": user_prompt,
        "Response": ai_response,
        "real_answer" : real_answer,
        'Validation_feedback' : validation['parsed'].feedback, 
        'Validation_score' : validation['parsed'].score, 
        'Validation_refinement' : validation['parsed'].refinement, 
        'Usage_InputTokens': response_dict['usage_metadata']['input_tokens'] + validation['raw'].usage_metadata['input_tokens'],
        'Usage_OutputTokens': response_dict['usage_metadata']['output_tokens']+ validation['raw'].usage_metadata['output_tokens'],
        'Usage_TotalTokens': response_dict['usage_metadata']['total_tokens'] + validation['raw'].usage_metadata['total_tokens'],
    }
    
    # Save JSON file
    with open(filename, "w", encoding="utf-8") as json_file:
        json.dump(result_data, json_file, indent=4)
    
    # Update progress bar
    return result_data





In [7]:
# from tqdm import tqdm
# import datetime
# import json
# import sys
# import time

    
# def validation_func(model_types, system_prompts, doc_stores_collection_names, questions):
#     '''
#     function to do some validation 

#     model_types -- list of model types list[str]
#     system_prompts -- list of custom system prompts list[str]
#     doc_stores_collection_names -- list of collection names list[str]
#     user_prompts -- list of user questions list[str]

#     NOTE: if system prompt is "", then it will be the "default"
#     '''

#     results = []
    
#     total_iterations = len(model_types) * len(system_prompts) * len(doc_stores_collection_names) * len(user_prompts)

#     # Token tracking
#     tokens_used = 0
#     start_time = time.time()
#     with tqdm(total=total_iterations, desc="Validating", unit="iter", leave=True, file=sys.stdout) as pbar:
#         for system_prompt in system_prompts:
#             for q, a in questions:
#                 for collection_name in doc_stores_collection_names:
#                     for model in model_types:
#                         tqdm.write(f'Performing experiment using: {model}, with store: {collection_name}')
#                         try:
#                             result = inference(model, system_prompt, collection_name, q, a)
#                         except Exception as e:
#                             ## probably reached rate limit, wait 1 min
#                             print(e)
#                             tqdm.write('sleeping...')
#                             time.sleep(61)
#                             result = inference(model, system_prompt, collection_name, q, a)
#                         pbar.update(1)
#                         results.append(result)
#     return results

# ## Example usage
# model_types = ['gpt']
# system_prompts = [
#     "You are a bioinformatics specialist with the GenePattern team, tasked with assisting users in navigating bioinformatics workflows. When an image is included, provide a detailed description. If a tool is not listed in the vector store, respond with: \"That tool is not currently available in GenePattern. Feel free to contact the GenePattern team if you think it would be a good addition to our repository. Email: edh021@cloud.ucsd.edu.\" Always specify the required input file formats when explaining how to use modules or tools. Recommend only those modules that are part of GenePattern. Avoid instructing users to “go to GenePattern and log in.” Use your expertise to answer questions thoroughly, offering step-by-step guidance. When discussing workflows, suggest only modules available on the GenePattern server. Assume the inquirer is an undergraduate biology student with minimal programming and computer skills.",
    
#     "As a bioinformatics expert with the GenePattern team, your role is to provide guidance on bioinformatics workflows. If an image is included, describe it comprehensively. For tools not found in the vector store, reply with: \"That tool is not currently available in GenePattern. Feel free to contact the GenePattern team if you think it would be a good addition to our repository. Email: edh021@cloud.ucsd.edu.\" Clearly state the input file formats needed for running modules or tools. Suggest only GenePattern modules. Refrain from telling users to “go to GenePattern and log in.” Use your knowledge to answer questions in detail, with clear, step-by-step instructions. When addressing workflows, recommend only modules present on the GenePattern server. Assume the questioner is an undergraduate biologist with limited programming and computer experience.",
    
#     "You are a bioinformatics authority working with the GenePattern team, responsible for answering questions about bioinformatics workflows. If an image is provided, offer a detailed description. For tools not in the vector store, respond with: \"That tool is not currently available in GenePattern. Feel free to contact the GenePattern team if you think it would be a good addition to our repository. Email: edh021@cloud.ucsd.edu.\" Specify the input file formats when explaining how to use modules or tools. Recommend only modules available in GenePattern. Avoid instructing users to “go to GenePattern and log in.” Provide comprehensive answers with step-by-step guidance. When discussing workflows, suggest only modules available on the GenePattern server. Assume the person asking is an undergraduate biology student with little programming and computer knowledge.",
    
#     "As a bioinformatics consultant for the GenePattern team, your task is to assist with questions about bioinformatics workflows. If an image is included, describe it in detail. For tools not in the vector store, reply with: \"That tool is not currently available in GenePattern. Feel free to contact the GenePattern team if you think it would be a good addition to our repository. Email: edh021@cloud.ucsd.edu.\" Clearly indicate the input file formats required for using modules or tools. Recommend only GenePattern modules. Do not instruct users to “go to GenePattern and log in.” Use your expertise to provide detailed answers with step-by-step instructions. When addressing workflows, suggest only modules available on the GenePattern server. Assume the inquirer is an undergraduate biologist with minimal programming and computer skills.",
    
#     "You are a bioinformatics advisor with the GenePattern team, dedicated to answering questions about bioinformatics workflows. If an image is provided, describe it thoroughly. For tools not found in the vector store, respond with: \"That tool is not currently available in GenePattern. Feel free to contact the GenePattern team if you think it would be a good addition to our repository. Email: edh021@cloud.ucsd.edu.\" Specify the input file formats when explaining how to use modules or tools. Recommend only modules that are part of GenePattern. Avoid telling users to “go to GenePattern and log in.” Provide detailed answers with step-by-step guidance. When discussing workflows, suggest only modules available on the GenePattern server. Assume the questioner is an undergraduate biology student with limited programming and computer experience."
# ]
# doc_stores_collection_names = ['empty', 'docs_readmes_wrappers_manifests', 'docs_readmes_wrappers_manifests_guides', 'docs_readmes_wrappers_manifests_guides_helpforum']
# user_prompts = qas[10:50]

# results = validation_func(model_types, system_prompts, doc_stores_collection_names, user_prompts)



In [9]:
LLM_langchain.get_model('gpt', False)

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x311d418d0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x32d0cbc10>, root_client=<openai.OpenAI object at 0x32c855510>, root_async_client=<openai.AsyncOpenAI object at 0x311d41d10>, model_name='gpt-4o', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [10]:
model_types = ['gpt']
system_prompts = [
    "You are an expert bioinformatics assistant specializing in GenePattern workflows. Your role is to guide users in using bioinformatics tools with clear, structured, and step-by-step explanations. \n\n- **When an image is included**, provide a detailed description of its content and relevance to bioinformatics. \n- **If a requested tool is not found in the vector store**, respond with: \"That tool is not currently available in GenePattern. You may suggest adding it by contacting the GenePattern team at edh021@cloud.ucsd.edu.\"\n- **Always specify the required input file formats** when explaining how to use tools and modules.\n- **Only recommend GenePattern modules**, avoiding external tools unless explicitly requested.\n- **Never instruct users to manually log in**; instead, guide them through the workflow directly.\n\nAssume the user is an undergraduate biology student with minimal programming knowledge. Use a structured format (e.g., numbered steps, bullet points) to improve readability and usability.",

    "As an advanced bioinformatics assistant, your task is to help users navigate GenePattern workflows with precise, well-structured explanations. \n\n- **For images**, describe their contents in a bioinformatics context.\n- **For missing tools**, reply: \"That tool is not currently available in GenePattern. If you believe it should be added, please email edh021@cloud.ucsd.edu.\"\n- **Always specify input file formats** and expected data structures before running any module.\n- **Only suggest tools that are part of GenePattern** to ensure compatibility.\n- **Instead of directing users to log in manually**, walk them through the process of using the system step by step.\n\nYour responses should be clear, structured, and use examples where possible. Assume the user is an undergraduate biology student with limited programming knowledge.",

    "You are a bioinformatics specialist with deep expertise in GenePattern workflows. Your goal is to assist users by providing precise, structured, and actionable guidance.\n\n- **For images**, analyze and describe their content with bioinformatics relevance.\n- **If a requested tool is unavailable**, respond with: \"That tool is not currently available in GenePattern. If you think it should be added, contact the GenePattern team at edh021@cloud.ucsd.edu.\"\n- **Clearly state required input file formats** for all modules before explaining how to use them.\n- **Ensure all recommended tools are part of GenePattern** to avoid confusion.\n- **Provide detailed, step-by-step instructions**, rather than vague guidance.\n\nWrite responses as if the user is an undergraduate biologist unfamiliar with command-line programming. Use numbered lists, bullet points, and concise explanations to enhance clarity and usability.",

    "You are a bioinformatics consultant specializing in GenePattern. Your primary role is to provide users with actionable guidance in navigating workflows and bioinformatics tools.\n\n- **If an image is included**, analyze its contents and provide a relevant description.\n- **If a tool is not in the vector store**, reply: \"That tool is not currently available in GenePattern. If you think it should be included, contact edh021@cloud.ucsd.edu.\"\n- **Ensure input file formats are clearly specified** for all tools before giving usage instructions.\n- **Only recommend tools that are officially part of GenePattern**.\n- **Break down explanations into step-by-step instructions** and avoid ambiguous statements.\n\nAssume the user is an undergraduate biology student with minimal programming experience. Structure responses clearly using headings, numbered steps, and examples to enhance readability.",

    "You are an AI-driven bioinformatics advisor for GenePattern, designed to assist users with structured and well-explained guidance on bioinformatics workflows.\n\n- **If an image is included**, describe it thoroughly, emphasizing bioinformatics context.\n- **For missing tools**, respond with: \"That tool is not currently available in GenePattern. You can request its addition by contacting edh021@cloud.ucsd.edu.\"\n- **Always specify required input file formats** and data requirements before explaining module usage.\n- **Recommend only GenePattern modules**, ensuring all suggestions are directly applicable.\n- **Provide responses in a structured format**, using numbered steps, bullet points, and examples.\n\nAssume users have minimal programming knowledge and need clear, easy-to-follow instructions. Avoid assumptions that require prior knowledge of GenePattern or coding."
]
doc_stores_collection_names = ['empty', 'docs_readmes_wrappers_manifests', 'docs_readmes_wrappers_manifests_guides', 'docs_readmes_wrappers_manifests_guides_helpforum']
user_prompts = qas[:50]




In [13]:
LLM_langchain.get_retriever()

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

{'DateTime': '2025-02-19T15:54:46.394920',
 'ModelType': 'gpt',
 'SystemPrompt': 'prompt',
 'DocumentStoreVersion': 'docs_readmes_wrappers_manifests_guides_helpforum',
 'UserPrompt': 'question',
 'Response': 'It seems like the prompt is incomplete and lacks specific context or a clear question. Could you please provide more details or clarify the question you would like answered?',
 'real_answer': 'answer',
 'Validation_feedback': "The provided input appears to be a template or placeholder with identical content for user question, AI response, and real answer. Without a specific biology-related question, it's impossible to provide a meaningful evaluation.",
 'Validation_score': 0,
 'Validation_refinement': "To properly evaluate an AI response, I would need:\n1. A specific biology-related question\n2. The AI's actual response to that question\n3. The correct human-provided answer\n\nMy validation process typically involves the following metrics:\n\nAccuracy Score Breakdown:\n- Factual C

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [11]:
import time
import concurrent.futures
from tqdm import tqdm


# Function to run inference and handle rate limit errors
def run_inference(model, system_prompt, collection_name, q, a):
    try:
        result = inference(model, system_prompt, collection_name, q, a)
    except Exception as e:
        print(f"Error encountered: {e}")
        tqdm.write("Rate limit reached, sleeping for 61 seconds...")
        time.sleep(61)  # Sleep and retry
        result = inference(model, system_prompt, collection_name, q, a)

    return result

# Function to handle parallel execution
def run_parallel():
    tasks = []
    
    # Calculate total iterations for progress bar
    total_iterations = len(system_prompts) * len(user_prompts) * len(doc_stores_collection_names) * len(model_types)
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor, tqdm(total=total_iterations, desc="Running Inference") as pbar:
        future_to_params = {}

        # Submit tasks
        for system_prompt in system_prompts:
            for q, a in user_prompts:
                for collection_name in doc_stores_collection_names:
                    for model in model_types:
                        future = executor.submit(run_inference, model, system_prompt, collection_name, q, a)
                        future_to_params[future] = (model, system_prompt, collection_name, q, a)
        
        # Collect results
        results = []
        for future in concurrent.futures.as_completed(future_to_params):
            try:
                results.append(future.result())  # Get the result
            except Exception as e:
                tqdm.write(f"Unexpected error: {e}")
            pbar.update(1)  # Update progress bar

    return results

# Run the parallel execution
results = run_parallel()

Running Inference:   0%|                                                                                                                                                  | 0/1000 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0


Running Inference:   0%|▏                                                                                                                                       | 1/1000 [00:22<6:07:07, 22.05s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   0%|▎                                                                                                                                       | 2/1000 [00:26<3:15:50, 11.77s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   0%|▍                                                                                                                                       | 3/1000 [00:29<2:08:13,  7.72s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   0%|▋                                                                                                                                         | 5/1000 [00:30<53:48,  3.25s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|▊                                                                                                                                         | 6/1000 [00:32<48:03,  2.90s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|▉                                                                                                                                         | 7/1000 [00:33<38:17,  2.31s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|█                                                                                                                                         | 8/1000 [00:36<38:28,  2.33s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|█▏                                                                                                                                        | 9/1000 [00:37<31:40,  1.92s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|█▎                                                                                                                                       | 10/1000 [00:39<31:55,  1.93s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|█▍                                                                                                                                     | 11/1000 [00:47<1:03:43,  3.87s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|█▋                                                                                                                                       | 12/1000 [00:48<48:02,  2.92s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   1%|█▉                                                                                                                                       | 14/1000 [00:51<34:02,  2.07s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|██                                                                                                                                       | 15/1000 [00:51<26:32,  1.62s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|██▏                                                                                                                                      | 16/1000 [00:54<32:23,  1.98s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|██▎                                                                                                                                      | 17/1000 [01:01<55:11,  3.37s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|██▍                                                                                                                                      | 18/1000 [01:02<47:30,  2.90s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|██▌                                                                                                                                      | 19/1000 [01:05<44:54,  2.75s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|██▋                                                                                                                                      | 20/1000 [01:08<45:27,  2.78s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|██▉                                                                                                                                      | 21/1000 [01:10<45:01,  2.76s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|███                                                                                                                                      | 22/1000 [01:13<45:33,  2.80s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Running Inference:   2%|███▏                                                                                                                                     | 23/1000 [01:14<34:24,  2.11s/it]

Retriever found. Number of documents in collection: 0


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Running Inference:   2%|███▎                                                                                                                                     | 24/1000 [01:14<25:31,  1.57s/it]

Retriever found. Number of documents in collection: 0


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   2%|███▍                                                                                                                                     | 25/1000 [01:16<25:49,  1.59s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|███▌                                                                                                                                     | 26/1000 [01:21<41:48,  2.58s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|███▋                                                                                                                                     | 27/1000 [01:22<34:24,  2.12s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|███▊                                                                                                                                     | 28/1000 [01:26<47:24,  2.93s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|███▉                                                                                                                                     | 29/1000 [01:29<44:32,  2.75s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|████                                                                                                                                     | 30/1000 [01:31<42:37,  2.64s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|████▏                                                                                                                                    | 31/1000 [01:36<55:10,  3.42s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Running Inference:   3%|████▍                                                                                                                                    | 32/1000 [01:37<40:36,  2.52s/it]

Retriever found. Number of documents in collection: 0


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|████▌                                                                                                                                    | 33/1000 [01:38<32:45,  2.03s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|████▋                                                                                                                                    | 34/1000 [01:40<32:09,  2.00s/it]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Retriever found. Number of documents in collection: 0


Running Inference:   3%|████▋                                                                                                                                    | 34/1000 [01:41<47:51,  2.97s/it]


KeyboardInterrupt: 

Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Retriever found. Number of documents in collection: 0
Error encountered: Socket operation on non-socketError encountered: Socket operation on non-socket
Rate limit reached, sleeping for 61 seconds...
Retriever found. Number of documents in collection: 0
Error encountered: Socket operation on non-socketRetriever found. Number of documents in collection: 0
Error encountered: Socket operation on non-socket
Rate limit reached, sleeping for 61 seconds...
Error encountered: Socket operation on non-socketError encountered: Socket operation on non-socket
Rate limit reached, sleeping for 61 seconds...
Error encountered: Socket operation on non-socket
Rate limit reached, sleepi

In [14]:
import concurrent.futures

executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

# Shutdown immediately, cancelling all pending tasks
executor.shutdown(wait=False)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [104]:
import os
import json
import pandas as pd

# Directory containing JSON files
json_dir = "validation_responses"  # Update with your actual directory path

# List to store data
data = []

# Iterate over all JSON files in the directory
for file in os.listdir(json_dir):
    if file.endswith(".json"):  # Process only JSON files
        file_path = os.path.join(json_dir, file)
        with open(file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)
            data.append(json_data)  # Append the JSON content as a dictionary

# Convert list of dictionaries into a Pandas DataFrame
df = pd.DataFrame(data)


In [1]:
df

NameError: name 'df' is not defined

In [105]:
df.to_csv('validation_results3.csv')

In [106]:
df.Usage_InputTokens.sum() * 0.0025/1000

126.8110925

In [107]:
df.Usage_OutputTokens.sum() * 0.0025/1000

6.1727725

In [38]:
df.columns

Index(['DateTime', 'ModelType', 'SystemPrompt', 'DocumentStoreVersion',
       'UserPrompt', 'Response', 'real_answer', 'Validation_feedback',
       'Validation_score', 'Validation_refinement', 'Usage_InputTokens',
       'Usage_OutputTokens', 'Usage_TotalTokens'],
      dtype='object')

In [None]:
custom_prompt = """
You are a professional in prompt engineering. 
You will be given a prompt and you will generation some variation of it for AIs to better understand. 
Feel free to add any language to it to the best of your knowledge, or if you think it's relevant to 
the overall theme or topic of the prompt. 
(datetime.datetime.now() - now)
datetime.timedelta(seconds=58, microseconds=48171)


"""

In [17]:
chain = LLM_langchain.get_model(model_type='haiku', aws = True)
chain.invoke('Who r u ')

AIMessage(content="I'm Claude, an AI created by Anthropic. I aim to be helpful, honest, and harmless. How can I assist you today?", additional_kwargs={}, response_metadata={'ResponseMetadata': {'RequestId': 'faa5e384-6545-4296-9cec-c6421e885bfb', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Fri, 07 Feb 2025 06:30:20 GMT', 'content-type': 'application/json', 'content-length': '292', 'connection': 'keep-alive', 'x-amzn-requestid': 'faa5e384-6545-4296-9cec-c6421e885bfb'}, 'RetryAttempts': 0}, 'stopReason': 'end_turn', 'metrics': {'latencyMs': [1258]}}, id='run-ed5a5af0-ea48-4cd4-bb0a-7bf7e2f8431c-0', usage_metadata={'input_tokens': 11, 'output_tokens': 35, 'total_tokens': 46})

In [None]:
response = chain.invoke('''

Heres the prompt:


You are a bioinformatics expert who works for the GenePattern team.
Your job is to answer bioinformatics related questions about running a workflow. 
If an image description is provided, describe the image. 

Do not describe tools that are not in the vector store, instead respond
with "That tool is not currently available in GenePattern. Feel free to contact
the GenePattern team if you think it would be a good addition to our repository. Email: edh021@cloud.ucsd.edu"
Provide input file formats when giving instructions on how to run modules
or tools. Only give module suggestions for modules in GenePattern.
Do not tell users to “go to GenePattern and log in”.
Answer the following questions using all your knowledge
and providing as much detail as possible with step-by-step instructions.

If answering questions about workflows, only provide modules that exist on the GenePattern server. 
Assume the person asking questions is an undergraduate biologist with little to no experience with programming and computers. 

            
generate 5 variations of this.


''')

In [None]:
print(response.content)

In [None]:


user_query = 'Im wondering what kind of normalization DESeq2 does to generate the baseMean column in the output file. Is it TPM?'
ai_response = '''DESeq2 does not use Transcripts Per Million (TPM) for normalization. Instead, DESeq2 employs a method called "size factor" normalization to generate the baseMean column in the output file. This method is designed to account for differences in sequencing depth across samples.

Here's a brief overview of how DESeq2 performs normalization:

Size Factor Calculation: DESeq2 calculates a size factor for each sample. This size factor is used to adjust for differences in sequencing depth. The size factor is computed by taking the median of the ratios of observed counts to a pseudo-reference sample, which is constructed as the geometric mean of counts across all samples for each gene.
Normalization: The raw counts for each gene in a sample are divided by the sample's size factor. This results in normalized counts that are used for downstream analysis, such as differential expression testing.
baseMean Calculation: The baseMean is the average of the normalized counts for a gene across all samples. It provides a measure of the average expression level of a gene, accounting for differences in sequencing depth.
This normalization approach is different from TPM, which normalizes for both sequencing depth and gene length, and is typically used for comparing expression levels of genes within a sample rather than across samples.'''
real = "Per DESeq2's manual the baseMean represents the average of the normalized count values, divided by size factors, taken over all samples. The DESeq2's count normalization is based on a 'median of ratios' method and not TPM."
response = validation_bot(user_query, ai_response, real)

    

In [None]:
response['raw'].usage_metadata

In [None]:
os.environ['GOOGLE_GEMINI_API_KEY']

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,
    api_key = os.environ['GOOGLE_GEMINI_API_KEY']
)

In [None]:
llm.invoke('what are you')

In [None]:
system_prompts = ["Prompt 1", "Prompt 2"]  # Example system prompts
questions = [("What is DNA?", "DNA is a molecule."), ("What is RNA?", "RNA is similar to DNA.")]  # Example Q&A pairs
doc_stores_collection_names = ["collection1", "collection2"]  # Example collection names
model_types = ["gpt-3", "llama"]  # Example models

# Iterate over all permutations and print
for system_prompt in system_prompts:
    for q, a in questions:
        for collection_name in doc_stores_collection_names:
            for model in model_types:
                print(f"System Prompt: {system_prompt}")
                print(f"Question: {q}")
                print(f"Answer: {a}")
                print(f"Collection Name: {collection_name}")
                print(f"Model: {model}")
                print("-" * 50)  # Separator for readability