<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/rag_fusion_main_GPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install openai  --root-user-action=ignore -q
%pip install colab-env --upgrade --quiet --root-user-action=ignore -q

In [9]:
import os
import openai
import random
import colab_env

# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
# Fail fast here, before any request is made, when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if openai.api_key is None:
    raise Exception("No OpenAI API key found. Please set it as an environment variable or in main.py")


# Function to generate queries using OpenAI's ChatGPT
def generate_queries_chatgpt(original_query):
    """Ask the chat model for search queries related to ``original_query``.

    Uses the openai>=1.0 module-level client
    (``openai.chat.completions.create``) with the ``gpt-4`` model and splits
    the reply into one query per line.

    Args:
        original_query: The query text interpolated into the user prompt.

    Returns:
        A list of the non-empty lines of the model's reply, each stripped of
        surrounding whitespace, in the order the model produced them. The
        prompt asks for 4 queries, but nothing here enforces that count, and
        any numbering or quoting the model adds is left in place. An empty
        list is returned when the reply has no text.
    """
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that generates multiple search queries based on a single input query."},
            {"role": "user", "content": f"Generate multiple search queries related to: {original_query}"},
            {"role": "user", "content": "OUTPUT (4 queries):"},
        ],
    )

    # The message content is optional in the response type; treat a missing
    # reply as empty text instead of failing on ``None.strip()``.
    reply_text = response.choices[0].message.content or ""

    # Drop blank / whitespace-only lines so that an empty string is never
    # handed to the search step as if it were a query.
    stripped_lines = (line.strip() for line in reply_text.splitlines())
    return [line for line in stripped_lines if line]

# Mock function to simulate vector search, returning random scores
def vector_search(query, all_documents):
    """Return a random, score-ordered selection of document ids.

    This is a stand-in for a real vector search: ``query`` is accepted but
    never consulted, and only the keys of ``all_documents`` are used.

    Args:
        query: Ignored by this mock.
        all_documents: Mapping whose keys are the candidate document ids.

    Returns:
        A dict mapping the chosen document ids to a random score drawn from
        ``uniform(0.7, 0.9)`` and rounded to two decimals, ordered from the
        highest score to the lowest. Between 2 and 5 ids are requested; fewer
        come back when the mapping holds fewer keys than that.
    """
    candidates = [*all_documents.keys()]
    random.shuffle(candidates)
    how_many = random.randint(2, 5)

    scored = []
    for doc_id in candidates[:how_many]:
        scored.append((doc_id, round(random.uniform(0.7, 0.9), 2)))

    # Stable sort: ids with equal scores keep their shuffled order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return dict(scored)

# Reciprocal Rank Fusion algorithm
def reciprocal_rank_fusion(search_results_dict, k=60):
    """Fuse several per-query rankings into a single ranking.

    Each query's documents are ordered by score (highest first) and every
    document receives ``1 / (rank + k)`` for that query, where ``rank`` is the
    0-based position in that ordering. Contributions are summed per document
    across all queries. Progress is printed to stdout along the way.

    Args:
        search_results_dict: Mapping of query -> {document id: score}.
        k: Constant added to the rank in the denominator. Because ranks start
            at 0, ``k=0`` divides by zero for the top document of a query.

    Returns:
        A dict of document id -> fused score, ordered from the highest fused
        score to the lowest.
    """
    print("Initial individual search result ranks:")
    for query_text, ranking in search_results_dict.items():
        print(f"For query '{query_text}': {ranking}")

    fused_scores = {}
    for query_text, ranking in search_results_dict.items():
        # Stable sort on the score, so tied documents keep their input order.
        ordered_docs = sorted(ranking, key=ranking.get, reverse=True)
        for rank, doc in enumerate(ordered_docs):
            previous_score = fused_scores.get(doc, 0)
            updated_score = previous_score + 1 / (rank + k)
            fused_scores[doc] = updated_score
            print(f"Updating score for {doc} from {previous_score} to {updated_score} based on rank {rank} in query '{query_text}'")

    by_fused_score = sorted(fused_scores.items(), key=lambda pair: pair[1], reverse=True)
    reranked_results = dict(by_fused_score)
    print("Final reranked results:", reranked_results)
    return reranked_results

# Dummy function to simulate generative output
def generate_output(reranked_results, queries):
    """Build the placeholder "generated" answer string.

    Args:
        reranked_results: Mapping whose keys are document ids, already in
            final ranked order; the values are not used.
        queries: The queries to mention in the output, formatted as-is.

    Returns:
        A single string naming the queries and the ranked document ids.
    """
    ranked_doc_ids = [*reranked_results.keys()]
    return f"Final output based on {queries} and reranked documents: {ranked_doc_ids}"


# Predefined mock corpus: document id -> short description text.
# In a real system these would come from your search database. The mock
# vector_search above only looks at the ids (keys), never at the text.
all_documents = {
    "doc1": "Climate change and economic impact.",
    "doc2": "Public health concerns due to climate change.",
    "doc3": "Climate change: A social perspective.",
    "doc4": "Technological solutions to climate change.",
    "doc5": "Policy changes needed to combat climate change.",
    "doc6": "Climate change and its impact on biodiversity.",
    "doc7": "Climate change: The science and models.",
    "doc8": "Global warming: A subset of climate change.",
    "doc9": "How climate change affects daily weather.",
    "doc10": "The history of climate change activism."
}

# Script entry point: expand the query, search per query, fuse, and report.
if __name__ == "__main__":
    original_query = "impact of climate change"

    # Step 1: have the chat model propose related search queries.
    generated_queries = generate_queries_chatgpt(original_query)

    # Step 2: run the (mock) search once per generated query, in order.
    all_results = {
        query: vector_search(query, all_documents)
        for query in generated_queries
    }

    # Step 3: merge the per-query rankings into one.
    reranked_results = reciprocal_rank_fusion(all_results)

    # Step 4: produce and show the final placeholder output.
    final_output = generate_output(reranked_results, generated_queries)
    print(final_output)

Initial individual search result ranks:
For query '1. "Effects of climate change on global weather patterns"': {'doc3': 0.89, 'doc5': 0.81, 'doc1': 0.77, 'doc9': 0.75}
For query '2. "How is climate change affecting wildlife and their habitats?"': {'doc3': 0.84, 'doc2': 0.82, 'doc9': 0.71}
For query '3. "Impact of climate change on human health"': {'doc6': 0.88, 'doc3': 0.78}
For query '4. "Influence of climate change on oceans and seas"': {'doc10': 0.86, 'doc6': 0.85, 'doc5': 0.83, 'doc3': 0.8}
For query '5. "Economic impact of climate change globally"': {'doc3': 0.89, 'doc5': 0.86, 'doc2': 0.83, 'doc6': 0.77}
For query '6. "Climate change impact on agriculture and food supply"': {'doc2': 0.87, 'doc7': 0.83}
For query '7. "Climate change and its effect on polar regions"': {'doc6': 0.81, 'doc4': 0.76, 'doc8': 0.76}
For query '8. "Impact of climate change on urban areas"': {'doc9': 0.9, 'doc4': 0.87, 'doc2': 0.77, 'doc5': 0.73}
For query '9. "How does climate change affect natural disast

### Output with model="gpt-3.5-turbo"

Initial individual search result ranks:

For query '1. "Global warming effects on ecosystems"': {'doc6': 0.83, 'doc10': 0.78, 'doc1': 0.76, 'doc5': 0.73}

For query '2. "Rising sea levels impact on coastal communities"': {'doc3': 0.84, 'doc5': 0.83, 'doc4': 0.73, 'doc6': 0.71}

For query '3. "Adaptation strategies for climate change"': {'doc7': 0.9, 'doc8': 0.81, 'doc2': 0.8, 'doc10': 0.73, 'doc1': 0.72}

For query '4. "Extreme weather events and their connection to climate
change"': {'doc10': 0.78, 'doc2': 0.78, 'doc5': 0.78, 'doc9': 0.75, 'doc8': 0.74}


Updating score for doc6 from 0 to 0.016666666666666666 based on rank 0 in query '1. "Global warming effects on ecosystems"'
Updating score for doc10 from 0 to 0.01639344262295082 based on rank 1 in query '1. "Global warming effects on ecosystems"'
Updating score for doc1 from 0 to 0.016129032258064516 based on rank 2 in query '1. "Global warming effects on ecosystems"'
Updating score for doc5 from 0 to 0.015873015873015872 based on rank 3 in query '1. "Global warming effects on ecosystems"'
Updating score for doc3 from 0 to 0.016666666666666666 based on rank 0 in query '2. "Rising sea levels impact on coastal communities"'
Updating score for doc5 from 0.015873015873015872 to 0.032266458495966696 based on rank 1 in query '2. "Rising sea levels impact on coastal communities"'
Updating score for doc4 from 0 to 0.016129032258064516 based on rank 2 in query '2. "Rising sea levels impact on coastal communities"'
Updating score for doc6 from 0.016666666666666666 to 0.032539682539682535 based on rank 3 in query '2. "Rising sea levels impact on coastal communities"'
Updating score for doc7 from 0 to 0.016666666666666666 based on rank 0 in query '3. "Adaptation strategies for climate change"'
Updating score for doc8 from 0 to 0.01639344262295082 based on rank 1 in query '3. "Adaptation strategies for climate change"'
Updating score for doc2 from 0 to 0.016129032258064516 based on rank 2 in query '3. "Adaptation strategies for climate change"'
Updating score for doc10 from 0.01639344262295082 to 0.032266458495966696 based on rank 3 in query '3. "Adaptation strategies for climate change"'
Updating score for doc1 from 0.016129032258064516 to 0.031754032258064516 based on rank 4 in query '3. "Adaptation strategies for climate change"'
Updating score for doc10 from 0.032266458495966696 to 0.04893312516263336 based on rank 0 in query '4. "Extreme weather events and their connection to climate change"'
Updating score for doc2 from 0.016129032258064516 to 0.03252247488101534 based on rank 1 in query '4. "Extreme weather events and their connection to climate change"'
Updating score for doc5 from 0.032266458495966696 to 0.04839549075403121 based on rank 2 in query '4. "Extreme weather events and their connection to climate change"'
Updating score for doc9 from 0 to 0.015873015873015872 based on rank 3 in query '4. "Extreme weather events and their connection to climate change"'
Updating score for doc8 from 0.01639344262295082 to 0.032018442622950824 based on rank 4 in query '4. "Extreme weather events and their connection to climate change"'
Final reranked results: {'doc10': 0.04893312516263336, 'doc5': 0.04839549075403121, 'doc6': 0.032539682539682535, 'doc2': 0.03252247488101534, 'doc8': 0.032018442622950824, 'doc1': 0.031754032258064516, 'doc3': 0.016666666666666666, 'doc7': 0.016666666666666666, 'doc4': 0.016129032258064516, 'doc9': 0.015873015873015872}
Final output based on ['1. "Global warming effects on ecosystems"', '2. "Rising sea levels impact on coastal communities"', '3. "Adaptation strategies for climate change"', '4. "Extreme weather events and their connection to climate change"'] and reranked documents: ['doc10', 'doc5', 'doc6', 'doc2', 'doc8', 'doc1', 'doc3', 'doc7', 'doc4', 'doc9']