#### This notebook uses Matching Engine along with Gemini models

#### Author: Saurabh Mangal (saurabhmangal@google.com)
##### Date: 21st Feb
##### Description: This notebook contains part 4 of the lab

 Copyright (c) [2024] [saurabhmangal@] -- 
 This notebook is licensed under the Commercial License.

### Installation of required libs for Gemini and PaLM


In [1]:
# !pip install streamlit

In [2]:
!pip install --upgrade google-cloud-aiplatform



In [3]:
# Project and region used by every Vertex AI call in this notebook.
# The package install lives in its own cell above; repeating it here only
# re-ran pip and mixed a shell command into the configuration cell.
PROJECT_ID = 'my-project-0004-346516'
GCP_PROJECT = project_id = PROJECT_ID  # aliases kept from the original chained assignment
LOCATION = 'us-central1'
REGION = LOCATION



### Vertex AI setup

In [None]:
### Defining PaLM Functions

In [4]:
import os

# import streamlit as st
import vertexai
from vertexai.preview.language_models import TextGenerationModel

# Initialise the Vertex AI SDK with the project and region assigned to
# PROJECT_ID and LOCATION earlier in the notebook.
vertexai.init(project=PROJECT_ID, location=LOCATION)


# @st.cache_resource
def get_model():
    """Return the ``text-bison@002`` model obtained via ``TextGenerationModel.from_pretrained``."""
    return TextGenerationModel.from_pretrained("text-bison@002")


def get_text_generation(prompt="", **parameters):
    """Send ``prompt`` to the model returned by ``get_model`` and return the reply text.

    Args:
        prompt: Prompt forwarded to ``predict`` as the ``prompt`` keyword.
        **parameters: Extra keyword arguments forwarded unchanged to ``predict``.

    Returns:
        The ``text`` attribute of the prediction response.
    """
    prediction = get_model().predict(prompt=prompt, **parameters)
    return prediction.text

### Defining Gemini Functions

In [5]:
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part

# input_prompt = """can you give me details of paracetamol"""

def generate(input_prompt):
    """Stream a completion from the ``gemini-ultra`` model and return it as one string.

    Args:
        input_prompt: Prompt passed as-is to ``GenerativeModel.generate_content``.

    Returns:
        The text of every streamed chunk, concatenated in arrival order.
    """
    model = GenerativeModel("gemini-ultra")
    responses = model.generate_content(
        input_prompt,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.2,
            "top_p": 1,
            "top_k": 32,
        },
        safety_settings=[],
        stream=True,
    )

    # Streamed chunks are consecutive slices of a single answer, so they are
    # concatenated directly. Joining them with " " inserted a stray space at
    # every chunk boundary (including in the middle of words).
    return "".join(response.text for response in responses)
    

def generate_pro(input_prompt):
    """Stream a completion from the ``gemini-pro`` model and return it as one string.

    Args:
        input_prompt: Prompt passed as-is to ``GenerativeModel.generate_content``.

    Returns:
        The text of every streamed chunk, concatenated in arrival order.
    """
    model = GenerativeModel("gemini-pro")
    responses = model.generate_content(
        input_prompt,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.2,
            "top_p": 1,
        },
        stream=True,
    )

    # Streamed chunks are consecutive slices of a single answer, so they are
    # concatenated directly. Joining them with " " inserted a stray space at
    # every chunk boundary (including in the middle of words).
    return "".join(response.text for response in responses)

from langchain_google_vertexai import VertexAI as langchainVertexAI

def generate_langchain_pro(input_prompt):
    """Answer ``input_prompt`` with ``gemini-pro`` through the LangChain Vertex AI wrapper.

    Args:
        input_prompt: Prompt handed to the wrapper's ``invoke`` method.

    Returns:
        Whatever ``invoke`` returns for the prompt.
    """
    llm = langchainVertexAI(model_name="gemini-pro")
    return llm.invoke(input_prompt)

In [6]:
import vertexai
from vertexai.language_models import TextGenerationModel

# Same SDK initialisation call as in the earlier PaLM cell, repeated here.
vertexai.init(project=PROJECT_ID, location=LOCATION)
# Generation settings unpacked into predict() by the two PaLM helper
# functions defined below (generate_palm_unicorn_v1, generate_palm_bison32k).
parameters = {
    "candidate_count": 1,
    "max_output_tokens": 1024,
    "temperature": 1,
    "top_k": 40
}

def generate_palm_unicorn_v1(input_prompt):
    """Query ``text-unicorn@001`` with the module-level ``parameters`` and echo the reply.

    Args:
        input_prompt: Prompt passed positionally to ``predict``.

    Returns:
        The ``text`` of the prediction response (also printed to stdout).
    """
    unicorn = TextGenerationModel.from_pretrained("text-unicorn@001")
    prediction = unicorn.predict(input_prompt, **parameters)
    print(f"Response from Model: {prediction.text}")
    return prediction.text

def generate_palm_bison32k(input_prompt):
    """Query ``text-bison-32k`` with the module-level ``parameters`` and echo the reply.

    Args:
        input_prompt: Prompt passed positionally to ``predict``.

    Returns:
        The ``text`` of the prediction response (also printed to stdout).
    """
    bison = TextGenerationModel.from_pretrained("text-bison-32k")
    prediction = bison.predict(input_prompt, **parameters)
    print(f"Response from Model: {prediction.text}")
    return prediction.text


### Read the Q&A file

#### This uses the file from Matching Engine which has questions and retrieved document results

In [16]:
import pandas as pd

# Pipe-separated file holding the questions and, per retrieval variant,
# the retrieved text in the pagewise_texts_v1..v3 columns.
filename = "./harry_potte_qa_output.csv"
df_qa = pd.read_csv(filename, sep="|")

# Instruction text placed in front of the retrieved passages in every prompt.
System_Prompts = """ You are an expert in reading harry potter books, but only provide evidences from the information provide and do not use an other information
so here are some search results : 
"""

Question_Prompts = """ -- Based on information above help to answer following user question
"""

# Build one prompt column per retrieval variant from a single template.
# The previous copy-pasted lines had drifted: the RAG2 prompt said
# 'Questio' instead of 'Question', so variant 2 was not prompted the same
# way as variants 1 and 3.
for rag_version in (1, 2, 3):
    df_qa[f'combine_prompt_RAG{rag_version}'] = (
        System_Prompts + ' ' + df_qa[f'pagewise_texts_v{rag_version}']
        + ' Please answers the Question : ' + df_qa['Question']
    )


# print(df['System Prompts'], df['RAG Results'] ,df['User Question'] )
# print(selected_column[0])

In [17]:
import re

In [18]:
# Marker stored in an output cell when a model call fails for that prompt.
FAILED_OUTPUT = "Prompt failed "
RAG_VERSIONS = (1, 2, 3)


def _ask_model(model_fn, prompt, label, row, strip_punctuation=False):
    """Call ``model_fn(prompt)`` and return its answer, or ``FAILED_OUTPUT`` on error.

    Args:
        model_fn: One of the generate_* helpers; called with the prompt only.
        prompt: Prompt text taken from a ``combine_prompt_RAG*`` column.
        label: Short model name used in the error message.
        row: Row label of the question, used in the error message.
        strip_punctuation: When True, remove every character that is not a
            word character, whitespace or ';' before calling the model.

    Returns:
        The model answer, or ``FAILED_OUTPUT`` if cleaning or the call raised.
    """
    try:
        if strip_punctuation:
            prompt = re.sub(r'[^\w\s;]', '', prompt)
        return model_fn(prompt)
    except Exception as err:
        # Catch Exception (not a bare except, and not ZeroDivisionError) so
        # API failures are recorded per prompt while Ctrl-C still interrupts.
        print(f"Prompt error at {label} ", row, "-", repr(err))
        return FAILED_OUTPUT


for i in df_qa.index:
    print("iteration #", i, "test")

    # Each call is guarded on its own: one failing variant no longer
    # overwrites the answers already obtained for the other variants.
    for version in RAG_VERSIONS:
        df_qa.loc[i, f"Gemini_pro_model_output_v{version}"] = _ask_model(
            generate_pro,
            df_qa.loc[i, f"combine_prompt_RAG{version}"],
            "gemini",
            i,
            strip_punctuation=True,
        )

    # v4 sends the first retrieval variant through the LangChain wrapper.
    df_qa.loc[i, "Gemini_pro_model_output_v4"] = _ask_model(
        generate_langchain_pro,
        df_qa.loc[i, "combine_prompt_RAG1"],
        "gemini",
        i,
        strip_punctuation=True,
    )

    # PaLM receives the prompts unmodified (no punctuation stripping).
    for version in RAG_VERSIONS:
        df_qa.loc[i, f"palm_bison32k_output_v{version}"] = _ask_model(
            generate_palm_bison32k,
            df_qa.loc[i, f"combine_prompt_RAG{version}"],
            "palm",
            i,
        )
    

# generate_medllms_v1(input_prompt)
# generate_palm_unicorn_v1(input_prompt)
# input_prompt = "What are the symptoms of influenza?" 
# generate_medlpalm(input_prompt)    
    
# print( "/n output here ::" , df['Gemini_ultra_model_output'][i])
# df = df.assign(Gemini_ultra_model_output=generate(df.combine_prompt))
# df['combine_prompt'].head(3)

# df['Gemini_ultra_model_output'].head(3)


iteration # 0 test
Response from Model:  The provided text does not contain information about a magical creature that can only be seen by those who have witnessed death.
Response from Model:  The provided text does not mention any magical creature that can only be seen by those who have witnessed death.
Response from Model:  The provided text does not mention any magical creature that can only be seen by those who have witnessed death.
iteration # 1 test
Response from Model:  The provided text does not mention the name of the school newspaper at Hogwarts.
Response from Model:  The provided text does not mention the name of the school newspaper at Hogwarts.
Response from Model:  The provided text does not specify the name of the school newspaper at Hogwarts.
iteration # 2 test
Response from Model:  The text does not contain the name of any magical map that shows the entire layout of Hogwarts.
Response from Model:  The context provided does not mention anything about the name of the magi

In [19]:
# List the column labels of df_qa, including the model-output columns.
df_qa.columns

Index(['Question', 'Answer', 'i', 'j', 'k', 'pagewise_texts_v1',
       'pagewise_texts_v2', 'pagewise_texts_v3', 'splitted_texts_v1',
       'splitted_texts_v2', 'splitted_texts_v3', 'splitted_texts_chunks_v1',
       'splitted_texts_chunks_v2', 'splitted_texts_chunks_v3', 'page_id_v1',
       'page_id_v2', 'page_id_v3', 'combine_prompt_RAG1',
       'combine_prompt_RAG2', 'combine_prompt_RAG3',
       'Gemini_pro_model_output_v1', 'Gemini_pro_model_output_v2',
       'Gemini_pro_model_output_v3', 'palm_bison32k_output_v1',
       'palm_bison32k_output_v2', 'palm_bison32k_output_v3'],
      dtype='object')

In [22]:

# Drop the assembled prompt columns before saving. errors='ignore' keeps this
# cell re-runnable after the columns have already been removed.
df_qa = df_qa.drop(
    columns=['combine_prompt_RAG1', 'combine_prompt_RAG2', 'combine_prompt_RAG3'],
    errors='ignore',
)

output1 = "./results/harry_potte_qa_model_out.csv"

# to_csv does not create missing directories, so make sure ./results exists.
# `os` is imported in the PaLM setup cell earlier in the notebook.
os.makedirs(os.path.dirname(output1), exist_ok=True)
df_qa.to_csv(output1)


In [None]:
# Preview the first row of df_qa.
df_qa.head(1)

#### As we can see in the output above, the poor search setup gives a bad response