### Required Installations

In [1]:
# %pip install python-dotenv
# %pip install pandas
# %pip install --upgrade langchain
# %pip install langchain-community langchain-core
# %pip install -q -U google-generativeai
# %pip install -U langchain-openai
# %pip install -q -U google-generativeai
# %pip install -qU langchain-google-genai
# %pip install -U langchain-ollama
# # !ollama pull llama3.1
# %pip install -qU langchain_mistralai



#### Imports

In [2]:


# from langchain.chains.llm import LLMChain
from langchain_core.prompts import ChatPromptTemplate



from dotenv import load_dotenv, find_dotenv
import os
from  openai  import OpenAI
import pandas as pd
pd.set_option('display.max_colwidth', None)




#### Load Environment Variables

In [3]:
# Locate a .env file with find_dotenv() and load its entries into the process
# environment. The return value is bound to `_` so the cell displays nothing.
_ = load_dotenv(find_dotenv())
# Debug aids: keep these commented out so API keys never end up in saved
# notebook output.
# print(os.environ["GOOGLE_API_KEY"])
# print(os.environ["LANGCHAIN_API_KEY"])
# print(os.environ["OPENAI_API_KEY"])


### Input CSV file

In [4]:
# Path of the CSV that is read here and written back by the "Save CSV File" cell.
csv_file_name = "train_mini.csv"
# csv_file_name = "train_full.csv"  # alternative input file

df = pd.read_csv(csv_file_name)
# A bare `df.shape` is only rendered when it is the last expression of a cell,
# so print it explicitly instead of silently discarding it.
print(df.shape)

# Preview the conversation stored in the second row as a sanity check.
conversation = df.iloc[1]['conversation']
print(conversation)

USER:Hi i need a help, i am very hungry, I am looking for a restaurant
SYSTEM:Sure, I will help you, What type of food are you looking for? Which city should i search in?
USER:Some Punjabi kind of foods in milpitas



#### Common Variable declarations and prompt template

In [5]:
# System prompt for instruction generation. `{intent}` is left as a template
# placeholder and is filled in when the chain is invoked.
prompt_template = """
You are a user asking a system to {intent}. Your role is to create a one line user instruction. You have to read the conversation based on this intention as the person from the conversation.

Focus on the requirement, whether user is asking you to {intent} the name of the place,time and party size. 
Also identify required number of people, time , place or any other available entities and features to {intent} in the conversation.
Do not display these entities separately though.
Use this entities in various order to construct the user instruction. 



Do not make up information if it is not availble.
Keep the same verb from conversation in the instruction that is used to {intent}.
Use diverse mood and personality in the instruction. 
Do not start with Hey.

"""

# Maps the `service_call_method` value of a row to the human-readable intent
# phrase substituted for `{intent}` in the prompt.
intent_dict = dict(
    ReserveRestaurant='Reserve a Restaurant',
    FindRestaurants='Find a Restaurant',
    SearchHotel='Search a Hotel',
    ReserveHotel='Reserve a Hotel',
)

# Two-message chat prompt: the system instructions above, followed by the
# conversation text supplied as the human message.
prompt = ChatPromptTemplate.from_messages([
    ("system", prompt_template),
    ("human", "{conversation}"),
])

### Define LLM models


In [6]:
# Define LLM
# One chat-model client per provider. Each import sits directly above its
# client, so a missing provider package only affects the clients defined after
# that point in the cell.
# NOTE(review): credentials are presumably picked up from the environment
# variables loaded from .env earlier - confirm per provider.
from langchain_google_genai import ChatGoogleGenerativeAI

# Google Gemini client.
gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.5,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)


from langchain_openai import ChatOpenAI
# OpenAI client.
openai_llm = ChatOpenAI(
    model_name= "gpt-4o-mini", 
    temperature=0.5
    )


from langchain_ollama import ChatOllama

# Ollama client; uses a lower temperature than the other clients and caps the
# generation length via num_predict.
# NOTE(review): presumably needs a running Ollama instance with `llama3.1`
# pulled (see the commented `ollama pull` in the install cell) - confirm.
ollama_llm = ChatOllama(
    model = "llama3.1",
    temperature = 0.3,
    num_predict = 256,
    # other params ...
)


from langchain_mistralai import ChatMistralAI

# Mistral client.
mistral_llm = ChatMistralAI(
    model="open-mistral-7b",
    temperature=0.5,
    # max_retries=2,
    # other params...
)


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
import time

# Pipe the shared instruction-generation prompt into each model to build one
# runnable chain per provider.
openai_chain = prompt | openai_llm
gemini_chain = prompt | gemini_llm
ollama_chain = prompt | ollama_llm
mistral_chain = prompt | mistral_llm
# Chains that the cells below actually run, keyed by a short model label.
# Uncomment an entry to include that provider.
model_chains = {
    # 'gpt': openai_chain,
    'gemini':gemini_chain,
    # 'mistral':mistral_chain,
    # 'ollama':ollama_chain
    }


def execute_chain(chain,dialogue_id,intent,conversation):
    """Run one conversation through ``chain`` and return the generated text.

    Args:
        chain: Object exposing ``invoke(dict)`` whose result has a ``content``
            attribute (here: a prompt piped into a chat model).
        dialogue_id: Identifier of the dialogue. It is prefixed to the
            conversation text; non-string ids (for example when pandas parses
            the column as integers) are converted with ``str``.
        intent: Intent phrase substituted for ``{intent}`` in the prompt.
        conversation: Conversation text; must be a string.

    Returns:
        The ``content`` attribute of the chain result.
    """
    result = chain.invoke(
        {
            # str() keeps the concatenation from raising TypeError when the
            # id column was not read as text.
            "conversation": str(dialogue_id) + ': ' + conversation,
            "intent": intent,
        }
    )
    return result.content

# Manual switches for this cell:
#   execute_following - set to True to actually call the models.
#   experiment        - True: sample every 20th row and keep only restaurant
#                       reservations; False: smoke-test the first row only.
execute_following = False
experiment = True
if execute_following:
    # Iterate over (label, chain) pairs. Looping over .values() and then
    # indexing model_chains with the value fails, because the chain object is
    # not a key of the dict.
    for model, chain in model_chains.items():
        results = []
        if experiment:
            # Experimentation: every 20th row starting at row 1, clamped to
            # the frame length so a small CSV does not raise IndexError.
            for i in range(1, min(205, len(df)), 20):
                row = df.iloc[i]
                dialogue_id = row['dialogue_id']
                conversation = row['conversation']
                intent = intent_dict[row['service_call_method']]
                if intent != 'Reserve a Restaurant':
                    continue
                result = execute_chain(chain,dialogue_id,intent,conversation)
                results.append(result)
                # Pause between calls before sending the next request.
                time.sleep(2)

        else:
            # Iterate over each row
            for index, row in df.iterrows():
                # Apply LLM chain on the desired column in each row
                dialogue_id = row['dialogue_id']
                intent = intent_dict[row['service_call_method']]
                conversation = row['conversation']
                result = execute_chain(chain,dialogue_id,intent,conversation)
                results.append(result)
                # Only the first row is processed in this mode.
                break

        print(f"{model}: {results}")

In [10]:

# Update Dataframe /CSV
# For every active chain, add a column named after the model class (with the
# "Chat" prefix removed) and fill it with a generated instruction for each
# row whose intent contains "Reserve" and that has no instruction yet.
header_list = ['dialogue_id', 'conversation', 'turn_services', 'intent','service_call_method', 'service_call_parameters', 'service_results']
row_count = df.shape[0]

for model in model_chains:
    chain = model_chains[model]
    # steps[1] is the model half of `prompt | llm`.
    llm_model_name = type(chain.steps[1]).__name__.replace('Chat','')
    if llm_model_name not in header_list:
        header_list.append(llm_model_name)
        # Keep the known columns first, but preserve every other column that
        # is already in the frame (e.g. instructions produced by another model
        # in an earlier run). Reindexing on header_list alone would drop them,
        # and the save cell would then overwrite the CSV without them.
        extra_columns = [col for col in df.columns if col not in header_list]
        df = df.reindex(columns = header_list + extra_columns)
    # A freshly added or all-empty column is float NaN; store it as object so
    # string results can be assigned.
    df[llm_model_name] = df[llm_model_name].astype(object)
    if llm_model_name == 'MistralAI': time.sleep(10)
    column_name = str(llm_model_name) + '_instruction'

    results =[]
    for i in  range(0,row_count):
        # if i > 100:
        #     break
        row = df.iloc[i]
        existing = row[llm_model_name]
        if not pd.isna(existing):
            # Resume support: never regenerate an instruction that is already stored.
            print(f"Skipping generating instruction.Already available row {i} for {llm_model_name}: {existing}")
            continue

        dialogue_id = row['dialogue_id']
        conversation = row['conversation']
        intent = intent_dict[row['service_call_method']]
        if 'Reserve' not in intent:
            continue
        result = execute_chain(chain,dialogue_id,intent,conversation)
        results.append(result)
        # Write through the label of the i-th row so the positional read above
        # and this write always address the same row.
        df.loc[df.index[i],llm_model_name]=result.rstrip()
        print(i,column_name,result)

        
    
    

SyntaxError: invalid syntax (2041513456.py, line 26)

### Save CSV File


In [9]:
# print(csv_file_name)
# Write the frame back to the same path it was loaded from, overwriting the
# input CSV; index=False keeps the row index out of the file.
df.to_csv(csv_file_name, index=False)



In [None]:
import time
# Current local time formatted as a compact YYYYMMDDHHMM string.
time.strftime('%Y%m%d%H%M')

'202409170616'

### Create a CSV Agent

In [None]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_csv_agent

In [None]:
# Build an agent over the CSV file, backed by the OpenAI chat model.
# verbose=True is passed so the agent's intermediate steps are logged.
agent = create_csv_agent(
    openai_llm,
    csv_file_name,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    # NOTE(review): this is an explicit opt-in; presumably the agent executes
    # model-generated Python against the data - only run on trusted input.
    allow_dangerous_code = True
)

In [None]:
# Ask the CSV agent a simple question as a smoke test.
# NOTE(review): `run` may be deprecated in favour of `invoke` in newer
# LangChain releases - confirm against the installed version.
agent.run("how many rows are there?")

I'm feeling hungry and would like to find a restaurant in San Jose that serves American cuisine.


### Entity Recognition

In [9]:
# Entity-recognition system prompt.
# NOTE: this rebinds `prompt_template`, `prompt` and the four `*_chain` names
# used by the instruction-generation cells above; re-run those cells before
# generating instructions again.
prompt_template = """
Your role is to identify name of a place type and type of service in that place from a user instruction.

Example:

I'm looking for a restaurant in Oakland that serves pizza.
Here Place is "Oakland" type is "restaurant" and other parameter is "pizza".

"""



# Two-message chat prompt: the system instructions above, followed by the
# user instruction supplied as the human message.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",f"{prompt_template}",
        ),
        ("human", "{user_instruction}"),
    ]
)

# Rebuild one chain per provider on top of the entity-recognition prompt.
openai_chain = prompt | openai_llm
gemini_chain = prompt | gemini_llm
ollama_chain = prompt | ollama_llm
mistral_chain = prompt | mistral_llm

def execute_chain(chain,user_instruction):
    """Invoke ``chain`` on a user instruction and parse the reply into a dict.

    The reply text (``result.content``) is read line by line. Every line that
    contains a colon is split at the first colon into a key and a value; both
    are stripped of surrounding whitespace and the value additionally loses
    surrounding double quotes. Lines without a colon are ignored, and a key
    that appears more than once keeps its last value.

    Args:
        chain: Object exposing ``invoke(dict)`` whose result has ``content``.
        user_instruction: Text passed to the chain as ``user_instruction``.

    Returns:
        dict mapping each parsed key to its cleaned value.
    """
    response = chain.invoke({"user_instruction": user_instruction})

    parsed = {}
    for raw_line in response.content.splitlines():
        field, separator, remainder = raw_line.partition(':')
        if not separator:
            # No colon on this line, so there is nothing to extract.
            continue
        parsed[field.strip()] = remainder.strip().strip('"')

    return parsed

NameError: name 'openai_llm' is not defined

In [7]:
# Placeholders for values to be extracted later; not used in this cell yet.
search_criteria=None
target_place = None
# Chain used for entity recognition.
chain = openai_chain

# Reload the CSV so the stored instructions are read from disk.
df = pd.read_csv('train_mini.csv')

# Smoke test: run entity recognition on the first row that actually has a
# generated instruction.
for i,row in df.iterrows():
    user_instruction = row["OpenAI"]
    # Instructions are only generated for "Reserve" intents, so other rows
    # hold NaN here; skip them instead of sending NaN to the model.
    if pd.isna(user_instruction):
        continue
    print(user_instruction)
    result = execute_chain(chain,user_instruction)
    print(result)
    break

NameError: name 'openai_chain' is not defined