### Setup

In [8]:
from openai import OpenAI
import pandas as pd
import json
# Shared OpenAI client used by the chat-completion calls below.
# No credentials are passed explicitly, so the SDK has to obtain them from
# its own defaults (presumably the environment -- confirm for your setup).
client = OpenAI()
# Load your data
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# cannot run elsewhere until it points at a local copy of the CSV.
df = pd.read_csv(r'C:\Users\AakashAI\Desktop\Repositories\Sales Agent\cleaned_leads.csv')

# Define filterable columns
# Only these columns are described to the model via generate_schema(); any of
# them that is missing from df is silently skipped there.
filter_columns = [
    'Lead Number', 'Lead Source', 'Lead Origin', 'Do Not Email', 'Do Not Call',
    'Converted', 'Country', 'Lead Stage', 'City'
]

# Generate schema for prompting
def generate_schema(df, columns):
    """Describe selected DataFrame columns as a bullet list for an LLM prompt.

    Each requested column that exists in ``df`` yields one line containing the
    column name, its dtype and up to five distinct non-null sample values.
    Requested columns that are absent from ``df`` are ignored.

    Parameters:
      df: The DataFrame to describe.
      columns: Iterable of column names to include, in output order.

    Returns:
      str: The bullet lines joined by newlines ("" when nothing matches).
    """
    def _bullet(name):
        # One "- name (dtype): e.g., v1, v2, ..." line for a single column.
        series = df[name]
        samples = series.dropna().unique()[:5]
        rendered = ", ".join(map(str, samples))
        return f"- {name} ({str(series.dtype)}): e.g., {rendered}"

    present = (name for name in columns if name in df.columns)
    return "\n".join(_bullet(name) for name in present)


In [50]:
def draft_sales_email_by_lead_number(lead_number: int, product: str, first_name: str, last_name: str) -> str:
    """
    Drafts a personalized sales email for a given lead number by looking up the lead data from the DataFrame.

    The lead is looked up in the module-level ``df_leads`` frame through its
    ``'Lead Number_x'`` column. Only the lead's ``'Company'`` value is used to
    personalise the text; when it is absent or null the generic phrase
    "your company" is used instead.

    Parameters:
      lead_number (int): The unique identifier of the lead.
      product (str): The product or service to pitch.
      first_name (str): Recipient's first name (subject line and greeting).
      last_name (str): Recipient's last name (subject line and greeting).

    Returns:
      str: The drafted sales email: "Subject: ...", a blank line, then the body.

    Raises:
      LookupError: If the lead number is not found (LookupError is a subclass
        of Exception, so existing ``except Exception`` handlers still apply).
    """
    agent_name = "Jane Doe"  # Placeholder for the agent's name
    agent_company = "Tech Solutions"  # Placeholder for the agent's company
    agent_contact_information = "jane.doe@xyz.com"  # Placeholder contact line for the signature

    # Look up the lead in the DataFrame
    # NOTE(review): `df_leads` and its 'Lead Number_x' column are not defined in
    # the cells visible here (the loaded frame is `df` with 'Lead Number');
    # confirm that `df_leads` exists before relying on this function.
    lead_data = df_leads[df_leads['Lead Number_x'] == lead_number]

    if lead_data.empty:
        raise LookupError(f"Lead with number {lead_number} not found.")

    # Assuming each Lead Number is unique, get the first row
    lead = lead_data.iloc[0].to_dict()

    # A null/NaN company would otherwise be rendered literally as "nan".
    company = lead.get('Company')
    if company is None or pd.isna(company):
        company = 'your company'

    # Create the email subject and body using the lead details.
    # The names are interpolated separately; `{first_name, last_name}` would
    # render a tuple repr such as "('Ann', 'Lee')".
    email_subject = f"Discover How {product} Can Benefit {first_name} {last_name}"
    email_body = (
        f"Hi {first_name} {last_name},\n\n"
        f"I hope you're doing well. My name is {agent_name} and I work as a Sales Engineer at {agent_company}.\n\n"
        f"I'm reaching out because I believe our {product} can significantly help {company} meet its goals. "
        "We have a strong track record of helping organizations in your industry optimize their processes and drive success.\n\n"
        "I'd love to arrange a brief call to discuss how we can support your goals and explore further benefits of our solution.\n\n"
        "Looking forward to hearing from you.\n\n"
        "Best regards,\n"
        f"{agent_name}\n"
        f"{agent_company}\n"
        f"{agent_contact_information}"
    )

    return f"Subject: {email_subject}\n\n{email_body}"


In [None]:
# Tool schema passed to the model by nl_to_query(): a single function,
# `execute_pandas_query`, with one required string argument `query_str`
# holding a filtering expression written against the name `df`.
tools = [
    {
        "type": "function",
        "function": {
            "name": "execute_pandas_query",
            "description": "Run a pandas filtering expression on the DataFrame df.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query_str": {
                        "type": "string",
                        "description": "Filtering expression using df, like (df['City'] == 'Delhi') & (df['Converted'] == 0)"
                    }
                },
                "required": ["query_str"]
            }
        }
    }
]
def execute_pandas_query(df, query_str, max_rows=5):
    """Evaluate a pandas expression against ``df`` and return the first matching rows.

    Parameters:
      df: The DataFrame exposed to the expression under the name ``df``.
      query_str (str): A Python expression, e.g. ``df['City'] == 'Mumbai'``.
        A Series result is applied to ``df`` as a row mask; a DataFrame result
        is used as is.
      max_rows (int): Maximum number of rows to return (non-negative; default 5).

    Returns:
      list[dict]: Up to ``max_rows`` rows as records on success.
      dict: ``{"error": <message>}`` if evaluation fails or the expression does
        not produce a DataFrame / row mask. Errors are returned, not raised, so
        they can be relayed back to the model.
    """
    print("Executing query:", query_str)
    # The expression sees a copy, so a mutating call inside it cannot alter the
    # caller's frame.
    local_vars = {"df": df.copy()}
    try:
        # SECURITY(review): query_str is model-generated text and is passed to
        # eval(). Emptying __builtins__ is not a sandbox (attribute access on
        # `df` still reaches arbitrary objects); do not expose this to
        # untrusted users without a proper expression whitelist/parser.
        result = eval(query_str, {"__builtins__": {}}, local_vars)
        if isinstance(result, pd.Series):
            # A Series is interpreted as a row mask over the original frame.
            result = df[result]
        if not isinstance(result, pd.DataFrame):
            return {
                "error": "Query must produce a DataFrame or a boolean row mask, "
                         f"got {type(result).__name__}"
            }
        # Truncate first so only the returned rows are converted to records.
        return result.head(max_rows).to_dict(orient="records")
    except Exception as e:
        return {"error": str(e)}
    
def handle_tool_call(tool_call, df):
    """Dispatch a model-issued tool call to the matching local function.

    Parameters:
      tool_call: Object exposing ``function.name`` (str) and
        ``function.arguments`` (a JSON-encoded object), as found in a chat
        completion's ``tool_calls``.
      df: DataFrame forwarded to ``execute_pandas_query``.

    Returns:
      Whatever the selected function returns, or ``{"error": <message>}`` when
      the tool name is unknown or required email arguments are missing.

    Raises:
      json.JSONDecodeError: If ``function.arguments`` is not valid JSON.
      KeyError: If ``query_str`` / ``nl_query`` is missing for its tool.
    """
    print("Tool call received:", tool_call)
    function_name = tool_call.function.name
    arguments = json.loads(tool_call.function.arguments)

    if function_name == "execute_pandas_query":
        print("Executing function:", function_name)
        # execute_pandas_query already returns JSON-friendly records (or an
        # error dict), so no further conversion is needed here.
        return execute_pandas_query(df, arguments["query_str"])

    if function_name == "nl_to_query":
        print("Converting natural language to query:", arguments["nl_query"])
        return nl_to_query(arguments["nl_query"])

    if function_name == "draft_sales_email_by_lead_number":
        # The email drafter takes four required arguments; forward all of them
        # (passing only lead_number raises TypeError).
        required = ("lead_number", "product", "first_name", "last_name")
        missing = [key for key in required if key not in arguments]
        if missing:
            return {"error": f"Missing arguments for {function_name}: {', '.join(missing)}"}
        print("Drafting sales email by lead number:", arguments["lead_number"])
        return draft_sales_email_by_lead_number(**{key: arguments[key] for key in required})

    # Report unknown tools explicitly instead of silently returning None.
    return {"error": f"Unknown tool: {function_name}"}


In [45]:
def nl_to_query(nl_query):
    """Turn a natural-language filter request into a pandas query via the model and run it.

    The schema of the module-level ``df`` (restricted to ``filter_columns``) is
    embedded in the prompt; the model is expected to answer with an
    ``execute_pandas_query`` tool call, which is executed through
    ``handle_tool_call``.

    Parameters:
      nl_query (str): The user's request in plain language.

    Returns:
      The result of the tool call (row records or an error dict from the query
      executor), or ``{"error": <message>}`` if the model did not call the tool.
    """
    print("Entered NL query:", nl_query)
    schema_description = generate_schema(df, filter_columns)
    prompt= f"""
    You are an expert data analyst.

    You have access to a tool called `execute_pandas_query` that accepts a Python expression as input and runs it on a DataFrame called `df`.

    Here is the DataFrame schema:
    {schema_description}

    Understand the schema and the valid values in the columns.
    rephrase the user query with available values within the schema.

    Now, a user has asked to filter the DataFrame with this request:
    '{nl_query}'

    You should call the tool with the appropriate pandas filtering expression using `df`.

    Only call the tool. Do not answer the query yourself.


    """
    messages = [
        {"role": "system", "content": "You are a pandas expert."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(
        messages=messages,
        model="gpt-4o",
        stream=False,
        tool_choice="auto",
        tools=tools,
        temperature=0,  # Keep temperature low for deterministic query generation
    )
    message = response.choices[0].message
    print(message.content)

    # With tool_choice="auto" the model may reply in text instead of calling
    # the tool; tool_calls is then empty/None and must not be indexed.
    tool_calls = message.tool_calls
    if not tool_calls:
        return {"error": f"Model did not call execute_pandas_query: {message.content}"}

    tool_call = tool_calls[0]
    print(f"Tool call: {tool_call}")
    tool_result = handle_tool_call(tool_call, df)
    print(tool_result)

    return tool_result

# Smoke test of nl_to_query(). The request says "Bombay"; in the recorded
# output below the model rewrites it to the 'Mumbai' value present in the data.
nl_query = "Can you get me leads from Bombay?"
# NOTE: despite its name, `code` receives whatever nl_to_query() returns
# (the tool result), not generated code.
code = nl_to_query(nl_query)
# print(code)

Entered NL query: Can you get me leads from Bombay?
None
Tool call: ChatCompletionMessageToolCall(id='call_hSzp1WaMDrcb9gjjOlN02f6e', function=Function(arguments='{"query_str":"df[\'City\'] == \'Mumbai\'"}', name='execute_pandas_query'), type='function')
Tool call received: ChatCompletionMessageToolCall(id='call_hSzp1WaMDrcb9gjjOlN02f6e', function=Function(arguments='{"query_str":"df[\'City\'] == \'Mumbai\'"}', name='execute_pandas_query'), type='function')
Executing function: execute_pandas_query
Executing query: df['City'] == 'Mumbai'
[{'Lead Number': 659357, 'Lead Source': 'Google', 'Lead Origin': 'Landing Page Submission', 'Do Not Email': 'No', 'Do Not Call': 'No', 'Converted': 0, 'Country': 'India', 'Mobile Number': nan, 'Lead Stage': 'Unreachable', 'Lead Grade': nan, 'Lead Score': 120, 'Age': 'Select', 'Email': 'Yogeshsadarang@yahoo.in', 'City': 'Mumbai'}, {'Lead Number': 655287, 'Lead Source': 'Direct Traffic', 'Lead Origin': 'Landing Page Submission', 'Do Not Email': 'No', 'Do 

5

In [None]:
# System prompt for the SDR assistant; it becomes the first message of the
# conversation history stored in `memory` below.
system_instructions = """
You are a friendly and helpful assistant for a Sales Development Representative (SDR).

Your primary responsibility is to help the SDR filter and identify relevant leads and write personalized sales emails.

You have access to one tool:
1. `nl_to_query` – USe this to access the leads database. You need to input the SDR's request in natural language, and it will convert it into a pandas query string.


Do not try to write the pandas query yourself.
Always use the `nl_to_query` tool to translate the manager’s request to retrieve the relevant data.
"""
# Conversation state shared across sdr() calls. sdr() appends to (and on the
# tool path replaces) memory['sdr']['messages'], so the history grows with each
# turn; re-running this cell resets it to just the system prompt.
memory = {
    "sdr" : {
        "messages": [
        {"role": "system", "content": system_instructions},
    ]
    }
}

In [41]:
# Tool schema passed to the model by sdr(): a single function, `nl_to_query`,
# with one required string argument `nl_query` carrying the request verbatim.
tools_1 = [

    {
  "type": "function",
  "function": {
    "name": "nl_to_query",
    "description": "Convert a natural language query into a valid pandas filtering expression.",
    "parameters": {
      "type": "object",
      "properties": {
        "nl_query": {
          "type": "string",
          "description": "The user's natural language filter request (e.g., 'show leads from Delhi with conversion 0')."
        }
      },
      "required": ["nl_query"]
    }
  }
}

]


def sdr(user_message):
    """Run one conversational turn of the SDR assistant and return its reply.

    The user message is appended to ``memory['sdr']['messages']`` and the whole
    history is sent to the model. If the model requests tools, each call is
    executed via ``handle_tool_call`` and a follow-up completion turns the
    results into a final answer. Every message exchanged, including that final
    answer, is recorded in the history so later turns see the full conversation.

    Parameters:
      user_message (str): The SDR's request in natural language.

    Returns:
      str: The assistant's reply for this turn.
    """
    history = memory['sdr']['messages']
    history.append({"role": "user", "content": user_message})

    response = client.chat.completions.create(
        messages=history,
        model="gpt-4o",
        stream=False,
        tool_choice="auto",
        tools=tools_1,
        temperature=0,  # Keep temperature low for deterministic tool selection
    )
    print(response)
    message = response.choices[0].message

    if not message.tool_calls:
        history.append({"role": "assistant", "content": message.content})
        return message.content

    # Record the assistant's tool request, then answer every requested call so
    # that each tool_call id in that message gets a matching tool message.
    history.append({"role": "assistant", "tool_calls": list(message.tool_calls)})
    for tool_call in message.tool_calls:
        print(f"Tool call: {tool_call}")
        tool_result = handle_tool_call(tool_call, df)
        print(tool_result)
        history.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "name": tool_call.function.name,
            # default=str keeps serialization from failing on values that are
            # not JSON-native (e.g. timestamps) in the returned records.
            "content": json.dumps(tool_result, default=str),
        })

    followup = client.chat.completions.create(
        model="gpt-4o",
        messages=history,
    )
    reply = followup.choices[0].message.content
    # Store the final answer too; otherwise the next turn's history would end
    # on a tool message and the assistant would "forget" what it said.
    history.append({"role": "assistant", "content": reply})
    return reply
    

In [49]:
# Interactive entry point: read one request from the user and run a single
# sdr() turn; the returned reply is displayed as the cell's output.
# NOTE(review): input() blocks and makes the result depend on what is typed,
# so this cell is not reproducible under "Restart & Run All".
user_input = input()
sdr(user_input)

ChatCompletion(id='chatcmpl-BLSRaZC1jushifFmtf5T3kHNfnaFn', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_bxhHLJ2VctVjFeg7PnZUazw7', function=Function(arguments='{"nl_query":"leads from bombay who are unemployed"}', name='nl_to_query'), type='function')]))], created=1744453926, model='gpt-4o-2024-08-06', object='chat.completion', service_tier='default', system_fingerprint='fp_92f14e8683', usage=CompletionUsage(completion_tokens=24, prompt_tokens=843, total_tokens=867, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))
Tool call: ChatCompletionMessageToolCall(id='call_bxhHLJ2VctVjFeg7PnZUazw7', function=Function(arguments='

'There is 1 lead from Bombay who is marked as unemployed.'