In [2]:
import getpass
import json
import mlflow
import os
import pandas as pd
import requests
import subprocess
import time
import warnings

from datetime import datetime
from datetime import timedelta

from domino import Domino

from langchain import hub
from langchain.chains import LLMMathChain
# from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain.agents import AgentExecutor, AgentType, initialize_agent, create_csv_agent
# from langchain_openai import ChatOpenAI
from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool, StructuredTool, Tool, tool, DuckDuckGoSearchRun

from pydantic import BaseModel, Field

In [3]:
warnings.filterwarnings('ignore')
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter Your OpenAI API Key:")

Enter Your OpenAI API Key: ········


In [4]:
llm = ChatOpenAI(temperature=0)

In [12]:
search = DuckDuckGoSearchRun()

search_tool = Tool.from_function(
    func=search.run,
    name="Search",
    description="useful for when you need to search the internet for information"
)

llm_math_chain = LLMMathChain.from_llm(ChatOpenAI())

math_tool = Tool.from_function(
    func=llm_math_chain.run,
    name="Calculator",
    description="Useful for when you are asked to perform math calculations"
)

# create an experiment in MLflow
@tool("create_experiment", return_direct=True)
def create_experiment(experiment_name:str):
    """Create an experiment in Domino"""
    # we'll make the name unique in the project by appending a timestamp so that you and other users can run this cell more than once.
    timestamp = time.time()
    if experiment_name:
        experiment_name = f"{experiment_name}-{timestamp}"
        # below, we'll use the returned experiment_id in calls to mlflow.start_run() to add data to the experiment.
        experiment_id = mlflow.create_experiment(experiment_name.replace("experiment_name = \"", ""))
        print(f"Experiment id: {experiment_id}")
        print(f"Experiment name: {experiment_name}")


# execute a blocking job in Domino
@tool("create_run_job", return_direct=True)
def create_run_job(job_file:str):
    """Create and run  a job in Domino"""

    owner=os.getenv("DOMINO_PROJECT_OWNER")
    project=os.getenv("DOMINO_PROJECT_NAME")

    domino = Domino(
    f"{owner}/{project}",
    api_key=os.environ["DOMINO_USER_API_KEY"],
    host=os.environ["DOMINO_API_HOST"],
    )
    domino.authenticate(domino_token_file = os.getenv('DOMINO_TOKEN_FILE'))
    if job_file:
        # Blocking: this will start the run and wait for the run to finish before returning the status of the run
        domino_run = domino.runs_start_blocking(
            [job_file], title="Started from Domino agent"
        )
    print(domino_run)

# analyze control centre costs
@tool("analyze_costs", return_direct=True)
def analyze_costs(user_query:str) -> str:
    """Answer usage and cost analytics related questions for this deployment"""
    if not user_query:
        return None
    # get all the data and write it to a csv
    csv_file_name = 'control_centre_costs.csv'
    API_KEY = os.environ["DOMINO_USER_API_KEY"]
    URL = "https://johnal33586.marketing-sandbox.domino.tech/v4/gateway/runs/getByBatchId"
    headers = {'X-Domino-Api-Key': API_KEY}
    last_date = datetime.now().strftime('%Y-%m-%d')
    
    last_date = datetime.strftime(
        datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days = 1),
        '%Y-%m-%d',
    )
    
    try:
      os.remove(csv_file_name)
    except:
      pass
    
    batch_ID_param = ""
    while True:
        batch = requests.get(url = URL + batch_ID_param, headers = headers)
        parsed = json.loads(batch.text)
        batch_ID_param = "?batchId=" + parsed['nextBatchId']
        df = pd.DataFrame(parsed['runs'])
        df[df.endTime <= last_date].to_csv(
            csv_file_name,
            mode="a+",
            index=False,
            header=True)
        if len(df.index) < 1000 or len(df.index) > len(df[df.endTime <= last_date].index):
            break
            
    # create a csv agent
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    agent_csv = create_csv_agent(
    llm,
    "control_centre_costs.csv",
    agent_type=AgentType.OPENAI_FUNCTIONS,
    verbose=False
    )
    return agent_csv.run(user_query)

In [13]:
tools = [search_tool, math_tool,create_experiment,create_run_job, analyze_costs]

agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False
)

In [8]:
agent.run("What is a Domino data set")

'A Domino data set is a read-write folder that allows for efficient storage and sharing of large amounts of data across projects in the Domino Data Lab platform.'

In [9]:
agent.run("create an experiment called agent_exp")

Experiment id: 12
Experiment name: experiment_name = "agent_exp-1704851032.4947498


In [10]:
agent.run("Where is Domino data lab located?")

'The headquarters of Domino Data Lab is located in San Francisco, California.'

In [14]:
agent.run("Run a job using job.py")

{'runId': '659df7360ba3b8384bafdafa', 'message': 'Run for project integration-test/SearchEngineAgent started. You can view it here:\nhttps://johnal33586.marketing-sandbox.domino.tech/jobs/integration-test/SearchEngineAgent/659df7360ba3b8384bafdafa\n'}


In [15]:
agent.run("What % of Fortune 100 has Domino Data Lab?")

'Over 20% of the Fortune 100 companies have Domino Data Lab.'

In [16]:
agent.run("What new features were launched in Domino Data Lab, give me a 200 word summary")

"Domino Data Lab has launched powerful new capabilities for building AI, including Generative AI (GenAI), rapidly and safely at scale. These capabilities are part of their fall 2023 platform release and include transforming Domino's AI Project Hub into an AI ecosystem hub. The new features aim to accelerate AI innovation and production with scalability and safety in mind. They also include updates for accelerating AI time-to-value, reducing costs, implementing responsible AI, and expanded cloud-based offerings."

In [20]:
agent.run("Which project used the most number of compute hours")

'The project that used the most compute hours is "SearchEngineAgent".'

In [21]:
agent.run("How many hours did the project SearchEngineAgent use on this deployment")

'The usage hours for the project "SearchEngineAgent" on this deployment is approximately 11.98 hours.'