In [None]:
''' This notebook contains the following:
    M1 - GPT 3.5 Finetuned on the new tool prompts
    M2 - GPT 4

    The following tools have been added:
    'get_previous_sprint':'Returns the sprint id of the previous sprint',
    'return_top_k_items':'Returns the top k items from the given list of items',
'''
!pip install openai==0.28
!nvidia-smi
!pip install pandas
!CT_CUBLAS=1 pip install ctransformers --no-binary ctransformers

Mon Dec 11 12:16:00 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install accelerate
!pip install -U git+https://github.com/huggingface/accelerate.git

Collecting git+https://github.com/huggingface/accelerate.git
  Cloning https://github.com/huggingface/accelerate.git to /tmp/pip-req-build-gwgh6nfc
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/accelerate.git /tmp/pip-req-build-gwgh6nfc
  Resolved https://github.com/huggingface/accelerate.git to commit 694f2e2c12efbda81a1aa4b4b486767264116a2f
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: accelerate
  Building wheel for accelerate (pyproject.toml) ... [?25l[?25hdone
  Created wheel for accelerate: filename=accelerate-0.25.0.dev0-py3-none-any.whl size=267070 sha256=906af20a87fab2bd21de53f968dac477b28adcb4b0220bad5be224542e9d421e
  Stored in directory: /tmp/pip-ephem-wheel-cache-led_4pdh/wheels/9c/a3/1e/47368f9b6575655fe9ee1b6350cfa7d4b0befe66a35f8a8365
Successfully built accelerate
In

In [None]:
# Install pytorch-pretrained-bert and check that the 'bert-large-cased' archive
# entry is registered in its pretrained-model map.
# NOTE(review): `ppb` is not referenced by any later cell visible in this
# notebook — confirm whether this cell is still needed.
!pip install pytorch-pretrained-bert
import pytorch_pretrained_bert as ppb
assert 'bert-large-cased' in ppb.modeling.PRETRAINED_MODEL_ARCHIVE_MAP



In [None]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import openai
import os
import pandas as pd

In [None]:
# Google Sheet that is read as CSV (gviz `out:csv` export) by the loading cell.
sheet_id = "1N8oZ6XYKFeWbAwTr13yCSxbKu6eo-iKsYH3-IJ5Uuuk"
sheet_name = "Sheet1"
url = (
    "https://docs.google.com/spreadsheets/d/"
    + sheet_id
    + "/gviz/tq?tqx=out:csv&sheet="
    + sheet_name
)

In [None]:
# Load the sheet referenced by `url` into a DataFrame; the next cell reads its
# 'Prompt' column. NOTE: the name `df` is rebound to other frames by later cells.
df=pd.read_csv(url)

In [None]:
# Every entry of the sheet's 'Prompt' column, in row order.
queries = [prompt for prompt in df['Prompt']]

In [None]:
# Read the API key from the environment instead of embedding it in the notebook.
# A key that has been saved in a notebook must be treated as leaked and revoked.
# Raises KeyError when OPENAI_API_KEY is not set, so a missing key fails here
# rather than on the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Fine-tuned GPT-3.5 model that get_tools uses for tool selection.
model1="ft:gpt-3.5-turbo-0613:devrev-inter-iit-tech-meet::8UUzm0Kh"

In [None]:
def get_tools(query):
  """Ask the model named by the module-level ``model1`` which tools fit ``query``.

  The tool catalogue, the instructions and the query are sent as a single
  user message with temperature 0.

  Args:
    query: The user query to find tools for.

  Returns:
    The raw text content of the first completion choice.
  """
  instructions = """ Find the tools that would be useful to answer the following query. The available tools and their uses are as follows:
  [
    'works_list':'returns a list of work-items matching the request',
    'summarize_objects':'summarizes a list of objects',
    'prioritize_objects':'sorts a list of objects by priority',
    'add_work_items_to_sprint':'Adds given work items to a sprint',
    'get_sprint_id':'Returns id of the current sprint',
    'get_similar_work_items':'Returns work items similar to the given work item',
    'search_object_by_name':'given a string, returns id of a matching object',
    'create_actionable_tasks_from_text':'Given a text, extracts actionable tasks',
    'who_am_i':'Returns id of the current user',
  ]
  Your answer should only compose of one or more of these tools. Any extra tool or text will be penalized. Return the tools enclosed in [ ].
  Given query is """

  # The query goes on its own line, wrapped in ": ... : " delimiters.
  prompt_text = "\n".join([instructions, f": {query} : "])

  completion = openai.ChatCompletion.create(
      model=model1,
      messages=[{"role": "user", "content": prompt_text}],
      temperature=0,
  )

  first_choice = completion.choices[0]
  return first_choice.message['content']

In [None]:
# Raw tool-selection response for each query, in the same order as `queries`.
tool_results = [get_tools(prompt) for prompt in queries]

In [None]:
# Conversion of string to list:
def convert_string_to_list(string):
  """
  Converts a string in the format of a list to a list of strings.

  Each item has surrounding whitespace removed, then one pair of matching
  quotes (single or double) if present. Empty items are dropped, so "[]"
  gives [] and a trailing comma does not produce an empty entry.

  Args:
    string: The string to convert, e.g. "[works_list, summarize_objects]".

  Returns:
    A list of strings.

  Raises:
    ValueError: If the stripped string is not enclosed in square brackets.
  """
  # Remove any leading and trailing spaces
  string = string.strip()

  # Check if the string starts and ends with square brackets
  if not (string.startswith('[') and string.endswith(']')):
    raise ValueError('Invalid string format. Expected format: [item1, item2, ...]')

  items = []
  for raw_item in string[1:-1].split(','):
    item = raw_item.strip()

    # A quoted name ('works_list') would otherwise keep its quotes and never
    # match the bare tool names used as dictionary keys downstream.
    if len(item) >= 2 and item[0] == item[-1] and item[0] in ('"', "'"):
      item = item[1:-1].strip()

    # ''.split(',') yields [''], so skip blanks to keep "[]" an empty list.
    if item:
      items.append(item)

  return items

In [None]:
def analyze_query(tools_list, query_text):
    """Build the argument-extraction prompt for one query.

    The prompt lists the selected tools, the argument descriptions and purpose
    of every selected tool that is known to the tables below, the query text,
    and the output-format instructions. Sentences are joined with a single
    space so that none of them run into each other.

    Args:
        tools_list: Names of the tools selected for the query. Names that are
            not keys of the tables below still appear in the tool list but get
            no purpose or argument entry.
        query_text: The query whose argument values should be extracted.

    Returns:
        The prompt as a single string.
    """
    tools_purpose = {
        'works_list': 'Returns a list of objects matching the request',
        'summarize_objects': 'Summarizes a list of objects',
        'prioritize_objects': 'Returns a list of objects sorted by priority',
        'add_work_items_to_sprint':'Adds the given objects to the sprint',
        'get_sprint_id':'Return the id of the current sprint',
        'get_similar_work_items':'Returns a list of objects that are similar to the given object',
        'search_object_by_name':'Given a search string, returns the id of a matching object in the system of record',
        'create_actionable_tasks_from_text':'Given a text, extracts actionable text The text from which the actionable string insights, and creates tasks for them, which are kind of a work item',
        'who_am_i':'Returns string_id of current user',
        # 'get_previous_sprint':'Returns the sprint id of the previous sprint',
        # 'return_top_k_items':'Returns the top k items from the given list of items',
    }

    tools_arguments = {
        'works_list': [
            'applies_to_part: Array of strings to filter works relevant to',
            'created_by: Takes array of strings and filters work created by users in the array',
            'issue.priority: Array of strings to filter issues with given priorites in the array',
            'issue.rev_orgs: Array of strings to filter issues for the organizations provided in the array',
            'limit: integer providing the maximum number of works to return',
            'owned_by: Array of strings to filter issues owned by users specified in the array',
            'stage.name: Array of strings to filter work in the stages provided in the array',
            'ticket.needs_response: Boolean value telling if a ticket needs a response',
            'ticket.rev_org: Array of strings to return tickets associated with the given strings',
            'ticket.severity: Array of strings to filter issues with given severity in the array',
            'ticket.source_channel: Array of strings to filter for ticklets of the provided channels in the array',
            'type: Array of strings with allowed values: [issue, ticket, task] Filters for work of the provided types',
        ],
        'summarize_objects': ['objects: List of object ids to summarize'],
        'prioritize_objects': ['objects: List of objects to prioritize'],
        'add_work_items_to_sprint': ['work_ids: List of objects to be added', 'sprint_id: Id of the sprint'],
        'get_sprint_id': [],
        'get_similar_work_items': ['work_id: id of work item to find similar items to'],
        'search_object_by_name': ['query: String to search for'],
        'create_actionable_tasks_from_text': ['text: Text to create actionable tasks from'],
        'who_am_i': [],
        # 'get_previous_sprint':[],
        # 'return_top_k_items':['objects: List of objects sorted by priority', 'k: Number of items to be returned']
    }

    # Keep only the entries for tools that were actually selected and are known.
    relevant_purposes = {tool: tools_purpose[tool] for tool in tools_list if tool in tools_purpose}
    relevant_arguments = {tool: tools_arguments[tool] for tool in tools_list if tool in tools_arguments}

    # Each element is one sentence (or sentence group) of the prompt. Joining
    # with " " keeps the note, the query text and the final instructions from
    # being glued together ("...interchangablyFind...", "...\Just return...").
    sentences = [
        f"The given query utilizes the following tools: {tools_list}.",
        f"The arguments of the tools and their description  is as follows. Format is 'argument_name:Purpose of argument': {relevant_arguments}.",
        f"The purpose of the tools is as follows: {relevant_purposes}.",
        "Note that the words issues, objects and work_items have been used interchangably.",
        f"Find the values arguments for the given tools from the following text:\\ {query_text} \\",
        "Just return the value of the arguments, do not return anything else. In case you need to use the output of the previous tool as an input to the next tool, you can name it as $$PREV[i], where i is the index of the tool starting from 0. Return answer in nested JSON format with separate JSONS in one JSON for each tool named after the tool itself. The keys are: argument_name and argument_value. Every argument need not have a value. But every tool taking an argument must take atleast one argument. Only find values for relevant arguments.",
    ]

    return " ".join(sentences)

In [None]:
# One argument-extraction prompt per query: parse the raw tool-selection string
# into a list of tool names, then combine it with the query at the same index.
arg_prompts = [
  analyze_query(convert_string_to_list(raw_tools), queries[idx])
  for idx, raw_tools in enumerate(tool_results)
]

In [None]:
# Model name that get_arguments passes to the chat-completion call.
model2 = "gpt-4"

In [None]:
def get_arguments(formatted_prompt):
  """Send ``formatted_prompt`` to the model named by the module-level ``model2``.

  The prompt is sent as a single user message with temperature 0.

  Args:
    formatted_prompt: The complete prompt text to send.

  Returns:
    The raw text content of the first completion choice.
  """
  chat_request = {
      "model": model2,
      "messages": [{"role": "user", "content": formatted_prompt}],
      "temperature": 0,
  }
  completion = openai.ChatCompletion.create(**chat_request)

  first_choice = completion.choices[0]
  return first_choice.message['content']

In [None]:
# Raw argument-extraction response for each prompt, in the same order as `arg_prompts`.
argument_predictions = [get_arguments(arg_prompt) for arg_prompt in arg_prompts]

In [None]:
# Print every raw response; each one is followed by its own newline plus two
# blank lines, as a visual separator.
for prediction in argument_predictions:
  print(prediction, end='\n\n\n')

{
"get_similar_work_items": 
    {
        "work_id": "don:core:dvrv-us-1:devo/0:issue/1"
    },
"summarize_objects": 
    {
        "objects": "$$PREV[0]"
    }
}


{
  "works_list": [
    {
      "argument_name": "owned_by",
      "argument_value": ["Jane Doe"]
    },
    {
      "argument_name": "type",
      "argument_value": ["user story"]
    }
  ],
  "summarize_objects": [
    {
      "argument_name": "objects",
      "argument_value": ["$$PREV[0]"]
    }
  ]
}


The text does not provide enough specific information to extract values for the arguments of the given tools.


{
  "get_sprint_id": {},
  "works_list": {
    "applies_to_part": ["Security"],
    "type": ["issue"]
  },
  "summarize_objects": {
    "objects": ["$$PREV[1]"]
  },
  "create_actionable_tasks_from_text": {
    "text": ["$$PREV[2]"]
  }
}


{
  "works_list": [
    {
      "argument_name": "applies_to_part",
      "argument_value": ["UI/UX"]
    },
    {
      "argument_name": "type",
      "argument_value": ["

In [None]:
# Three parallel lists exist at this point: queries, tool_results and
# argument_predictions. They are written out together as one CSV, one column each.
data = dict(query=queries, tools=tool_results, arguments=argument_predictions)
df = pd.DataFrame(data)
df.to_csv('resultFinetunedIB06_base.csv')

In [None]:
def generate_json_schema(cell_value, model="gpt-4"):
    """Ask a chat model to rewrite one 'arguments' cell into the common JSON schema.

    The prompt holds the target schema, a worked example, and the cell value
    to convert. It is sent as a single user message with temperature 0.

    Args:
        cell_value: The argument cell content to convert. It is interpolated
            into the prompt with str formatting.
        model: Name of the chat model to call. Defaults to "gpt-4".

    Returns:
        The raw text content of the first completion choice.
    """
    # Plain (non f-) string: the JSON braces below must stay literal.
    instructions = """Convert the following argument cell value to the general JSON schema. If there are specific fields not present. Ignore them.
                  general JSON schema (results may be different):
                  [
                    {
                      "tool_name": "tool_name",
                      "arguments": [
                        {
                          "argument_name":"arg_name",
                          "argument_value":"arg_value"
                        }
                      ]
                    }
                  ]

                  An example:
                  [
                    {
                      "tool_name": "who_am_i",
                      "arguments": []
                    },
                    {
                      "tool_name": "works_list",
                      "arguments": [
                        {
                          "argument_name": "issue.priority",
                          "argument_value": ["p0"]
                        },
                        {
                          "argument_name": "owned_by",
                          "argument_value": ["$$PREV[0]"]
                        },
                        {
                          "argument_name": "type",
                          "argument_value": ["issue"]
                        }
                      ]
                    },
                    {
                      "tool_name": "prioritize_objects",
                      "arguments": [
                        {
                          "argument_name": "objects",
                          "argument_value": "$$PREV[1]"
                        }
                      ]
                    },
                    {
                      "tool_name": "get_sprint_id",
                      "arguments": []
                    },
                    {
                      "tool_name": "add_work_items_to_sprint",
                      "arguments": [
                        {
                          "argument_name": "work_ids",
                          "argument_value": "$$PREV[2]"
                        },
                        {
                          "argument_name": "sprint_id",
                          "argument_value": "$$PREV[3]"
                        }
                      ]
                    }
                ]"""

    # The value to convert has to be part of the prompt; without it the model
    # only ever sees the schema and the example.
    prompt = f"{instructions}\n\nArgument cell value to convert:\n{cell_value}"

    messages = [{
        "role": "user",
        "content": prompt,
    }]

    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    generated_json_schema = response.choices[0].message['content']

    return generated_json_schema

# Read a saved results CSV, convert every 'arguments' cell with
# generate_json_schema, and write the frame plus the new column to a new CSV.
csv_file_path = '/content/resultIB06_novel.csv'
output_csv_path = 'output_file.csv'

df = pd.read_csv(csv_file_path)
df['generated_json_schema'] = [generate_json_schema(value) for value in df['arguments']]
df.to_csv(output_csv_path, index=False)