In [67]:
import json

# Read comments and issue data saved to local files (avoid duplicate API calls)
f1 = open('all-issues.json', 'r');

all_issues = json.loads(f1.read())

f1.close()

In [68]:
from random import randrange

# manually verify that the issues have been loaded
issue_no = randrange(len(all_issues))
print(len(all_issues))
print(issue_no)
print(json.dumps(all_issues[issue_no], indent=2))

588
163
{
  "url": "https://api.github.com/repos/aws-amplify/amplify-studio/issues/516",
  "repository_url": "https://api.github.com/repos/aws-amplify/amplify-studio",
  "labels_url": "https://api.github.com/repos/aws-amplify/amplify-studio/issues/516/labels{/name}",
  "comments_url": "https://api.github.com/repos/aws-amplify/amplify-studio/issues/516/comments",
  "events_url": "https://api.github.com/repos/aws-amplify/amplify-studio/issues/516/events",
  "html_url": "https://github.com/aws-amplify/amplify-studio/issues/516",
  "id": 1186608728,
  "node_id": "I_kwDOEt5APc5GujZY",
  "number": 516,
  "title": "Child property of component is not saved",
  "user": {
    "login": "layqnovo",
    "id": 100163734,
    "node_id": "U_kgDOBfhglg",
    "avatar_url": "https://avatars.githubusercontent.com/u/100163734?v=4",
    "gravatar_id": "",
    "url": "https://api.github.com/users/layqnovo",
    "html_url": "https://github.com/layqnovo",
    "followers_url": "https://api.github.com/users/layq

In [69]:

# promptTemplate = """
# AWS Amplify Studio customers post Github issues with their error messages and questions. Experts from AWS Amplify Studio respond with answers or solutions.
# You are looking at Github issues posted by other customers, which describe the details of their app and the error they are running into.
# You want to summarize the error and solutions from this Github issue's BODY and COMMENTS for other customers.

# The BODY contains details about the customer's app and error.
# The BODY generally contains the following sections: App Id, Region, Environment name, Figma File Version (if applicable), Amplify CLI Version, If applicable, what version of Node.js are you using?, What operating system are you using?, Browser type?, Describe the bug, Expected Behavior, Project Identifier, Additional information
# If any of the sections in the BODY are followed by `_No response_`, that means the customer did not provide these details. Ignore these values.

# The COMMENTS section contains the conversation between customers and engineers from Amplify Studio.
# Each message under COMMENTS will follow the below structure:
# ```
# CONTENT: <the contents of this comment>
# AUTHOR: <username>
# AUTHOR_TYPE: <Possible values are `CUSTOMER` and `EXPERT`>
# POSITIVE_REACTIONS: <Number of positive reactions>
# END COMMENT
# ```

# To find the error:
# - Look for descriptions of the bug, reproduction steps, comments from the customer, stack traces, and console logs in the BODY and TITLE.
# - Sometimes experts will post comments asking for more details about the error, and the customer may post more details about the error in the comments.
# - If an error message is not found, just say "No error found."
# - Customers will post specific details related to their app, like model names. Ignore specific details and try to write a general error message

# To find solutions for the error:
# - Look at comments from experts, paying attention to the number of positive reactions, and paying attention to whether customers thank the expert if their solution works.
# - Prioritize solutions from comments posted by experts.
# - Provide generalized answers.
# - Avoid including any names or customer details in the answer.
# - There may be multiple solutions. Use bullet points or numbered lists for each solution and separate each solution clearly.
# - Each solution should be around 3-5 sentences.
# - Only show solutions that exist in the comments.
# - There may not be a solution. For example, the customer may not respond in the comments, the experts may add the issue to the backlog, or the customer's issue may be a feature request.
# - If there is no solution, just say "No solution found."

# GITHUB ISSUE URL: {url}

# TITLE: {title}

# BODY:
# {body}

# COMMENTS:
# {comments}

# What is the error and solutions?
# """


promptTemplate = """
AWS Amplify Studio customers post Github issues with their error messages and questions. Experts from AWS Amplify Studio respond with answers or solutions.
You are looking at Github issues posted by other customers, which describe the details of their app and the error they are running into.
You want to summarize the error and solutions from this Github issue's BODY and COMMENTS for other customers.

The BODY contains details about the customer's app and error. The COMMENTS section contains the conversation between customers and engineers from Amplify Studio.

Each message under COMMENTS will follow the below structure:
```
CONTENT: <the contents of this comment>
AUTHOR: <username>
AUTHOR_TYPE: <Possible values are `CUSTOMER` and `EXPERT`>
POSITIVE_REACTIONS: <Number of positive reactions>
END COMMENT
```

To find the error:
- Look for descriptions of the bug, reproduction steps, comments from the customer, stack traces, and console logs in the BODY and TITLE.
- Sometimes experts will post comments asking for more details about the error, and the customer may post more details about the error in the comments.
- If an error message is not found, just say "No error found."
- Customers will post specific details related to their app, like model names. Ignore specific details and try to write a general error message

To find solutions for the error:
- Look at comments from experts, paying attention to the number of positive reactions, and paying attention to whether customers thank the expert if their solution works.
- Prioritize solutions from comments posted by experts.
- Provide generalized answers.
- Avoid including any names or customer details in the answer.
- There may be multiple solutions. Use bullet points or numbered lists for each solution and separate each solution clearly.
- Each solution should be around 3-5 sentences.
- Only show solutions that exist in the comments.
- There may not be a solution. For example, the customer may not respond in the comments, the experts may add the issue to the backlog, or the customer's issue may be a feature request.
- If there is no solution, just say "No solution found."

GITHUB ISSUE URL: {url}

TITLE: {title}

BODY:
{body}

COMMENTS:
{comments}

What is the error and solutions?
"""

singleCommentTemplate = """
CONTENT: {body}
AUTHOR: {author}
AUTHOR_TYPE: {author_type}
POSITIVE_REACTIONS: {positive_reactions}
END COMMENT
"""

expert_author_types = ["COLLABORATOR", "CONTRIBUTOR", "FIRST_TIME_CONTRIBUTOR", "MEMBER", "OWNER"];

In [70]:
def constructPromptForModel1(issue):
    issue_id = issue['number']
    issue_body = issue['body']
    if not issue_body:
        return 'UNABLE TO CONSTRUCT PROMPT'
    
    associated_comments = issue["comments"]
    associated_comments.sort(key=lambda x: x['created_at'])
    
    formattedCommentList = []
    for ac in associated_comments:
        comment_body = ac['body']
        user_name = ac['user']['login']
        author_type = 'EXPERT' if ac['author_association'] in expert_author_types else 'CUSTOMER'
        positive_reactions = max(ac['reactions']['total_count'] - ac['reactions']['confused'] - ac['reactions']['-1'], 0)
        formattedCommentList.append(singleCommentTemplate.format(body=comment_body, author=user_name, author_type=author_type, positive_reactions=positive_reactions))
    
    agg_comments_body = "".join(formattedCommentList)
    
    title = issue['title']
    
    return promptTemplate.format(url=issue["html_url"], title=title, body=issue_body, comments=agg_comments_body)

In [85]:
from random import randrange
issue_no = randrange(len(all_issues))
# issue_no = 489
print(issue_no)
print(constructPromptForModel1(all_issues[issue_no]))

44
UNABLE TO CONSTRUCT PROMPT


In [72]:
import re
# manually verify prompts are generated

all_prompts = list(map(constructPromptForModel1, all_issues))

# filter out prompts that are too long - https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
print(len(all_prompts))
threshold = 3*1950
for prompt in all_prompts:
    approx_char_count = len(prompt)
    if approx_char_count > 2190:
        print(approx_char_count)

# TODO: look into embedding prompts to handle large prompts

all_prompts = list(filter(lambda x: len(x) < threshold, all_prompts))
print(len(all_prompts))

588
588


In [86]:
import os
from dotenv import load_dotenv
import openai

load_dotenv()

openai.api_key = os.environ['OPENAI_API_KEY']

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


In [73]:
def generate_completions():
    for curr_prompt in all_prompts:
        response = openai.Completion.create(model="text-davinci-003", prompt=curr_prompt, max_tokens=100, temperature=0.5, n=2, presence_penalty=-1)
        choices = response['choices']
        choices_text = list(map(lambda x: x["text"], choices))
        yield { "prompt": curr_prompt, "choices": choices_text }

In [74]:
generator = generate_completions()

In [77]:
output = next(generator)
print(output["prompt"])
print('\n'.join(output["choices"]))

UNABLE TO CONSTRUCT PROMPT


I'm sorry, but it is not possible to construct a prompt without more information. Please provide more information about what kind of prompt you would like to construct.
 MESSAGE

Unfortunately, it is not possible to construct a prompt message without more information. The message should be tailored to the specific situation, and the exact wording should be determined based on the context.


In [76]:
# # TODO: is there an easy way to extract the error message? do we need to?
# def constructPromptForIssue(issue):
#     issue_id = issue['number']
#     body = issue['body']
#     if not issue['body']:
#         return 'UNABLE TO CONSTRUCT PROMPT'
#     if 'user-images.githubusercontent' in body:
#         print("found issue with screenshot")
#         return 'UNABLE TO CONSTRUCT PROMPT'
#     associated_comments = commentMapping[str(issue_id)]
#     if len(associated_comments) == 0:
#         return 'UNABLE TO CONSTRUCT PROMPT'
#     associated_comments.sort(key=lambda x: x['created_at'])
#     associated_comments_body = list(map(lambda x: x['author_association']+':\n'+x['body'], associated_comments))
#     agg_comments_body = '\n\n'.join(associated_comments_body)
#     prompt_body = '\n\n'.join([issue['body'], agg_comments_body])
#     prompt = "Question:\n"+ prompt_body + "\nAnswer:" + "\n\n=====\n\n"
#     return prompt

# # TODO: How to structure conversation
# def constructCompletionForIssue(issue):
#     issue_id = issue['number']
#     associated_comments = commentMapping[str(issue_id)]
#     if len(associated_comments) == 0:
#         return 'UNABLE TO CONSTRUCT COMPLETION'
#     associated_comments.sort(key=lambda x: x['created_at'])
    
#     # TODO: select most helpful comments?=
#     positive_associated_comments = list(filter(lambda x: (x['reactions']['total_count'] - x['reactions']['-1'] - x['reactions']['confused']) > 0, associated_comments))
#     positive_associated_comments_body = list(map(lambda x: x['body'], positive_associated_comments))
    
#     # TODO: if including the last comment as the completion, try to filter out useless comments like "closing this issue"
#     agg_last_two_comments = '\n\n'.join([associated_comments[-2]['body'], associated_comments[-1]['body']]) if len(associated_comments) > 1 else associated_comments[-1]['body']
#     completion_body = '\n\n'.join(positive_associated_comments_body) if len(positive_associated_comments) > 0 else agg_last_two_comments
#     completion = ' ' + completion_body + "\nEND"
#     return completion

# import json

# rows = []
# for issue in all_issues:
#     prompt = constructPromptForIssue(issue)
#     if prompt == 'UNABLE TO CONSTRUCT PROMPT':
#         print(f"skipping {issue['number']}")
#         continue
#     completion = constructCompletionForIssue(issue)
#     if completion == 'UNABLE TO CONSTRUCT COMPLETION':
#         print(f"skipping {issue['number']}")
#         continue
#     rows.append({ "prompt": prompt, "completion": completion });

# with open('fine-tune-dataset.jsonl', 'w') as outputFile:
#     for row in rows:
#         outputFile.write(json.dumps(row)+"\n")