In [1]:
 if 'google.colab' in str(get_ipython()):
    # authorize & import/mount colab/google drive
    from google.colab import output
    from google.colab import drive
    from google.colab import auth
    drive.mount('/content/gd')

    # install libraries
    !pip install --upgrade openai
    !pip install --upgrade numba
    !pip install transformers
    !pip install sentence_transformers
    !pip install unidecode
    !pip install bertopic
    !pip install unidecode
    !pip install pandas==1.1.5

    # clear output
    output.clear()

In [1]:
import pandas as pd
import numpy as np
import re
import warnings
import openai
import json
pd.set_option('max_colwidth', 300)
warnings.filterwarnings("ignore")
import os

In [2]:
os.chdir("/content/gd/MyDrive/w210_Capstone_Project_Fall2021/Repo/Colab_notebooks/")

In [3]:
openai.api_key = os.environ['openai_api']

## Classification

In [4]:
# Classification data is in memory
with open("classification_examples_w_labels.txt", "r") as fp:
  examples = json.load(fp)

In [6]:
len(examples)

62

In [5]:
examples

[['Where did you grow up?', 'Factual'],
 ["Is there a place you go when you go to inside of you when you're free soloing that's different then when you're out with ropes and bolts on a rock?",
  'Non-factual'],
 ['What is your name?', 'Factual'],
 ['What do you want others to know or understand about you?', 'Non-factual'],
 ['When and where were you born?', 'Factual'],
 ['What are you most grateful for this year?', 'Non-factual'],
 ['What are or were some of your hobbies?', 'Factual'],
 ["If you're feeling down, how do you cheer yourself up?", 'Non-factual'],
 ['Are you a sports fan? If so, what is your favorite team?', 'Factual'],
 ['What is an interesting fact about you?', 'Factual'],
 ['How do you handle anger and frustration?', 'Non-factual'],
 ['What makes you feel most alive?', 'Non-factual'],
 ['What is your usual breakfast?', 'Factual'],
 ['Which language did your mom talk to you in?', 'Factual'],
 ["Would you admit that most humans in pockets of society live within a status qu

In [None]:
# List out files we uploaded
openai.File.list()

<OpenAIObject list at 0x7fcdf0c6b4d0> JSON: {
  "data": [
    {
      "bytes": 3563,
      "created_at": 1632647535,
      "filename": "compiled_results.csv",
      "id": "file-CLHLESQvjr1UryRTmYscIfKU",
      "object": "file",
      "purpose": "fine-tune-results",
      "status": "processed",
      "status_details": null
    },
    {
      "bytes": 63542,
      "created_at": 1632611084,
      "filename": "/content/gd/MyDrive/w210_Capstone_Project_Fall2021/Datasets/Alex_Honnold/gpt-3_prepared.jsonl",
      "id": "file-VKluziYeJmZQFbnbj7j8fSD4",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processed",
      "status_details": null
    },
    {
      "bytes": 2376,
      "created_at": 1632611201,
      "filename": "compiled_results.csv",
      "id": "file-yoyurg3ZcUpqmngIdJp7JGIv",
      "object": "file",
      "purpose": "fine-tune-results",
      "status": "processed",
      "status_details": null
    },
    {
      "bytes": 264387,
      "created_at": 16326473

In [11]:
# Test out Classification endpoint
openai.Classification.create(
    search_model="ada", 
    model="davinci",
    examples= examples,
    query="When is love going to come?",
    labels = ["Factual","Non-factual"], 
    temperature=0,   
    max_examples=len(examples)
)

<OpenAIObject classification at 0x7f5675b9b170> JSON: {
  "completion": "cmpl-4BCxze4lQSIKjOutbbZOg5aW5Cq0I",
  "label": "Non-factual",
  "model": "davinci:2020-05-03",
  "object": "classification",
  "search_model": "ada",
  "selected_examples": [
    {
      "document": 4,
      "label": "Factual",
      "text": "When and where were you born?"
    },
    {
      "document": 0,
      "label": "Factual",
      "text": "Where did you grow up?"
    },
    {
      "document": 1,
      "label": "Non-factual",
      "text": "Is there a place you go when you go to inside of you when you're free soloing that's different then when you're out with ropes and bolts on a rock?"
    },
    {
      "document": 24,
      "label": "Non-factual",
      "text": "What do you believe is the most important characteristic your sons and daughters should have?"
    },
    {
      "document": 56,
      "label": "Non-factual",
      "text": "Is your glass half full or half empty?"
    },
    {
      "document":

In [12]:
question_type = openai.Classification.create(
    search_model="babbage", 
    model="davinci",
    examples= examples,
    query="Tell me your understanding of life",
    labels = ["Factual","Non-factual"],    
    max_examples=len(examples)
)
question_type["label"]

'Non-factual'

In [13]:
question_type = openai.Classification.create(
    search_model="babbage", 
    model="davinci",
    examples= examples,
    query="How much money do you make?",
    labels = ["Factual","Non-factual"],    
    max_examples=len(examples)
)
question_type["label"]

'Factual'

In [14]:
question_type = openai.Classification.create(
    search_model="ada", 
    model="davinci",
    examples= examples,
    query="Why do you love climbing?",
    labels = ["Factual","Non-factual"],    
    max_examples=len(examples)
)
question_type["label"]

'Non-factual'

## Question Answering

In [None]:
# Upload file for Q&A model
# openai.File.create(file=open("questionanswer.jsonl"), 
#                    purpose="answers")

<File file id=file-mcgwAkzglsZSibNyeFuuGjcH at 0x7fcdf112ce90> JSON: {
  "bytes": 378225,
  "created_at": 1637812059,
  "filename": "questionanswer.jsonl",
  "id": "file-mcgwAkzglsZSibNyeFuuGjcH",
  "object": "file",
  "purpose": "answers",
  "status": "uploaded",
  "status_details": null
}

In [15]:
try:
  qa_answer = openai.Answer.create(
      search_model="babbage", 
      model="davinci", 
      question="Tell me some advice for a new climber", 
      file="file-mcgwAkzglsZSibNyeFuuGjcH",
      examples_context="In 2017, U.S. life expectancy was 78.6 years.", 
      examples=[["What is human life expectancy in the United States?", "78 years."]], 
      max_rerank=200,
      max_tokens=25,
      temperature=0.2,
      stop=["\n", "<|endoftext|>"]
  )
  qa_answer_parse = qa_answer['answers'][0]

  if not qa_answer_parse.endswith("."):
    print("not complete")
    qa_answer_x = qa_answer_parse.split(".")
    print(qa_answer_x)
    print(qa_answer_x[0]+'.')
  else: 
    print("complete")
    print(qa_answer_parse)
except:
  print("This is not within my training data, I don't have the an answer. Sorry.")

complete
Get a pair of climbing shoes.
