In [265]:
import ast
import json
import os
from time import sleep

import jsonlines
from dotenv import load_dotenv
from openai import Client

In [252]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with a readable message. The previous form wrote os.getenv(...)
# straight back into os.environ, which raises an obscure
# "str expected, not NoneType" TypeError when a variable is not set.
# Empty values are rejected too, since they cannot be valid credentials/ids.
_missing_vars = [
    name
    for name in ("OPENAI_API_KEY", "OPENAI_ASSISTANT_ID")
    if not os.getenv(name)
]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_vars)
    )

# Both variables are guaranteed to be present in os.environ at this point.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
OPENAI_ASSISTANT_ID = os.environ["OPENAI_ASSISTANT_ID"]

In [253]:
# Read the whole prompt file into memory; it is used as the assistant's
# instructions further down.
with open("instructions.txt") as prompt_file:
    instructions = prompt_file.read()

In [254]:
# Create the OpenAI API client, authenticated with the key read from the environment.
client = Client(api_key=OPENAI_API_KEY)

In [255]:
# Fetch the existing assistant definition by its id.
assistant = client.beta.assistants.retrieve(
    assistant_id=OPENAI_ASSISTANT_ID
)
# This only sets the attribute on the retrieved local object; no update
# request is issued in this cell.
assistant.instructions = instructions

assistant

Assistant(id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', created_at=1702528907, description=None, file_ids=[], instructions='You are given a piece of json for an arbitrary course, you need to understand the course context, and then convert "prereq" field which is pure text into json.\n\nReserve the original pure text "prereq" into "prereq_text" field in the result json.\n\nCourse title and course code might be used interchangeably in the given text. Course code is always in the format of "ABCD".\n\nYou can use logical expressions "and" and "or" to help. For example:\n```json\n{\n    "type": "and",\n    "conditions": [\n        "Law 400",\n        {\n            "type": "or",\n            "conditions": [\n                "Engineering 233",\n                "Digital Engineering 233",\n                "Digital Engineering 440"\n            ]\n        }\n    ]\n}\n``` \n\nOnly course can be expressed as pure string in conditions.\n```json\n{\n    "type": "and",\n    "conditions": [\n        "Law 400"

## Conversion

In [293]:
# Materialise every record of the JSON Lines file into a list so that
# individual courses can be picked by index afterwards.
with jsonlines.open("data/course-info.jsonl") as course_reader:
    course_info = [record for record in course_reader]


In [294]:
# Open a new conversation thread; the message and the run below are attached to it.
thread = client.beta.threads.create()
thread

Thread(id='thread_Jm8V5kGBaArenQYabGYoSk8s', created_at=1703563649, metadata={}, object='thread')

In [295]:
# Display the sample record (index 2910) that is sent to the assistant below.
course_info[2910]

{'cid': 39869,
 'code': 'CORE',
 'number': 595,
 'topic': 'Practicum II',
 'description': 'Senior level leadership, advocacy and reflective practice in partner agencies, associations and systems. Specifics to be negotiated with the student. Content on professional ethics will also be covered.',
 'sub_topics': None,
 'units': 3.0,
 'credits': None,
 'hours': ['2T/4-10'],
 'time_length': None,
 'prereq': '<course cid="39868">CORE 594</course>.',
 'coreq': None,
 'antireq': 'Credit for <course cid="39869">CORE 595</course> and 589.02 (Supervision In Community Practice) will not be allowed.',
 'notes': 'Course needs to be taken in combination with <course cid="39868">Community Rehabilitation 594</course> in the same academic year.',
 'aka': None,
 'repeat': False,
 'nogpa': True}

In [296]:
# Post the sample course record to the thread as the user message.
# NOTE(review): `str()` on the record sends its Python repr (single quotes,
# None/True/False) rather than JSON; `json.dumps(...)` would send real JSON.
# Confirm that the reply-parsing cell copes with JSON literals before switching.
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=str(course_info[2910]),
)

message

ThreadMessage(id='msg_MVvgjKzKdnIt3DE3TOFwSx4W', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='{\'cid\': 39869, \'code\': \'CORE\', \'number\': 595, \'topic\': \'Practicum II\', \'description\': \'Senior level leadership, advocacy and reflective practice in partner agencies, associations and systems. Specifics to be negotiated with the student. Content on professional ethics will also be covered.\', \'sub_topics\': None, \'units\': 3.0, \'credits\': None, \'hours\': [\'2T/4-10\'], \'time_length\': None, \'prereq\': \'<course cid="39868">CORE 594</course>.\', \'coreq\': None, \'antireq\': \'Credit for <course cid="39869">CORE 595</course> and 589.02 (Supervision In Community Practice) will not be allowed.\', \'notes\': \'Course needs to be taken in combination with <course cid="39868">Community Rehabilitation 594</course> in the same academic year.\', \'aka\': None, \'repeat\': False, \'nogpa\': True}'), type='text')], created_at=1703563649, file_ids=[]

In [297]:
# Start a run of the assistant on the thread. The per-run `instructions` are
# the prompt text loaded from instructions.txt plus one extra sentence asking
# for a bare JSON reply (no markdown) that `json.loads` can parse.
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
    instructions=instructions + "\nYour response in only a json, with no markdown and can be fully parsed with Python `json.loads` method.",
)

run

Run(id='run_XQUXXY5fgh57BGgIJCkgLHpJ', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', cancelled_at=None, completed_at=None, created_at=1703563650, expires_at=1703564250, failed_at=None, file_ids=[], instructions='You are given a piece of json for an arbitrary course, you need to understand the course context, and then convert "prereq" field which is pure text into json.\n\nReserve the original pure text "prereq" into "prereq_text" field in the result json.\n\nCourse title and course code might be used interchangeably in the given text. Course code is always in the format of "ABCD".\n\nYou can use logical expressions "and" and "or" to help. For example:\n```json\n{\n    "type": "and",\n    "conditions": [\n        "Law 400",\n        {\n            "type": "or",\n            "conditions": [\n                "Engineering 233",\n                "Digital Engineering 233",\n                "Digital Engineering 440"\n            ]\n        }\n    ]\n}\n``` \n\nOnly course can be expressed as 

### Result

In [298]:
# Statuses from which the run will not reach "completed" on its own. Without
# this check the loop below would poll forever on a failed/cancelled/expired
# run. "requires_action" is included because this notebook never submits tool
# outputs, so waiting on it is pointless.
UNSUCCESSFUL_RUN_STATUSES = {
    "failed",
    "cancelled",
    "expired",
    "incomplete",
    "requires_action",
}
# Seconds to wait between two status checks.
POLL_INTERVAL_SECONDS = 4

is_complete = False

# Wait until complete
while not is_complete:
    # Refresh the run object to read its current status.
    run = client.beta.threads.runs.retrieve(
        thread_id=thread.id,
        run_id=run.id,
    )

    is_complete = run.status == "completed"

    # Surface unsuccessful outcomes instead of spinning indefinitely.
    if run.status in UNSUCCESSFUL_RUN_STATUSES:
        raise RuntimeError(
            f"Run {run.id} stopped with status {run.status!r}: "
            f"{getattr(run, 'last_error', None)}"
        )

    if not is_complete:
        sleep(POLL_INTERVAL_SECONDS)

run

Run(id='run_XQUXXY5fgh57BGgIJCkgLHpJ', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', cancelled_at=None, completed_at=1703563660, created_at=1703563650, expires_at=None, failed_at=None, file_ids=[], instructions='You are given a piece of json for an arbitrary course, you need to understand the course context, and then convert "prereq" field which is pure text into json.\n\nReserve the original pure text "prereq" into "prereq_text" field in the result json.\n\nCourse title and course code might be used interchangeably in the given text. Course code is always in the format of "ABCD".\n\nYou can use logical expressions "and" and "or" to help. For example:\n```json\n{\n    "type": "and",\n    "conditions": [\n        "Law 400",\n        {\n            "type": "or",\n            "conditions": [\n                "Engineering 233",\n                "Digital Engineering 233",\n                "Digital Engineering 440"\n            ]\n        }\n    ]\n}\n``` \n\nOnly course can be expressed as 

In [299]:
# List the messages currently stored on the thread (the user message and the reply).
messages = client.beta.threads.messages.list(
    thread_id=thread.id,
)

messages

SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_2rQz0YySgzst8bdlw3YhJghZ', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', content=[MessageContentText(text=Text(annotations=[], value='{\n  "cid": 39869, \n  "code": "CORE", \n  "number": 595, \n  "topic": "Practicum II", \n  "description": "Senior level leadership, advocacy and reflective practice in partner agencies, associations and systems. Specifics to be negotiated with the student. Content on professional ethics will also be covered.", \n  "sub_topics": None, \n  "units": 3.0, \n  "credits": None, \n  "hours": ["2T/4-10"], \n  "time_length": None, \n  "prereq_text": "CORE 594.",\n  "prereq": {\n        "type": "and",\n        "conditions": [\n            "CORE 594"\n        ]\n    },\n  "coreq": None, \n  "antireq": {\n      "type": "exclude_courses",\n      "courses": [\n        "CORE 595",\n        "589.02"\n      ]\n   }, \n  "notes": "Course needs to be taken in combination with Community Rehabilitation 594 in the same

In [300]:
# Show just the list of message objects held by the returned page.
messages.data

[ThreadMessage(id='msg_2rQz0YySgzst8bdlw3YhJghZ', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', content=[MessageContentText(text=Text(annotations=[], value='{\n  "cid": 39869, \n  "code": "CORE", \n  "number": 595, \n  "topic": "Practicum II", \n  "description": "Senior level leadership, advocacy and reflective practice in partner agencies, associations and systems. Specifics to be negotiated with the student. Content on professional ethics will also be covered.", \n  "sub_topics": None, \n  "units": 3.0, \n  "credits": None, \n  "hours": ["2T/4-10"], \n  "time_length": None, \n  "prereq_text": "CORE 594.",\n  "prereq": {\n        "type": "and",\n        "conditions": [\n            "CORE 594"\n        ]\n    },\n  "coreq": None, \n  "antireq": {\n      "type": "exclude_courses",\n      "courses": [\n        "CORE 595",\n        "589.02"\n      ]\n   }, \n  "notes": "Course needs to be taken in combination with Community Rehabilitation 594 in the same academic year.", \n  "aka": None, 

In [301]:
# Take the text of the first content part of the first listed message; in the
# output above that first message is the assistant's reply.
json_text = messages.data[0].content[0].text.value
print(json_text)

{
  "cid": 39869, 
  "code": "CORE", 
  "number": 595, 
  "topic": "Practicum II", 
  "description": "Senior level leadership, advocacy and reflective practice in partner agencies, associations and systems. Specifics to be negotiated with the student. Content on professional ethics will also be covered.", 
  "sub_topics": None, 
  "units": 3.0, 
  "credits": None, 
  "hours": ["2T/4-10"], 
  "time_length": None, 
  "prereq_text": "CORE 594.",
  "prereq": {
        "type": "and",
        "conditions": [
            "CORE 594"
        ]
    },
  "coreq": None, 
  "antireq": {
      "type": "exclude_courses",
      "courses": [
        "CORE 595",
        "589.02"
      ]
   }, 
  "notes": "Course needs to be taken in combination with Community Rehabilitation 594 in the same academic year.", 
  "aka": None, 
  "repeat": False, 
  "nogpa": True
}


In [302]:
# The reply is text generated by a language model, i.e. untrusted external
# input: running it through `eval` would execute whatever expression it
# contains. Parse it as strict JSON first; if the model answered with Python
# literals (None/True/False, as in the reply printed above), fall back to
# `ast.literal_eval`, which only accepts literal structures and never
# executes code.
try:
    obj = json.loads(json_text)
except json.JSONDecodeError:
    obj = ast.literal_eval(json_text)
obj

{'cid': 39869,
 'code': 'CORE',
 'number': 595,
 'topic': 'Practicum II',
 'description': 'Senior level leadership, advocacy and reflective practice in partner agencies, associations and systems. Specifics to be negotiated with the student. Content on professional ethics will also be covered.',
 'sub_topics': None,
 'units': 3.0,
 'credits': None,
 'hours': ['2T/4-10'],
 'time_length': None,
 'prereq_text': 'CORE 594.',
 'prereq': {'type': 'and', 'conditions': ['CORE 594']},
 'coreq': None,
 'antireq': {'type': 'exclude_courses', 'courses': ['CORE 595', '589.02']},
 'notes': 'Course needs to be taken in combination with Community Rehabilitation 594 in the same academic year.',
 'aka': None,
 'repeat': False,
 'nogpa': True}