In [154]:
import os
import json
import jsonlines
from time import sleep
from openai import Client
from dotenv import load_dotenv

In [155]:
load_dotenv()


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing
            credential is reported by name instead of surfacing later as an
            opaque type or authentication error.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"{name} is not set; define it in the environment or in a .env file"
        )
    return value


# Credentials are read from the process environment (populated from .env by
# load_dotenv above); nothing is hard-coded in the notebook.
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")
OPENAI_ASSISTANT_ID = _require_env("OPENAI_ASSISTANT_ID")

In [156]:
# Load the prompt text that is later sent as the run instructions.
# The encoding is pinned so the text is decoded the same way on every platform
# instead of depending on the locale's default encoding.
with open('instructions-2.txt', 'r', encoding='utf-8') as f:
    instructions = f.read()

In [157]:
# Single API client reused by every call below; authenticated with the key
# read from the environment.
client = Client(api_key=OPENAI_API_KEY)

In [158]:
# Fetch the assistant identified by OPENAI_ASSISTANT_ID.
assistant = client.beta.assistants.retrieve(assistant_id=OPENAI_ASSISTANT_ID)

# Attach the freshly loaded instructions to the retrieved object.
# NOTE(review): no assistants update call is made in this cell, so this looks
# like a local-only change to the Python object — confirm whether the stored
# assistant is meant to be updated as well.
assistant.instructions = instructions

assistant

Assistant(id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', created_at=1702528907, description=None, file_ids=[], instructions='You are given a piece of json for an arbitrary course, you need to understand the course context, \nand then convert "prereq", "antireq" and "coreq" fields (which are pure text) into jsonlogic expressions.\n\nYou are allow to use the following custom jsonlogic operators:\n1. `courses`: a list of all courses that the student has taken\n2. `course`: a course object, given `code` and `number` fields; return `None` if the course is not taken.\n3. `units`: a total number of units taken in a list of courses, given as a list of course objects.\n4. `consent`: a boolean value, given a consenter; always return `True`.\n\nYou are allow to use any builtin jsonlogic operators. `some` and `all` are more preferable than `or` and `and`.\n\n\nJsonLogic will be given a piece of data about the student, which includes the courses that the student has taken, and the programs that the student is

## Conversion

In [169]:
# Read every record of the JSON Lines course dump into memory.
with jsonlines.open("data/course.jsonl") as reader:
    course_info = [record for record in reader]


In [170]:
# Create a new, empty thread; the message and run cells below refer to it
# through thread.id.
thread = client.beta.threads.create()
thread

Thread(id='thread_Pm8iwvKkXAC5h3LXzs0KVa49', created_at=1704147490, metadata={}, object='thread')

In [172]:
# Prepare content

# Keep only the CPSC courses and take the first five as a small batch.
cpsc_courses = [course for course in course_info if course["code"] == "CPSC"]

# Serialise just the selected courses, one JSON document per course.
courses_list = [json.dumps(course) for course in cpsc_courses[:5]]

# The message body is JSON Lines: one course per line.
content = "\n".join(courses_list)
print(content)

{"cid": "107154", "code": "CPSC", "number": "203", "faculty": "SC", "departments": ["CPSC"], "units": 3.0, "prereq": null, "antireq": "Not open for registration to Computer Science majors.", "coreq": null}
{"cid": "160416", "code": "CPSC", "number": "217", "faculty": "SC", "departments": ["CPSC"], "units": 3.0, "prereq": null, "antireq": "Credit for CPSC 217 and any of 215, 231, 235, DATA 211, ENCM 339, ENGG 233 or ENDG 233 will not be allowed.", "coreq": null}
{"cid": "160417", "code": "CPSC", "number": "219", "faculty": "SC", "departments": ["CPSC"], "units": 3.0, "prereq": "CPSC 217 or DATA 211.", "antireq": "Credit for CPSC 219 and any of 233, 235, ENEL 497 or ENCM 493 will not be allowed.", "coreq": null}
{"cid": "107164", "code": "CPSC", "number": "231", "faculty": "SC", "departments": ["CPSC"], "units": 3.0, "prereq": "Admission to Computer Science, Bioinformatics, or Natural Science with a primary concentration in Computer Science.", "antireq": "Credit for CPSC 231 and any of C

In [162]:
# Post the prepared course lines to the thread as a single user message.
# NOTE(review): this cell's execution count is lower than the content cell's,
# so the captured output may reflect an older `content` — re-run top to bottom.
message = client.beta.threads.messages.create(thread_id=thread.id, role="user", content=content)

message

ThreadMessage(id='msg_oYKKaNeBrpB7UFTf9OgAwhEy', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='{"cid": "107154", "prereq": null, "antireq": "Not open for registration to Computer Science majors.", "coreq": null}\n{"cid": "160416", "prereq": null, "antireq": "Credit for CPSC 217 and any of 215, 231, 235, DATA 211, ENCM 339, ENGG 233 or ENDG 233 will not be allowed.", "coreq": null}\n{"cid": "160417", "prereq": "CPSC 217 or DATA 211.", "antireq": "Credit for CPSC 219 and any of 233, 235, ENEL 497 or ENCM 493 will not be allowed.", "coreq": null}\n{"cid": "107164", "prereq": "Admission to Computer Science, Bioinformatics, or Natural Science with a primary concentration in Computer Science.", "antireq": "Credit for CPSC 231 and any of CPSC 215, 217, 235, DATA 211, ENCM 339, ENGG 233, or ENDG 233 will not be allowed.", "coreq": null}\n{"cid": "107165", "prereq": "CPSC 231 and admission to Computer Science, Bioinformatics, or Natural Science with a primary c

In [163]:
# Start a run of the assistant on the thread. The instructions loaded from the
# text file are passed explicitly on the run itself.
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
    instructions=instructions,
)

run

Run(id='run_Zlot4Hzuyqe6ecrJIzO1EGFQ', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', cancelled_at=None, completed_at=None, created_at=1704075477, expires_at=1704076077, failed_at=None, file_ids=[], instructions='You are given a piece of json for an arbitrary course, you need to understand the course context, \nand then convert "prereq", "antireq" and "coreq" fields (which are pure text) into jsonlogic expressions.\n\nYou are allow to use the following custom jsonlogic operators:\n1. `courses`: a list of all courses that the student has taken\n2. `course`: a course object, given `code` and `number` fields; return `None` if the course is not taken.\n3. `units`: a total number of units taken in a list of courses, given as a list of course objects.\n4. `consent`: a boolean value, given a consenter; always return `True`.\n\nYou are allow to use any builtin jsonlogic operators. `some` and `all` are more preferable than `or` and `and`.\n\n\nJsonLogic will be given a piece of data about the st

### Result

In [164]:
# Statuses from which the run can still make progress without our help.
# Anything else that is not "completed" (failed, cancelled, expired,
# requires_action, ...) will never turn into "completed" by waiting.
_PENDING_RUN_STATUSES = ("queued", "in_progress", "cancelling")

is_complete = False

# Wait until complete
while not is_complete:
    # Refresh the run to get its current status.
    run = client.beta.threads.runs.retrieve(
        thread_id=thread.id,
        run_id=run.id,
    )

    is_complete = run.status == "completed"
    if is_complete:
        break

    if run.status not in _PENDING_RUN_STATUSES:
        # Stop instead of polling forever on a run that can no longer complete.
        raise RuntimeError(
            f"Run {run.id} stopped with status {run.status!r} "
            f"(last_error={getattr(run, 'last_error', None)!r})"
        )

    print("Waiting for completion...")
    sleep(10)

run

Waiting for completion...
Waiting for completion...


Run(id='run_Zlot4Hzuyqe6ecrJIzO1EGFQ', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', cancelled_at=None, completed_at=1704075495, created_at=1704075477, expires_at=None, failed_at=None, file_ids=[], instructions='You are given a piece of json for an arbitrary course, you need to understand the course context, \nand then convert "prereq", "antireq" and "coreq" fields (which are pure text) into jsonlogic expressions.\n\nYou are allow to use the following custom jsonlogic operators:\n1. `courses`: a list of all courses that the student has taken\n2. `course`: a course object, given `code` and `number` fields; return `None` if the course is not taken.\n3. `units`: a total number of units taken in a list of courses, given as a list of course objects.\n4. `consent`: a boolean value, given a consenter; always return `True`.\n\nYou are allow to use any builtin jsonlogic operators. `some` and `all` are more preferable than `or` and `and`.\n\n\nJsonLogic will be given a piece of data about the st

In [165]:
# Fetch the messages currently on the thread (a paginated listing).
messages = client.beta.threads.messages.list(thread_id=thread.id)

messages

SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_4y8cjMUilXBR6wqhMU1uXYzo', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', content=[MessageContentText(text=Text(annotations=[], value='{"cid": "107154", "prereq": {}, "antireq": {"!=": [{"some": {"var": "programs"}}, "CS"]}, "coreq": {}}\n{"cid": "160416", "prereq": {}, "antireq": {"none": [{"course": ["CPSC", "217"]}, {"course": ["CPSC", "215"]}, {"course": ["CPSC", "231"]}, {"course": ["CPSC", "235"]}, {"course": ["DATA", "211"]}, {"course": ["ENCM", "339"]}, {"course": ["ENGG", "233"]}, {"course": ["ENDG", "233"]}]}, "coreq": {}}\n{"cid": "160417", "prereq": {"some": [{"course": ["CPSC", "217"]}, {"course": ["DATA", "211"]}]}, "antireq": {"none": [{"course": ["CPSC", "219"]}, {"course": ["CPSC", "233"]}, {"course": ["CPSC", "235"]}, {"course": ["ENEL", "497"]}, {"course": ["ENCM", "493"]}]}, "coreq": {}}\n{"cid": "107164", "prereq": {"some": [{"var": "programs.CSCS"}, {"var": "programs.BINFO"}, {"var": "programs.NATSCI.CSCS"}]

In [166]:
# Show the message objects held by the listing page.
messages.data

[ThreadMessage(id='msg_4y8cjMUilXBR6wqhMU1uXYzo', assistant_id='asst_FPCQ6UhNeRHdKcptjZjW6t4V', content=[MessageContentText(text=Text(annotations=[], value='{"cid": "107154", "prereq": {}, "antireq": {"!=": [{"some": {"var": "programs"}}, "CS"]}, "coreq": {}}\n{"cid": "160416", "prereq": {}, "antireq": {"none": [{"course": ["CPSC", "217"]}, {"course": ["CPSC", "215"]}, {"course": ["CPSC", "231"]}, {"course": ["CPSC", "235"]}, {"course": ["DATA", "211"]}, {"course": ["ENCM", "339"]}, {"course": ["ENGG", "233"]}, {"course": ["ENDG", "233"]}]}, "coreq": {}}\n{"cid": "160417", "prereq": {"some": [{"course": ["CPSC", "217"]}, {"course": ["DATA", "211"]}]}, "antireq": {"none": [{"course": ["CPSC", "219"]}, {"course": ["CPSC", "233"]}, {"course": ["CPSC", "235"]}, {"course": ["ENEL", "497"]}, {"course": ["ENCM", "493"]}]}, "coreq": {}}\n{"cid": "107164", "prereq": {"some": [{"var": "programs.CSCS"}, {"var": "programs.BINFO"}, {"var": "programs.NATSCI.CSCS"}]}, "antireq": {"none": [{"course": 

In [167]:
# Take the first message of the listing and pull the text of its first content
# part. NOTE(review): assumes that entry is the assistant's reply and that its
# first content part is text — confirm against the listing shown above.
latest_message = messages.data[0]
response_text = latest_message.content[0].text.value
print(response_text)

{"cid": "107154", "prereq": {}, "antireq": {"!=": [{"some": {"var": "programs"}}, "CS"]}, "coreq": {}}
{"cid": "160416", "prereq": {}, "antireq": {"none": [{"course": ["CPSC", "217"]}, {"course": ["CPSC", "215"]}, {"course": ["CPSC", "231"]}, {"course": ["CPSC", "235"]}, {"course": ["DATA", "211"]}, {"course": ["ENCM", "339"]}, {"course": ["ENGG", "233"]}, {"course": ["ENDG", "233"]}]}, "coreq": {}}
{"cid": "160417", "prereq": {"some": [{"course": ["CPSC", "217"]}, {"course": ["DATA", "211"]}]}, "antireq": {"none": [{"course": ["CPSC", "219"]}, {"course": ["CPSC", "233"]}, {"course": ["CPSC", "235"]}, {"course": ["ENEL", "497"]}, {"course": ["ENCM", "493"]}]}, "coreq": {}}
{"cid": "107164", "prereq": {"some": [{"var": "programs.CSCS"}, {"var": "programs.BINFO"}, {"var": "programs.NATSCI.CSCS"}]}, "antireq": {"none": [{"course": ["CPSC", "231"]}, {"course": ["CPSC", "215"]}, {"course": ["CPSC", "217"]}, {"course": ["CPSC", "235"]}, {"course": ["DATA", "211"]}, {"course": ["ENCM", "339"]

In [168]:
# Parse the reply as JSON Lines: one JSON object per non-empty line.
# Blank lines (for example a trailing newline at the end of the reply) are
# skipped, because json.loads("") raises JSONDecodeError.
response_dicts = [
    json.loads(line)
    for line in response_text.split("\n")
    if line.strip()
]

response_dicts

[{'cid': '107154',
  'prereq': {},
  'antireq': {'!=': [{'some': {'var': 'programs'}}, 'CS']},
  'coreq': {}},
 {'cid': '160416',
  'prereq': {},
  'antireq': {'none': [{'course': ['CPSC', '217']},
    {'course': ['CPSC', '215']},
    {'course': ['CPSC', '231']},
    {'course': ['CPSC', '235']},
    {'course': ['DATA', '211']},
    {'course': ['ENCM', '339']},
    {'course': ['ENGG', '233']},
    {'course': ['ENDG', '233']}]},
  'coreq': {}},
 {'cid': '160417',
  'prereq': {'some': [{'course': ['CPSC', '217']},
    {'course': ['DATA', '211']}]},
  'antireq': {'none': [{'course': ['CPSC', '219']},
    {'course': ['CPSC', '233']},
    {'course': ['CPSC', '235']},
    {'course': ['ENEL', '497']},
    {'course': ['ENCM', '493']}]},
  'coreq': {}},
 {'cid': '107164',
  'prereq': {'some': [{'var': 'programs.CSCS'},
    {'var': 'programs.BINFO'},
    {'var': 'programs.NATSCI.CSCS'}]},
  'antireq': {'none': [{'course': ['CPSC', '231']},
    {'course': ['CPSC', '215']},
    {'course': ['CPSC', 