In [None]:
# System prompt for the admissions-counselor assistant. This string is passed
# as the `instructions` argument when the assistant is created, so it is part
# of the program's behavior: editing any wording here changes what the
# assistant is told to do.
counselor_context = '''
You are a helpful college counselor for graduate school admissions.
You will be provided with a student’s profile.
The student is supposed to tell you which year he/she is in. Please ask if this information is not provided.
Take the following fact into account: accepted students at top US graduate programs have a GPA 25-75 percentile range of 3.7-3.9. 
Use the following guidelines to formulate your response:
1. Summarize student's biggest strengths and weaknesses in concise manner.
2. Suggest 3 key improvements ordered by potential impact.
3. For each improvement, be concrete and provide an action plan.
4. Include a time estimate as part of action plans.
5. Students in senior year will have little time for more publications or internships. Tailor your response based on which year the student is in.
6. At the end, assess the student's chance of getting into MIT for graduate school study.
'''

In [None]:
from openai import OpenAI
 
# Build the API client. No credentials are passed here, so the client
# presumably picks them up from its environment/configuration -- verify
# before running on a new machine.
client = OpenAI()

# Register the counselor assistant: it runs on gpt-4o, follows the
# counselor prompt, and has the file_search tool enabled so it can read
# the student's uploaded documents.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="College Admission Counselor",
    tools=[dict(type="file_search")],
    instructions=counselor_context,
)

In [None]:
# Create the vector store that will hold the student's documents.
vector_store = client.beta.vector_stores.create(name="Resume and Score Info")

file_paths = [ "Test Resume.pdf", "Test Score.pdf"]

# Open the files one by one (rather than in a comprehension) so that if a
# later open() fails, the handles opened so far are still closed by the
# `finally` clause. The handles are also closed once the upload has returned,
# instead of staying open for the lifetime of the kernel.
file_streams = []
try:
    for path in file_paths:
        file_streams.append(open(path, "rb"))

    # Upload all files to the vector store as a single batch; the returned
    # batch object carries the status and per-file counts printed below.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )
finally:
    for stream in file_streams:
        stream.close()

# Quick sanity check of the upload result.
print(file_batch.status)
print(file_batch.file_counts)

In [None]:
# Point the assistant's file_search tool at the vector store, so searches
# issued by the assistant cover the documents stored there.
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tool_resources=dict(
        file_search=dict(vector_store_ids=[vector_store.id]),
    ),
)

In [None]:
# Upload the user-provided files to OpenAI. Each file is opened in a `with`
# block so its handle is closed as soon as the upload call has returned,
# rather than being left open for the lifetime of the kernel.
with open("Test Resume.pdf", "rb") as resume_stream:
    message_file1 = client.files.create(file=resume_stream, purpose="assistants")
with open("Test Score.pdf", "rb") as score_stream:
    message_file2 = client.files.create(file=score_stream, purpose="assistants")

# Create a thread whose first user message carries both uploaded files as
# attachments, each made available to the file_search tool.
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "My name is Jane Doe. How can I improve my chances of getting into US graduate schools?",
            # Attach the uploaded files to the message.
            "attachments": [
                { "file_id": message_file1.id, "tools": [{"type": "file_search"}] },
                { "file_id": message_file2.id, "tools": [{"type": "file_search"}] }
            ],
        }
    ]
)

# The thread now has a vector store with those files in its tool resources.
print(thread.tool_resources.file_search)

In [None]:
# Start a run of the assistant on the thread and wait for it to finish.
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id
)

# Fetch only the messages produced by this run.
messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))

# A run that did not produce a reply (e.g. it failed or expired) yields an
# empty list; fail with the run's status and error details instead of an
# opaque IndexError on messages[0].
if not messages:
    raise RuntimeError(
        f"Run {run.id} produced no assistant message "
        f"(status={run.status!r}, last_error={getattr(run, 'last_error', None)!r})"
    )

message_content = messages[0].content[0].text
annotations = message_content.annotations
citations = []
# file_id -> filename, so a file cited several times is looked up only once.
cited_filenames = {}

# Replace each inline annotation marker in the reply with a numbered
# reference "[i]" and collect a matching "[i] <filename>" footnote.
# message_content.value is updated in place because the display cell reads it.
for index, annotation in enumerate(annotations):
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    # Only annotations that carry a file citation contribute a footnote.
    if file_citation := getattr(annotation, "file_citation", None):
        file_id = file_citation.file_id
        if file_id not in cited_filenames:
            cited_filenames[file_id] = client.files.retrieve(file_id).filename
        citations.append(f"[{index}] {cited_filenames[file_id]}")

In [None]:
from IPython.display import Markdown, display

# Render the assistant's reply (with numbered citation markers) as Markdown.
rendered_reply = Markdown(message_content.value)
display(rendered_reply)

# List the cited source files, one "[index] filename" entry per line.
citation_listing = "\n".join(citations)
print(citation_listing)