In [116]:
import openai
from utils import load_api_key

In [117]:
# Configure the openai module with the key obtained from load_api_key (imported from utils;
# presumably it reads the key from the given text file — confirm against utils).
# NOTE(review): this is an absolute, machine-specific path; the cell will not run elsewhere as-is.
openai.api_key = load_api_key("/Users/gursi/Desktop/openai-api.txt")

In [56]:
def note_generator(chat_completion: openai.ChatCompletion, outline: dict[str, list[str]], level: str, subject: str):
    """Ask the chat model for revision notes covering an outline of topics.

    Args:
        chat_completion: Object whose ``create(model=..., messages=...)`` method
            performs the chat request.
        outline: Mapping of topic name to a list of subtopics, or ``None`` when
            a topic has no subtopics.
        level: Study level, interpolated into both prompts.
        subject: Course subject, interpolated into both prompts.

    Returns:
        Whatever ``parse_results`` extracts from the model response.
    """
    user_prompt = engineer_prompt_note_gen(outline, level, subject)

    # The system message fixes the persona and the expected layout of the notes.
    system_message = {
        "role": "system",
        "content": f"Your are a {level} {subject} notes generator. Generate notes in following format: \n Topic heading: \n \t Notes...",
    }
    user_message = {"role": "user", "content": user_prompt}

    response = chat_completion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )
    return parse_results(response)
        
    
def engineer_prompt_note_gen(outline: dict[str, list[str]], level: str, subject: str):
    """Build the user prompt that lists every topic and subtopic as bullets.

    Args:
        outline: Mapping of topic name to a list of subtopics, or ``None`` when
            the topic has no subtopics.
        level: Study level inserted into the instruction sentence.
        subject: Course subject inserted into the instruction sentence.

    Returns:
        The instruction sentence followed by one ``- topic`` line per topic and
        one indented ``    - subtopic`` line per subtopic.
    """
    pieces = [
        f'Generate quick revision notes for me on the following topics for a {subject} course on a {level} level. Include all equations. Explain each variable and concept. Do not say "sure, here are some notes..".\n\n'
    ]
    for topic, subtopics in outline.items():
        pieces.append(f"- {topic}\n")
        if subtopics is None:
            # Topic without subtopics: only the top-level bullet is emitted.
            continue
        pieces.extend(f"    - {subtopic}\n" for subtopic in subtopics)
    return "".join(pieces)


def parse_results(chatgpt_output: dict) -> str:
    """Return the message content of the first choice in a chat response.

    Args:
        chatgpt_output: Response mapping with a ``"choices"`` sequence whose
            entries hold a ``"message"`` mapping with a ``"content"`` entry.

    Returns:
        The ``content`` value of the first choice's message.
    """
    first_choice = chatgpt_output["choices"][0]
    message = first_choice["message"]
    return message["content"]

In [118]:
# Set the API key again (same call as the earlier setup cell) so this cell works on its own.
openai.api_key = load_api_key("/Users/gursi/Desktop/openai-api.txt")
# Alias openai.ChatCompletion; it is handed to note_generator, which calls its .create(...).
chat_complete = openai.ChatCompletion

In [57]:
# Example outline: topic -> list of subtopics. None means the topic has no subtopics
# (engineer_prompt_note_gen then emits only the top-level bullet for it).
outline = {
    "Electromagnetism":None,
    "Electricity":[
        "Resistance",
        "Energy and Power",
        "Kirchoff Laws",
        "Lenz laws"
    ],
    "Mechanics": [
        "SUVAT equations",
        "Terminal velocity",
        "Friction and Drag"
    ]
}
# Generate the notes; level and subject are interpolated into the prompts.
output = note_generator(chat_complete, outline, "High school", "Physics")

In [58]:
# Show the notes returned by note_generator.
print(output)

Electromagnetism:
- Electromagnetic Force: A force of attraction or repulsion that acts between electrically charged particles or currents.
- Electric charges: The fundamental property of matter that causes it to experience a force when placed in an electromagnetic field.
- Magnetic field: A region around a magnetic material or moving electric charge within which the force of magnetism acts.
Equation: F = qE + qv × B
- Electromagnetic induction: A process where a conductor placed in a changing magnetic field causes a voltage to be induced across the conductor.
Equation: ε = -dΦ/dt

Electricity:
- Resistance: The property of a material that opposes the flow of electric current through it.
Equation: V = IR
- Energy and Power: Energy is the ability to do work. Power is the rate at which work is done.
Equations: E = Pt, P = VI
- Kirchoff Laws: Two laws used for solving electrical circuits, namely the current law and the voltage law.
Equations: ∑ I_in = ∑ I_out, ∑ V_loop = 0
- Lenz laws: A 

In [11]:
from utils import parse_results, parse_pdf, create_chat_object
from sklearn.cluster import KMeans
import numpy as np
import openai
import string

In [23]:
def extract_keywords_from_prompt(chat_completion: openai.ChatCompletion, prompt: str) -> list[str]:
    """Ask the chat model for the topic-specific keywords contained in a prompt.

    Args:
        chat_completion: Object whose ``create(model=..., messages=...)`` method
            performs the chat request.
        prompt: The user instruction to extract keywords from.

    Returns:
        The comma-separated values of the model reply as a list, with
        surrounding whitespace removed and empty entries dropped.
    """
    model_output = chat_completion.create(
        model = "gpt-3.5-turbo",
        messages = [
            {"role": "system", "content":f"Your job is to extract key words from text. Generic words should never be extracted, only topic specific words."},
            {"role": "user", "content":f"Extract the keywords from the following instruction. Output a single list of comma separated values only once. \n\n {prompt}"}
        ]
    )
    output = parse_results(model_output)
    # A trailing comma or an empty reply would otherwise yield "" entries, which
    # inflate the keyword count that callers use (e.g. as a cluster count).
    stripped = (piece.strip() for piece in output.split(","))
    return [keyword for keyword in stripped if keyword]

def generate_single_summary(chat_completion: openai.ChatCompletion, input_text: str, summary_prompt: str) -> str:
    """Summarize one piece of text according to a prompt.

    Args:
        chat_completion: Object whose ``create(model=..., messages=...)`` method
            performs the chat request.
        input_text: The text to summarize.
        summary_prompt: The instruction appended after the text.

    Returns:
        Whatever ``parse_results`` extracts from the model response.
    """
    conversation = [
        {"role": "system", "content": f"Your job is to summarize text based on a prompt. If relevant data is not found, return nothing."},
        # The text comes first and the instruction last, separated by a blank line.
        {"role": "user", "content": f"{input_text} \n\n {summary_prompt}"},
    ]
    response = chat_completion.create(model="gpt-3.5-turbo", messages=conversation)
    return parse_results(response)

def generate_summary(
        chat_completion: openai.ChatCompletion,
        text_body: str,
        prompt: str,
        buffer: int = 600
) -> str:
    """Summarize the parts of a long text that are relevant to a prompt.

    Steps:
      1. Extract keywords from ``prompt`` with the chat model.
      2. Locate every occurrence of each keyword word in ``text_body``.
      3. Cluster the (normalized) word positions with KMeans.
      4. Summarize a window of ``buffer`` words on each side of every cluster
         centre, then summarize the concatenated partial summaries.

    Args:
        chat_completion: Object whose ``create(model=..., messages=...)`` method
            performs the chat requests.
        text_body: Full text to search and summarize.
        prompt: The summarization instruction; also the source of the keywords.
        buffer: Number of words taken on each side of a cluster centre.

    Returns:
        The final summary, or ``""`` when no keyword occurs in the text (no
        summary request is sent in that case).
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)

    keywords = extract_keywords_from_prompt(chat_completion, prompt)
    keywords = [k.translate(strip_punctuation).strip().lower() for k in keywords]
    # Drop keywords that were only punctuation/whitespace so they neither count
    # towards the cluster number nor get searched for.
    keywords = [k for k in keywords if k]
    print(f"{len(keywords)} keywords found...")
    # Multi-word keywords are matched word by word.
    kw = [w for k in keywords for w in k.split()]

    words = text_body.lower().split()
    arr_text = np.array(words)
    # Match on punctuation-free tokens so that e.g. "function," matches the
    # keyword "function"; the summary windows still use the original tokens.
    arr_match = np.array([w.translate(strip_punctuation) for w in words])

    print("Matching keywords in text...")
    max_idx = len(arr_text)
    if max_idx == 0 or not kw:
        print("No keyword matches found in text...")
        return ""
    # Positions are normalized to [0, 1) before clustering and scaled back below.
    idxs = np.concatenate([np.where(arr_match == keyw)[0] for keyw in kw]) / max_idx
    if idxs.size == 0:
        # KMeans cannot be fitted on an empty array.
        print("No keyword matches found in text...")
        return ""

    print("Clustering...")
    # KMeans requires n_samples >= n_clusters; cap at the number of distinct positions.
    n_clusters = min(len(keywords), np.unique(idxs).size)
    kmeans = KMeans(n_clusters = n_clusters)
    kmeans.fit(idxs.reshape(-1, 1))
    centroid_idxs = list((kmeans.cluster_centers_ * len(arr_text)).astype(int).reshape(-1))

    print("Generating summary...")
    summaries = []
    for centroid_idx in centroid_idxs:
        # Window of `buffer` words on each side of the centre, clipped to the text bounds.
        window = arr_text[max(0, centroid_idx - buffer):min(len(arr_text), centroid_idx + buffer)]
        text_input = " ".join(list(window))
        summaries.append(generate_single_summary(chat_completion, text_input, prompt))
    # Condense the partial summaries into one answer to the same prompt.
    final_summary = generate_single_summary(chat_completion, " ".join(summaries), prompt)
    return final_summary

In [24]:
# Demo 1: summarize selected aspects of a paper.
# parse_pdf and create_chat_object come from utils; presumably parse_pdf returns the PDF's
# text and create_chat_object returns an object exposing .create(...) — confirm against utils.
# NOTE(review): absolute, machine-specific paths.
text_body = parse_pdf("/Users/gursi/Desktop/nst.pdf")
prompt = "Summarize the loss function, optimization method and model architecture used in this study for me."
chat_completion = create_chat_object("/Users/gursi/Desktop/openai-api.txt")
# buffer is left at its default (600 words on each side of a cluster centre).
summary = generate_summary(
    chat_completion,
    text_body,
    prompt,
)
print(summary)

4 keywords found...
Matching keywords in text...
Clustering...
Generating summary...
The study used an artificial neural system that separated image content from style using a VGG neural network and optimizing the white noise image through gradient descent. The loss function included a squared-error loss for feature representation and a mean-squared distance for style representation, with adjustable trade-off between the two. The model architecture consisted of 16 convolutional and 5 pooling layers, with max-pooling replaced by average pooling for image synthesis. Fully connected layers were not used, and the model was publicly available in the Caffe framework. The weighting factors for the contribution of each layer to the total loss were also included.


In [25]:
# Demo 2: same pipeline on a different document and prompt.
# NOTE(review): "desktop" is lower-case here but "Desktop" elsewhere — this only resolves on a
# case-insensitive filesystem; confirm the intended path.
text_body = parse_pdf("/Users/gursi/desktop/223.pdf")
prompt = "Summarize linear dependence and independence for me."
chat_completion = create_chat_object("/Users/gursi/Desktop/openai-api.txt")
summary = generate_summary(
    chat_completion,
    text_body,
    prompt,
)
print(summary)

2 keywords found...
Matching keywords in text...
Clustering...
Generating summary...
Linear dependence and independence refer to whether a set of vectors can be expressed as linear combinations of each other or not. A basis for a subspace is a set of linearly independent vectors that span the subspace. Bases are not unique, but they all have the same number of vectors, called the dimension of the subspace, which tells us the maximum number of linearly independent vectors that can exist in the subspace. Linear independence is important in determining solutions to systems of linear equations and in finding bases and dimensions of vector spaces.


In [27]:
# Demo 3: same pipeline on a third document and prompt.
# NOTE(review): "desktop" is lower-case here but "Desktop" elsewhere — this only resolves on a
# case-insensitive filesystem; confirm the intended path.
text_body = parse_pdf("/Users/gursi/desktop/eco.pdf")
prompt = "Summarize the topic on the intertemporal budget line."
chat_completion = create_chat_object("/Users/gursi/Desktop/openai-api.txt")
summary = generate_summary(
    chat_completion,
    text_body,
    prompt,
)
print(summary)

3 keywords found...
Matching keywords in text...
Clustering...
Generating summary...
The intertemporal budget line is a concept that represents the trade-off between current and future consumption. It shows the different combinations of present and future goods that a consumer can afford given their income and the interest rate. The slope of the line represents the opportunity cost of present consumption in terms of future consumption, and the intercepts represent the maximum levels of current and future consumption. The economic interpretation of the intercepts is that they represent the present value of future income or the future value of current income. A numerical example is provided to illustrate how the intertemporal budget line works in practice. Overall, the intertemporal budget line is a useful tool for analyzing the intertemporal allocation of resources and understanding how individuals make choices between present and future consumption. However, the given text does not con

In [62]:
# Load the document used for the question/answer extraction below. This rebinds text_body,
# replacing the text loaded by the summary demos.
text_body = parse_pdf("/Users/gursi/Desktop/test.pdf")

In [66]:
# Ask the model to extract question/answer pairs in a fixed "Question: / Answer:" layout,
# which the parsing cell further down relies on.
# Uses the chat_completion object created in an earlier summary-demo cell.
model_output = chat_completion.create(
        model = "gpt-3.5-turbo",
        messages = [
            {"role": "system", "content":f"Your job is to extract questions and their respective answers from the given texts. Output them in the given format: \n\n Question: \n Answer: \n"},
            # text_body[:-9000] drops the LAST 9000 characters of the document — presumably to stay
            # within the model's context limit (TODO confirm). NOTE(review): for a text of 9000
            # characters or fewer this slice is empty.
            {"role": "user", "content":f"{text_body[:-9000]} \n\n Make sure to extract questions and their answers."}
        ]
    )
output = parse_results(model_output)

In [67]:
# Show the extracted question/answer text.
print(output)

Question:
What are the instructions for the STA130 Winter 2023 Midterm Examination?

Answer:
All answers submitted must be original. Students are recommended to use a pencil. No calculators, electronics, or resources are permitted during the exam. Students are not allowed to take any items with them if they leave the exam room before turning in their exam.

Question:
What is the reason for the error in the code block provided in section 1 question 1 of the STA130 Winter 2023 Midterm Exam?

Answer:
There are at least three errors in the code snippet provided. The error is due to the fact that no value has been assigned for N, the lack of tidyverse being loaded, and the incorrect syntax for glimpse and as_tibble. 

Question:
What are the types of distributions shown in Figure 1 of the STA130 Winter 2023 Midterm Exam?

Answer:
Distribution (a) is uniform with a large spread from 0 to 10. Distribution (b) is symmetric with a center around 5. Distribution (c) is bimodal with a very large sp

In [80]:
# Parse the "Question: ... Answer: ..." text in `output` into {question: answer}.
output_dict = {}
for qna_pair in output.split("Question:"):
    # partition (instead of split + unpacking) does not raise when the fragment has
    # no "Answer:" marker or when the answer text itself contains "Answer:".
    q, separator, a = qna_pair.partition("Answer:")
    q, a = q.strip(), a.strip()
    # Skip leading preamble / whitespace-only fragments and questions whose
    # answer is missing (e.g. a reply that was cut off mid-pair).
    if not separator or not q:
        continue
    output_dict[q] = a

In [125]:
# Pick the fifth extracted question (index 4, in dict insertion order) as the grading sample;
# this raises IndexError if fewer than five question/answer pairs were parsed.
sample_question = list(output_dict.keys())[4]
expected_answer = output_dict[sample_question]
# Hand-written answer to be graded against expected_answer.
my_answer = "Missing values in the DOB field are represented with the NA values. The code will not produce correct outputs if these are not removed. You would remove them using the is.na() and filter() functions in R."

In [126]:
# Ask the model to grade my_answer against expected_answer for sample_question, using a fixed
# "Score (Out of 100): / Feedback:" reply layout.
model_output = chat_completion.create(
        model = "gpt-3.5-turbo",
        messages = [
            {"role": "system", "content":f"You are a teacher and your job is to grade my answer based on the Expected Answer and Question. My answer need not exactly match the Expected Answer. Give your output in the following format: \n Score (Out of 100): \n Feedback: ...\n"},
            {"role": "user", "content":f"Question: {sample_question} \n Expected Answer: {expected_answer} \n My answer: {my_answer}"}
        ]
    )
# NOTE(review): this rebinds `output`, which previously held the extracted Q&A text; re-running
# the Q&A parsing cell after this one would parse the grading reply instead.
output = parse_results(model_output)

In [127]:
# Show the model's score and feedback text.
print(output)

Score (Out of 100): 90 
Feedback: Your answer is accurate and shows a good understanding of the purpose of NA values in the DOB field in question 10. However, it could have been more detailed and specific with regards to what can happen when these missing entries are not removed. Remember to provide clear and complete answers that demonstrate your knowledge of the subject matter.


In [None]:
def grade_answer(question: str, answer: str, correct_answer: str) -> tuple[int, str]:
    """Grade an answer against the expected answer using the chat model.

    Uses the notebook-level ``chat_completion`` object and ``parse_results``.

    Args:
        question: The question that was asked.
        answer: The answer to be graded.
        correct_answer: The expected answer the grade is based on.

    Returns:
        ``(score, feedback)``: the integer found on the reply's "Score" line
        and the text following "Feedback:" (``""`` when no feedback is present).

    Raises:
        ValueError: If no numeric score can be found in the model reply.
    """
    import re  # local import so this cell does not depend on the notebook's import cell

    model_output = chat_completion.create(
        model = "gpt-3.5-turbo",
        messages = [
            {"role": "system", "content":f"You are a teacher and your job is to grade my answer based on the Expected Answer and Question. My answer need not exactly match the Expected Answer. Give your output in the following format: \n Score (Out of 100): \n Feedback: ...\n"},
            {"role": "user", "content":f"Question: {question} \n Expected Answer: {correct_answer} \n My answer: {answer}"}
        ]
    )
    output = parse_results(model_output)

    # Accept "Score (Out of 100): 90" as requested, but also a bare "Score: 90".
    score_match = re.search(r"Score[^:\n]*:\s*(\d+)", output, flags=re.IGNORECASE)
    if score_match is None:
        raise ValueError(f"Could not find a score in the model output: {output!r}")
    score = int(score_match.group(1))

    # Feedback may span several lines, so capture everything after the marker.
    feedback_match = re.search(r"Feedback\s*:\s*(.*)", output, flags=re.IGNORECASE | re.DOTALL)
    feedback = feedback_match.group(1).strip() if feedback_match else ""
    return score, feedback
