In [1]:
import json
import os
from openai import OpenAI

# Create a single OpenAI client for the whole notebook. The key is read from
# the OPENAI_API_KEY environment variable so it never appears in the notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Load the comments to classify. The file is expected to be a JSON object with
# the comments stored under the top-level "comments" key.
# The encoding is given explicitly: JSON is UTF-8, and relying on the platform
# default can mis-decode (or fail on) non-ASCII characters in free-text feedback.
with open('unclassified_comments.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

comments = data['comments']

# Hand-picked comments used as few-shot examples of the "Positive" label.
# These strings are sent to the model verbatim, so they are left exactly as written.
positive_examples = [
    "The instructor had extremely fair grading and opportunities to achieve the maximum credit possible on assignments. Additionally, the professor ensured that a student taking the course could succeed by providing ample time for office hours and access to lecture slides.",
    "I enjoyed the way he has structured his class online and in person class. He teaches and then does engaging activities relating to the topic. Not only that, he's great engaging with us! He tries to learn our names and always makes sure we are doing well.",
    "The professor was flexible with due dates",
    "Gives many opportunities to do do work and understand what we are doing ",
]

# Hand-picked comments used as few-shot examples of the "Negative" label.
negative_examples = [
    "go over problems and codes so people can see more about it",
    "The most helpful for learning in this class was when we had to work with others in class, I think it would have gone more smoothly though if others were more open to interacting with classmates. ",
    "The course was mainly based off of coursework done through ZyBooks. The instructor did not do much bedsides introduce the basic concept for the week and discuss deadlines in regards to project materials.",
    "I would like if the course was a little slower.",
]

# The classifier function
def classify_comment(comment):
    """Ask the chat model to label one comment using few-shot examples.

    The system message is built from the module-level ``positive_examples``
    and ``negative_examples`` lists, each rendered as a
    ``Comment: ... / Classification: <label>`` pair. The user message presents
    ``comment`` in the same format with the label left blank for the model to
    complete.

    Args:
        comment: The comment text to classify.

    Returns:
        The raw chat-completion response object. The generated label is read
        by the caller from ``response.choices[0].message.content``.
    """
    # Few-shot demonstrations: every positive example first, then every negative one.
    few_shot_prompt = "\n".join(
        [f"Comment: {example}\nClassification: Positive" for example in positive_examples] +
        [f"Comment: {example}\nClassification: Negative" for example in negative_examples]
    )

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": few_shot_prompt,
            },
            {
                "role": "user",
                "content": f"Comment: {comment}\nClassification:"
            }
        ],
        # Only the label itself is wanted, so cap the reply at a single token.
        max_tokens=1,
        # Classification should not be sampled: temperature 0 keeps labels as
        # repeatable as the API allows when the notebook is re-run.
        temperature=0,
    )
    return response

# Classify each comment and bucket it by the label the model returned.
classified_comments = {'positive_comments': [], 'negative_comments': []}
unrecognized_labels = 0

for comment in comments:
    response = classify_comment(comment)
    # The reply content can be None, and the returned token may carry stray
    # whitespace, so normalise it before comparing against the label names.
    classification = (response.choices[0].message.content or "").strip().lower()
    if classification == 'positive':
        classified_comments['positive_comments'].append(comment)
    else:
        # Anything that is not "positive" is still filed as negative, which
        # keeps the two-list output schema, but count replies that were
        # neither label so they are not silently hidden in the totals.
        if classification != 'negative':
            unrecognized_labels += 1
        classified_comments['negative_comments'].append(comment)

if unrecognized_labels:
    print(
        f"Warning: {unrecognized_labels} comment(s) received a label other than "
        "Positive/Negative and were stored as negative."
    )

# Save a new JSON file
with open('classified_comments.json', 'w', encoding='utf-8') as file:
    json.dump(classified_comments, file, indent=4)

print("Comments have been classified and saved to classified_comments.json.")


Comments have been classified and saved to classified_comments.json.


In [2]:
# Sanity check: the positive and negative lists saved in
# classified_comments.json must together hold exactly as many entries as the
# original `comments` list loaded from unclassified_comments.json.
with open('classified_comments.json', 'r') as file:
    classified_data = json.load(file)

# Only the read needs the open file handle; the counting happens afterwards.
positive_count, negative_count = (
    len(classified_data[bucket])
    for bucket in ('positive_comments', 'negative_comments')
)
total_classified = positive_count + negative_count

print(f"Total classified comments: {total_classified}")
print(f"Positive comments: {positive_count}, Negative comments: {negative_count}")
print(f"Original number of comments: {len(comments)}")

# Fail loudly if any comment was dropped or duplicated during classification.
assert total_classified == len(comments), "Mismatch in number of comments!"

Total classified comments: 562
Positive comments: 295, Negative comments: 267
Original number of comments: 562
