In [9]:
import ratemyprofessor


from openai import OpenAI
from pinecone import Pinecone



In [10]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file so later cells can read them via os.getenv
# (PINECONE_API_KEY is read that way below; presumably the OpenAI key lives there too — confirm).
load_dotenv()


True

In [11]:
def get_average_grade(ratings):
    """Estimate the average percentage grade reported across a set of ratings.

    Each rating's ``grade`` attribute is matched against the letter grades
    A through F, optionally followed by ``+`` or ``-``, and converted to a
    representative percentage. Ratings whose grade is missing or is not a
    recognised letter grade are ignored entirely, so they neither add points
    nor inflate the number of grades the total is divided by.

    Args:
        ratings: Iterable of objects exposing a ``grade`` attribute.

    Returns:
        The mean percentage rounded to one decimal place, or the string
        "No grades available" when no rating carries a recognised grade.
    """
    # Plus grades share the value of the plain letter; minus grades sit
    # three points lower.
    grade_points = {
        "A+": 93, "A": 93, "A-": 90,
        "B+": 83, "B": 83, "B-": 80,
        "C+": 73, "C": 73, "C-": 70,
        "D+": 63, "D": 63, "D-": 60,
        "F+": 59, "F": 59,
    }

    points = []
    for rating in ratings:
        grade = getattr(rating, "grade", None)
        if not isinstance(grade, str):
            # Missing grade: nothing to convert.
            continue
        value = grade_points.get(grade.strip())
        if value is not None:
            points.append(value)

    if not points:
        return "No grades available"
    return round(sum(points) / len(points), 1)

In [12]:
# Resolve the school first, then look up a single professor by name as a smoke test.
ud_school = ratemyprofessor.get_school_by_name("University of Delaware")
john = ratemyprofessor.get_professor_by_school_and_name(ud_school, "Aromando")
print(john)

John Aromando


In [13]:
# Print a short profile of the professor, if the lookup found one.
if john is not None:
    summary_lines = [
        "%s works in the %s Department of %s." % (john.name, john.department, john.school.name),
        "Rating: %s / 5.0" % john.rating,
        "Difficulty: %s / 5.0" % john.difficulty,
        "Total Ratings: %s" % john.num_ratings,
    ]
    # The would-take-again percentage is optional; fall back to N/A when absent.
    if john.would_take_again is None:
        summary_lines.append("Would Take Again: N/A")
    else:
        summary_lines.append(("Would Take Again: %s" % round(john.would_take_again, 1)) + '%')
    print("\n".join(summary_lines))

John Aromando works in the Computer Science Department of University of Delaware.
Rating: 4.5 / 5.0
Difficulty: 2.3 / 5.0
Total Ratings: 10
Would Take Again: 100%


In [14]:
# Fetch the ratings for the professor looked up above and print the
# average grade computed from them by get_average_grade.
johnRatings = john.get_ratings()
print(get_average_grade(johnRatings))

89.0


Main Pinecone embedding loop. The idea is to create a list of names of professors from the computer science department at UD that I know are in the RMP database. We then loop over these names, gathering the scraped data from the RateMyProfessor API. Finally, we create Pinecone embeddings for each of these professors with data such as their name, department, rating, difficulty, would-take-again percentage, top comments, average grade, etc.

In [15]:
# Search terms (surname or full name) for UD computer science professors
# known to be present in the RMP database.
UD_Professors = [
    "Bart",
    "Aromando",
    "Silber",
    "Hsu",
    "Decker",
    "Keffer",
    "Roosen",
    "Schiller",
    "Ilya Safro",
    "Siegel",
    "Wassil",
    "Belford",
    "Barmaki",
    "Chandrasekaran",
    "Beheshti",
    "Clause",
    "Nazim Karaca",
    "Liao",
    "Lena Mashayekhy",
    "McCoy",
    "Xi Peng",
    "Lori Pollock",
    "Adarsh Sethi",
    "Vijay Shanker",
    "Yarrington",
    "Zhang",
]

In [29]:
def get_top_comments(ratings, limit=10):
    """Concatenate the first few review comments into one string.

    Each included rating contributes a segment of the form
    ``"Comment <n>, Class <class_name>: <comment> | "`` (numbering starts
    at 1), so a non-empty result always ends with the ``" | "`` separator.

    Args:
        ratings: Iterable of objects exposing ``class_name`` and ``comment``.
        limit: Maximum number of ratings to include, taken in iteration
            order. Defaults to 10; pass ``None`` to include every rating.

    Returns:
        The concatenated segments, or an empty string when there are no
        ratings (or ``limit`` is 0).
    """
    segments = []
    for position, rating in enumerate(ratings, start=1):
        if limit is not None and position > limit:
            break
        segments.append(
            f"Comment {position}, Class {rating.class_name}: {rating.comment} | "
        )
    # Join once at the end instead of growing a string inside the loop.
    return "".join(segments)


def get_courses(ratings):
    """Return the set of distinct class names found on the given ratings.

    Args:
        ratings: Iterable of objects exposing a ``class_name`` attribute.

    Returns:
        A set with one entry per distinct ``class_name``; empty when
        ``ratings`` is empty.
    """
    return {entry.class_name for entry in ratings}

def set_to_string(courses):
    """Render a collection of courses as a single comma-separated string.

    Args:
        courses: Sized iterable of course identifiers; each item is
            formatted with its default string formatting.

    Returns:
        The items joined by ``", "`` in iteration order, or an empty
        string when ``courses`` is empty.
    """
    return ", ".join(f"{item}" for item in courses)



In [18]:
# Resolve the school once: it is the same for every professor, so repeating
# the lookup inside the loop only adds redundant requests.
ud_school = ratemyprofessor.get_school_by_name("University of Delaware")

# Build one plain dict per professor with the fields that are embedded and
# stored as metadata later in the notebook.
prof_objects = []
for professor in UD_Professors:
    prof = ratemyprofessor.get_professor_by_school_and_name(ud_school, professor)
    if prof is None:
        # No match for this search term; skip it rather than crash on
        # prof.get_ratings() and lose the professors already collected.
        print(f"No professor found for {professor!r}; skipping")
        continue
    ratings = prof.get_ratings()
    average_grade = get_average_grade(ratings)
    print(prof.name)
    prof_object = {
        "name": prof.name,
        "department": prof.department,
        "rating": prof.rating,
        "difficulty": prof.difficulty,
        "would_take_again": prof.would_take_again,
        "top_comments": get_top_comments(ratings),
        "average_grade": average_grade,
        "courses": get_courses(ratings)
    }
    prof_objects.append(prof_object)

# Dump the collected records for inspection. The loop variable is deliberately
# not named `object`, which would shadow the builtin for the rest of the session.
for prof_object in prof_objects:
    print(prof_object)

        

  self.comment = BeautifulSoup(comment, "lxml").text


Austin Bart
John Aromando
Greg Silber
Leighanne Hsu
Keith Decker
Jeremy Keffer
Andrew Roosen
Ulf Schiller
Ilya Safro
Stephen Siegel
Katherine Wassil
Lochlan  Belford
Roghayeh Barmaki
Sunita Chandrasekaran
Rahmatollah Beheshti
James Clause
Nazim Karaca
Li Liao
Lena Mashayekhy
Kathleen McCoy
Xi Peng
Lori Pollock
Adarsh Sethi
Vijay Shanker
Debra Yarrington
Shangyou Zhang
{'name': 'Austin Bart', 'department': 'Computer Science', 'rating': 3.9, 'difficulty': 2.9, 'would_take_again': 85.3333, 'top_comments': "Comment 1, Class CISC181: He is definitely condescending at times but only if you are asking dumb questions. He is the best computer science professor I have had. There is one homework a week and it is pretty easy. I am surprised with how structured this class was considering this was the first year they taught TypeScript. The final project was DIFFICULT. | Comment 2, Class CISC108: Although the class was quite easy, Bart is a very terrible professor. He is very condescending and gets a

In [30]:
# Clients: Pinecone takes its key explicitly from the environment; OpenAI()
# is constructed with no arguments here.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
client = OpenAI()
processed_profs = []

# Embed each professor's concatenated review text and pair the vector with
# the metadata stored alongside it.
for prof in prof_objects:
    if not prof["top_comments"]:
        # Nothing to embed for a professor without comments, and the
        # embeddings endpoint rejects empty input.
        print(f"No comments for {prof['name']}; skipping")
        continue

    # Pass the text itself. The previous `{...}` wrapper was a set literal,
    # which is not one of the documented input types (string or list).
    response = client.embeddings.create(
        input=prof["top_comments"],
        model="text-embedding-3-small"
    )
    embedding = response.data[0].embedding

    metadata = {
        "reviews": prof["top_comments"],
        "department": prof["department"],
        "rating": prof["rating"],
        "difficulty": prof["difficulty"],
        "would_take_again": prof["would_take_again"],
        "average_grade": prof["average_grade"],
        "courses": set_to_string(prof["courses"])
    }
    # Pinecone does not accept null metadata values; leave the key out
    # instead (e.g. when would_take_again is unavailable).
    metadata = {key: value for key, value in metadata.items() if value is not None}

    processed_profs.append({
        "values": embedding,
        "id": prof['name'],
        "metadata": metadata
    })
    


In [31]:
# Sanity check: display the first prepared record (embedding values, id, metadata).
# NOTE(review): this echoes the entire embedding vector; consider slicing "values" to keep the output short.
processed_profs[0]

{'values': [-0.044414736,
  -0.005643468,
  -0.04137412,
  0.01321447,
  0.036623154,
  -0.023591936,
  -0.0039602686,
  0.0010528478,
  0.014280044,
  -0.018012945,
  0.006569906,
  -0.01321447,
  0.017483551,
  0.024175625,
  0.017076327,
  -0.0026198179,
  0.02461,
  -0.038767874,
  -0.004336952,
  0.042351462,
  0.03149211,
  0.0070585767,
  0.054595377,
  -0.0003877127,
  -0.046586607,
  -0.03119348,
  0.017524274,
  -0.0018308184,
  0.0021837472,
  0.015474572,
  0.025695935,
  -0.016804842,
  -0.008687479,
  -0.03540148,
  0.0044116103,
  -0.0011792574,
  -0.0077780085,
  0.0043607075,
  0.03798057,
  -0.007078938,
  0.023116838,
  0.022329535,
  -0.015637463,
  -0.071834594,
  -0.00885037,
  -0.028478643,
  0.02078208,
  0.0038957915,
  0.011191917,
  0.02967317,
  -0.09800562,
  0.046152234,
  0.052912176,
  0.068739675,
  -0.07596114,
  0.007092512,
  -0.039962403,
  0.018406596,
  -0.028750125,
  -0.04221572,
  0.123253606,
  -0.011687374,
  -0.0046050427,
  -0.03279523,
  -

In [32]:
# Open the "rag" index and write every prepared professor vector into the
# "UDCS2" namespace; the upsert response is the cell's displayed output.
index = pc.Index("rag")
index.upsert(vectors=processed_profs, namespace="UDCS2")

{'upserted_count': 26}

In [33]:
# Display index statistics (per-namespace vector counts) to confirm the upsert above landed.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'UDCS': {'vector_count': 26},
                'UDCS2': {'vector_count': 26},
                'ns1': {'vector_count': 20}},
 'total_vector_count': 72}