In [53]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.io as pio
import plotly.express as px
import os
import sys
from utils import fuzzy_merge

from ast import literal_eval

try:
    from cfuzzyset import cFuzzySet as FuzzySet
except ImportError:
    from fuzzyset import FuzzySet

# Put the parent directory on sys.path (once) so modules living one level up
# from the notebook can be imported.
# NOTE(review): `from utils import fuzzy_merge` above runs before this path
# tweak; if `utils` lives in the parent directory the import would have to
# come after these lines on a fresh kernel — confirm.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


In [54]:
# Load the three input tables: the two ratings files and the salary records.
pt_df, rmp_df, salaries_df = map(
    pd.read_csv,
    (
        "./data/pt_ratings.csv",
        "./data/rmp_ratings.csv",
        "./data/salaries.csv",
    ),
)

In [55]:
# Salary records spell people as "Last, First"; flip the comma-separated parts
# into "First Last" so they can be matched against the ratings tables.
salaries_df["name"] = [
    " ".join(reversed(employee.split(", ")))
    for employee in salaries_df["employee"]
]

# The reviews column is read as text, so an empty review list is the literal
# string "[]"; keep only rows that have at least one review.
pt_df = pt_df.loc[pt_df["reviews"].ne("[]")]
rmp_df = rmp_df.loc[rmp_df["reviews"].ne("[]")]

In [56]:
# Display pt_df after the empty-review rows were removed (note the gaps in the
# index in the output below).
pt_df

Unnamed: 0,courses,average_rating,type,reviews,name,slug
6,"['ENME674', 'ENMA300', 'ENME684', 'ENME489Z', ...",5.0000,professor,"[{'professor': 'Abhijit Dasgupta', 'course': '...",Abhijit Dasgupta,dasgupta_abhijit
8,"['ARTH389L', 'ARTH255', 'ARTH768', 'ARTH668A',...",2.8333,professor,"[{'professor': 'Abigail McEwen', 'course': Non...",Abigail McEwen,mcewen
11,"['PHYS405', 'PHYS275', 'PHYS758E', 'PHYS273', ...",4.0000,professor,"[{'professor': 'Abolhassan Jawahery', 'course'...",Abolhassan Jawahery,jawahery
12,"['STAT701', 'STAT700', 'STAT750', 'STAT650', '...",2.7000,professor,"[{'professor': 'Abram Kagan', 'course': 'STAT4...",Abram Kagan,kagan
14,"['ENGL101', 'ENGL243', 'ENGL101S', 'PHSC497', ...",5.0000,professor,"[{'professor': 'Adam Binkley', 'course': 'ENGL...",Adam Binkley,binkley
...,...,...,...,...,...,...
11835,[],2.0000,professor,"[{'professor': 'Lance Shapiro', 'course': 'BSC...",Lance Shapiro,shapiro_lance
11836,[],5.0000,ta,"[{'professor': 'Livingstone Imonitie', 'course...",Livingstone Imonitie,imonitie
11837,[],5.0000,ta,"[{'professor': 'Kathryn Lawless', 'course': 'U...",Kathryn Lawless,lawless_kathryn
11838,[],5.0000,ta,"[{'professor': 'Matt Finnan', 'course': 'ENEE1...",Matt Finnan,finnan_matt


In [57]:
# Inner fuzzy join of the rated instructors in pt_df against the salary records
# on "name". The result carries one row per matching salary record, so an
# instructor repeats once per salary year.
# NOTE(review): fuzzy_merge is a project helper whose matching threshold is not
# visible here. The output below pairs "Ke Chen" with salary rows for
# "Jie Chen", so false-positive matches do occur — verify before relying on it.
merge_pt = fuzzy_merge(pt_df, salaries_df, fuzz_on="name", how="inner")
merge_pt


Unnamed: 0,courses,average_rating,type,reviews,name_x,slug,name,year,employee,department,division,title,salary,name_y
0,"['ENME674', 'ENMA300', 'ENME684', 'ENME489Z', ...",5.0000,professor,"[{'professor': 'Abhijit Dasgupta', 'course': '...",Abhijit Dasgupta,dasgupta_abhijit,Abhijit Dasgupta,2013,"Dasgupta, Abhijit",ENGR-Mechanical Engineering,A. James Clark School of Engineering,Prof,"$167,138.22",Abhijit Dasgupta
1,"['ENME674', 'ENMA300', 'ENME684', 'ENME489Z', ...",5.0000,professor,"[{'professor': 'Abhijit Dasgupta', 'course': '...",Abhijit Dasgupta,dasgupta_abhijit,Abhijit Dasgupta,2014,"Dasgupta, Abhijit",ENGR-Mechanical Engineering,A. James Clark School of Engineering,Prof,"$183,580.92",Abhijit Dasgupta
2,"['ENME674', 'ENMA300', 'ENME684', 'ENME489Z', ...",5.0000,professor,"[{'professor': 'Abhijit Dasgupta', 'course': '...",Abhijit Dasgupta,dasgupta_abhijit,Abhijit Dasgupta,2015,"Dasgupta, Abhijit",ENGR-Mechanical Engineering,A. James Clark School of Engineering,Prof,"$190,895.40",Abhijit Dasgupta
3,"['ENME674', 'ENMA300', 'ENME684', 'ENME489Z', ...",5.0000,professor,"[{'professor': 'Abhijit Dasgupta', 'course': '...",Abhijit Dasgupta,dasgupta_abhijit,Abhijit Dasgupta,2016,"Dasgupta, Abhijit",ENGR-Mechanical Engineering,A. James Clark School of Engineering,Prof,"$190,895.40",Abhijit Dasgupta
4,"['ENME674', 'ENMA300', 'ENME684', 'ENME489Z', ...",5.0000,professor,"[{'professor': 'Abhijit Dasgupta', 'course': '...",Abhijit Dasgupta,dasgupta_abhijit,Abhijit Dasgupta,2017,"Dasgupta, Abhijit",ENGR-Mechanical Engineering,A. James Clark School of Engineering,Prof,"$198,038.26",Abhijit Dasgupta
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13971,"['STAT400', 'MATH241H']",3.6667,professor,"[{'professor': 'Ke Chen', 'course': 'STAT400',...",Ke Chen,chen_ke,Jie Chen,2019,"Chen, Jie",SPHL-Health Services Administration,School of Public Health,Assoc Prof,"$122,400.00",Jie Chen
13972,"['STAT400', 'MATH241H']",3.6667,professor,"[{'professor': 'Ke Chen', 'course': 'STAT400',...",Ke Chen,chen_ke,Jie Chen,2020,"Chen, Jie",SPHL-Health Policy and Management,School of Public Health,Associate Professor,"$141,400.51",Jie Chen
13973,"['STAT400', 'MATH241H']",3.6667,professor,"[{'professor': 'Ke Chen', 'course': 'STAT400',...",Ke Chen,chen_ke,Jie Chen,2021,"Chen, Jie",SPHL-Health Policy and Management,School of Public Health,Professor,"$159,528.52",Jie Chen
13974,"['STAT400', 'MATH241H']",3.6667,professor,"[{'professor': 'Ke Chen', 'course': 'STAT400',...",Ke Chen,chen_ke,Jie Chen,2022,"Chen, Jie",SPHL-Health Policy and\nManagement,School of Public\nHealth,Professor,"$165,151.91",Jie Chen


In [58]:
# Same inner fuzzy join on "name", this time for rmp_df against the salary
# records.
# NOTE(review): the output below shows several salary rows for one person in a
# single year (different departments), so rows are not unique per
# (name, year) — keep that in mind for any later aggregation.
merge_rmp = fuzzy_merge(rmp_df, salaries_df, fuzz_on="name", how="inner")
merge_rmp

Unnamed: 0,name_x,rating,courses,reviews,name,year,employee,department,division,title,salary,name_y
0,Pamela Abshire,3.333333,"['ENEE419A', 'ENEE408D']","[{'professor': 'Pamela Abshire', 'course': 'EN...",Pamela A. Abshire,2013,"Abshire, Pamela A.",ENGR-Electrical & Computer Engineering,A. James Clark School of Engineering,Assoc Prof,"$82,872.96",Pamela A. Abshire
1,Pamela Abshire,3.333333,"['ENEE419A', 'ENEE408D']","[{'professor': 'Pamela Abshire', 'course': 'EN...",Pamela A. Abshire,2013,"Abshire, Pamela A.",ENGR-Institute for Systems Research,A. James Clark School of Engineering,Assoc Prof,"$55,149.36",Pamela A. Abshire
2,Pamela Abshire,3.333333,"['ENEE419A', 'ENEE408D']","[{'professor': 'Pamela Abshire', 'course': 'EN...",Pamela A. Abshire,2013,"Abshire, Pamela A.",UGST-Honors College,Undergraduate Studies,Lecturer,"$5,000.00",Pamela A. Abshire
3,Pamela Abshire,3.333333,"['ENEE419A', 'ENEE408D']","[{'professor': 'Pamela Abshire', 'course': 'EN...",Pamela A. Abshire,2014,"Abshire, Pamela A.",ENGR-Electrical & Computer Engineering,A. James Clark School of Engineering,Assoc Prof,"$82,427.95",Pamela A. Abshire
4,Pamela Abshire,3.333333,"['ENEE419A', 'ENEE408D']","[{'professor': 'Pamela Abshire', 'course': 'EN...",Pamela A. Abshire,2014,"Abshire, Pamela A.",ENGR-Institute for Systems Research,A. James Clark School of Engineering,Assoc Prof,"$66,496.05",Pamela A. Abshire
...,...,...,...,...,...,...,...,...,...,...,...,...
11085,Alison Burns,4.363636,"['CPMS100', 'JOUR360', 'JOUR661', 'CPMS225']","[{'professor': 'Alison Burns', 'course': 'CPMS...",Alison Burns,2022,"Burns, Alison",JOUR-Philip Merrill College of\nJournalism,Philip Merrill\nCollege of\nJournalism,Lecturer,"$60,600.00",Alison Burns
11086,Jainaba Ceesay,2.000000,['BMGT495'],"[{'professor': 'Jainaba Ceesay', 'course': 'BM...",Jainaba Ceesay,2022,"Ceesay, Jainaba",BMGT-Management &\nOrganization,Robert H. Smith\nSchool of\nBusiness,Lecturer,"$33,000.00",Jainaba Ceesay
11087,Ashley Pantaleao,4.000000,['FMSC477'],"[{'professor': 'Ashley Pantaleao', 'course': '...",Ashley Pantaleao,2022,"Pantaleao, Ashley",SPHL-School of Public Health,School of Public\nHealth,Lecturer,"$11,616.54",Ashley Pantaleao
11088,Alexandra Harlig,4.500000,['HNUH288X'],"[{'professor': 'Alexandra Harlig', 'course': '...",Alexandra Harlig,2022,"Harlig, Alexandra",UGST-Honors College,Undergraduate\nStudies,Asst Clinical\nProfessor,"$69,323.45",Alexandra Harlig


In [59]:
# Pool both merged tables, then gather every distinct review list under the
# salary-table spelling of each person's name ("name" column).
combined = merge_pt.merge(merge_rmp, how="outer")

# Each "reviews" cell is the text form of a list of dicts. A person repeats
# once per salary row, so only the unique cell values are parsed. The
# "professor" field of every review is overwritten with the group's name.
review_records = [
    {**review, "professor": salary_name}
    for salary_name, group in combined.groupby("name")
    for raw_list in group["reviews"].unique()
    for review in literal_eval(raw_list)
]

reviews = (
    pd.DataFrame(review_records)
    .drop(columns=["expected_grade"])
    .rename(columns={"professor": "name"})
)
# Strip the literal "UTC" marker from the timestamp text before parsing.
reviews["created"] = pd.to_datetime(reviews["created"].str.replace("UTC", ""))


In [60]:
# Display the flattened reviews table: one row per review, keyed by "name".
reviews

Unnamed: 0,name,course,review,rating,created
0,A W. Kruglanski,PSYC489H,"DO NOT TAKE PSYC489H ""Motivated Social Cogniti...",2,2015-09-07 18:44:00+00:00
1,A.U. Shankar,CMSC412,Lectures are pretty dry and difficult to follo...,3,2013-01-02 21:32:00+00:00
2,A.U. Shankar,CMSC412,"Professor: He does have a stutter, but if you ...",3,2012-12-23 03:51:00+00:00
3,A.U. Shankar,CMSC412,This is a horrible class. The projects are imp...,1,2012-10-29 00:54:00+00:00
4,A.U. Shankar,CMSC412,I have a lot of respect for Dr. Shankar. He is...,5,2012-05-24 13:00:00+00:00
...,...,...,...,...,...
27879,Zsuzsa Daczo,SOCY105,"Thoughtful, kind, and really funny. She talks ...",5,2021-01-22 22:01:58+00:00
27880,Zsuzsa Daczo,SOCY325,"I think she truly cares about her students, ho...",3,2019-12-08 22:15:07+00:00
27881,Zsuzsa Daczo,SOCY105,"She is so thoughtful, caring, and understandin...",5,2019-05-15 01:37:28+00:00
27882,Zsuzsa Daczo,SOCY227,WONDERFUL,5,2014-12-08 01:18:14+00:00


In [61]:
# Spot-check: show every review recorded for one professor.
reviews.loc[reviews["name"] == "Jose M Calderon"]

Unnamed: 0,name,course,review,rating,created
12645,Jose M Calderon,CMSC430,"honestly the coolest dude i've ever met, reall...",5,2020-11-30 02:08:37+00:00
12646,Jose M Calderon,CMSC430,He's a great teacher. He takes time to explain...,5,2020-12-16 00:10:12+00:00
12647,Jose M Calderon,CMSC430,Jose is super nice and understanding of studen...,5,2020-12-16 16:44:10+00:00
12648,Jose M Calderon,CMSC430,Fantastic professor. Knows his stuff and he cl...,5,2020-12-20 04:46:28+00:00
12649,Jose M Calderon,CMSC430,very chill professor,5,2021-02-01 19:38:40+00:00
12650,Jose M Calderon,CMSC430,Such a great professor. Literally so sweet and...,5,2021-02-13 07:43:17+00:00
12651,Jose M Calderon,CMSC320,"Great professor, cares about his students and ...",5,2021-03-07 19:17:09+00:00
12652,Jose M Calderon,CMSC430,Fall 2020 Semester. MVP. I'm not aware of anot...,5,2021-05-19 06:11:06+00:00
12653,Jose M Calderon,CMSC320,I had an amazing time in Professor Calderon's ...,5,2021-05-23 03:08:53+00:00
12654,Jose M Calderon,CMSC320,Probably the best CS teacher I've ever had at ...,5,2021-05-28 03:35:28+00:00


In [62]:
# Persist the flattened reviews with CRLF line endings. The index is written
# too (to_csv default), so it appears as an unnamed first column in the file.
reviews.to_csv(
    path_or_buf="./data/reviews.csv",
    lineterminator="\r\n",
)