In [None]:
import openai
import os
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
import json
import yaml
import hashlib

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [None]:
def load_student_answers(student_id):
    """
    Load the raw (unscored) exam answers of a single student.

    Reads ``student_answers.csv`` from the student's sub-directory of the
    folder named by the ``EXAM_ANSWERS`` environment variable.

    Parameters
    ----------
    student_id : str
        Name of the student's sub-directory inside ``EXAM_ANSWERS``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``<EXAM_ANSWERS>/<student_id>/student_answers.csv``.

    Raises
    ------
    FileNotFoundError
        If the student's ``student_answers.csv`` does not exist.
    """
    # root folder that holds one sub-directory per student
    exam_answers_path = os.getenv("EXAM_ANSWERS")

    answers_csv = os.path.join(exam_answers_path, student_id, "student_answers.csv")
    return pd.read_csv(answers_csv)
    

In [59]:
def classroom_report(student_ids=None):
    """
    Build the per-student score summary and save it as ``classroom_data.csv``.

    For every requested student this reads ``student_answers_scored.csv`` and
    ``config.yaml`` from the student's sub-directory of ``EXAM_ANSWERS``, adds
    the mean and median of the ``Rating`` column plus the student id to the
    config values, and collects one row per student. The rows are sorted by
    ``mean_score`` (highest first) and written to
    ``<EXAM_ANSWERS>/classroom_data.csv``.

    Parameters
    ----------
    student_ids : iterable of str, optional
        Sub-directory names to include. When omitted (``None``), every entry
        of the ``EXAM_ANSWERS`` folder is tried.

    Returns
    -------
    pandas.DataFrame
        The summary that was written to disk. When no student could be
        summarised the frame is empty but still has the ``student_id``,
        ``mean_score`` and ``median_score`` columns.
    """
    exam_answers_path = os.getenv("EXAM_ANSWERS")
    if student_ids is None:
        student_ids = os.listdir(exam_answers_path)

    records = []
    for student_id in student_ids:
        student_dir = os.path.join(exam_answers_path, student_id)
        try:
            student_answers = pd.read_csv(os.path.join(student_dir, "student_answers_scored.csv"))

            with open(os.path.join(student_dir, "config.yaml"), 'r') as file:
                config = yaml.safe_load(file)
            if not isinstance(config, dict):
                raise ValueError("config.yaml does not contain a mapping")

            record = dict(config)
            record["mean_score"] = student_answers['Rating'].mean()
            record["median_score"] = student_answers['Rating'].median()
            record["student_id"] = student_id
        except (FileNotFoundError, NotADirectoryError):
            # No scored answers / config here (student not scored yet, or the
            # entry is not a student folder at all): skip quietly.
            continue
        except Exception as err:
            # Keep the report best-effort, but say which student was dropped
            # and why instead of hiding the problem.
            print(f"skipping student {student_id}: {type(err).__name__}: {err}")
            continue
        records.append(record)

    if records:
        classroom_data = pd.DataFrame(records).sort_values(by="mean_score", ascending=False)
    else:
        # sort_values would raise KeyError on a frame without a mean_score
        # column, so build an empty frame that still has the expected header.
        classroom_data = pd.DataFrame(columns=["student_id", "mean_score", "median_score"])

    classroom_data.to_csv(os.path.join(exam_answers_path, "classroom_data.csv"), index=False)
    return classroom_data


ac7fd72d0e90f4f7569b8eda89e0421c
f9e00b77f118b041da89e803091cce78
46c996fe3d9c85c8f719d000bdee455d
2ed9ed20bdf66c2f5e14ca590608d17f
e6d1c57ce083e1bca7d96b508b36c677
13cc4000c382b5668120e67969b4d378
25a973408e531d90b48c8621a6184aea
fa6888b1b4b750fb3da91a98d1692199
7f87ce89256e2222c5d26691ddec4848
8604d1b7291b4c12abf27d2082513251
a792a4a08103bc2e509300f2a3149069
22fee230421fb54af61f3eb4eb6feeef
dc0d047d53f69887389775a88b81f84f
93a7dd191c53ba3166f328418f806ed5
6ee4dbd9441adb27e878e366deba2073
8becbd312e5d194b4a1c223b86168bb1
29d56042cfc1bd6008d88a03fc89e8ab


In [40]:
def get_student_ids_for_scoring(id_length=32):
    """
    Return the student ids that have not been scored yet.

    Lists the ``EXAM_ANSWERS`` folder and keeps every entry whose name is
    ``id_length`` characters long and does not appear in the ``student_id``
    column of ``<EXAM_ANSWERS>/classroom_data.csv``.

    Parameters
    ----------
    id_length : int, optional
        Length a directory entry name must have to count as a student id.
        Defaults to 32.

    Returns
    -------
    list of str
        Ids still to be scored, in ``os.listdir`` order. When
        ``classroom_data.csv`` is missing or empty (e.g. on the first run),
        every id is returned.
    """
    exam_answers_path = os.getenv("EXAM_ANSWERS")
    classroom_csv = os.path.join(exam_answers_path, "classroom_data.csv")

    try:
        # Read ids as text: an id made only of digits would otherwise be
        # parsed as a number and never match a directory name.
        classroom_data = pd.read_csv(classroom_csv, dtype={"student_id": str})
        scored_students = set(classroom_data["student_id"])
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Nothing has been scored yet.
        scored_students = set()

    return [
        student_id
        for student_id in os.listdir(exam_answers_path)
        if len(student_id) == id_length and student_id not in scored_students
    ]
    


In [48]:
# Preview which student ids are still waiting to be scored.
get_student_ids_for_scoring()

['ac7fd72d0e90f4f7569b8eda89e0421c',
 '2ed9ed20bdf66c2f5e14ca590608d17f',
 'e6d1c57ce083e1bca7d96b508b36c677',
 'fa6888b1b4b750fb3da91a98d1692199',
 '8604d1b7291b4c12abf27d2082513251',
 'a792a4a08103bc2e509300f2a3149069',
 '93a7dd191c53ba3166f328418f806ed5',
 '8becbd312e5d194b4a1c223b86168bb1']

In [None]:
def student_answer_to_long(student_answers, num_questions):
    """
    Reshape wide per-question answer columns into one row per question.

    For each question number ``i`` in ``1..num_questions`` the columns
    ``Question i``, ``Student Answer i`` and ``Teacher Answer i`` are taken
    together with ``topic`` and ``topic_hash``, renamed to the un-numbered
    names, and stacked underneath each other.

    Parameters
    ----------
    student_answers : pandas.DataFrame
        Wide frame holding the numbered columns described above plus
        ``topic`` and ``topic_hash``.
    num_questions : int
        Number of numbered question column groups to stack.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``Question``, ``Student Answer``,
        ``Teacher Answer``, ``topic`` and ``topic_hash`` and a fresh
        0..n-1 index. All rows of question 1 come first, then question 2,
        and so on. Empty (but with those columns) when ``num_questions``
        is 0.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from ``student_answers``.
    """
    long_columns = ['Question', 'Student Answer', 'Teacher Answer', 'topic', 'topic_hash']

    # Collect the per-question slices and concatenate once; growing a frame
    # with pd.concat inside the loop re-copies all previous rows every time.
    per_question = []
    for i in range(1, num_questions + 1):
        question_df = student_answers[
            [f'Question {i}', f'Student Answer {i}', f'Teacher Answer {i}', 'topic', 'topic_hash']
        ].copy()
        question_df.columns = long_columns
        per_question.append(question_df)

    if not per_question:
        # pd.concat refuses an empty list
        return pd.DataFrame(columns=long_columns)

    return pd.concat(per_question, ignore_index=True)


In [None]:
def report_card(exam_scores,student_id):
    """
    Print a student's mean/median rating and save the scored answers.

    The ``Rating`` column of ``exam_scores`` is converted to ``int`` in place
    (the caller's frame is modified), the mean and median are printed, and
    the frame is written to
    ``<EXAM_ANSWERS>/<student_id>/student_answers_scored.csv`` without the
    index. Nothing is returned.
    """
    # cast every rating to int before computing the statistics
    exam_scores['Rating'] = exam_scores['Rating'].apply(int)

    ratings = exam_scores['Rating']
    mean_rating = ratings.mean()
    median_rating = ratings.median()
    print(f"mean score {mean_rating}, median score {median_rating}")

    student_dir = os.path.join(os.getenv("EXAM_ANSWERS"), student_id)
    scored_csv = os.path.join(student_dir, "student_answers_scored.csv")
    exam_scores.to_csv(scored_csv, index=None)

In [None]:
def score_exam(domain, exam_answers, prompt_template_fname, llm):
    """
    Ask the LLM to rate every student answer against the teacher's answer.

    The prompt template is read from the folder named by the ``PROMPTS``
    environment variable and filled, once per row of ``exam_answers``, with
    the variables ``context``, ``domain``, ``question``, ``model_answer``,
    ``student_answer`` and ``format_instructions``.

    Parameters
    ----------
    domain : str
        Subject area, passed to the prompt as ``domain``.
    exam_answers : pandas.DataFrame
        One row per question with the columns ``topic``, ``Question``,
        ``Teacher Answer`` and ``Student Answer``.
    prompt_template_fname : str
        File name of the prompt template inside the ``PROMPTS`` folder.
    llm : callable
        Chat model; it is called with the formatted messages and must return
        an object with a ``content`` attribute.

    Returns
    -------
    pandas.DataFrame
        One row per scored question holding the parsed LLM output (the
        requested ``Rating`` field) plus ``Question``, ``Teacher Answer``
        and ``Student Answer``.
    """
    # The only structured field requested from the model is the rating.
    response_schemas = [
        ResponseSchema(name="Rating", description="Rating for the answer"),
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()

    prompt_path = os.path.join(os.getenv("PROMPTS"), prompt_template_fname)
    with open(prompt_path, "r") as file:
        template_string = file.read()
    prompt_template = ChatPromptTemplate.from_template(template_string)

    results = []

    # iterrows() is a generator, so tqdm needs the total to draw a real
    # progress bar with an ETA instead of a bare iteration counter.
    for _, row in tqdm(exam_answers.iterrows(), total=len(exam_answers)):

        message = prompt_template.format_messages(
            context=row["topic"],
            domain=domain,
            question=row["Question"],
            model_answer=row["Teacher Answer"],
            student_answer=row["Student Answer"],
            format_instructions=format_instructions
            )

        response = llm(message)
        # NOTE(review): a reply that does not follow the format instructions
        # presumably makes parse() raise and aborts the whole run - confirm
        # whether a retry/skip is wanted here.
        output_dict = output_parser.parse(response.content)
        output_dict["Question"] = row["Question"]
        output_dict["Teacher Answer"] = row["Teacher Answer"]
        output_dict["Student Answer"] = row["Student Answer"]
        results.append(output_dict)

    return pd.DataFrame(results)

## Score exam

In [49]:
# Chat model used for scoring; the commented line is a cheaper alternative.
#model = "gpt-3.5-turbo"
model = "gpt-4-turbo-preview"

# Score every student that is not yet listed in classroom_data.csv.
# (Uncomment the explicit list instead to re-score specific students.)
#student_ids = ["2ed9ed20bdf66c2f5e14ca590608d17f","fa6888b1b4b750fb3da91a98d1692199"]
student_ids = get_student_ids_for_scoring()

domain = "HR and workforce transformation"
# number of "Question i" / "Student Answer i" / "Teacher Answer i" column groups per row
num_questions = 4
# template file name, resolved by score_exam inside the PROMPTS folder
prompt_template_fname = "teacher_score_exam_prompt.txt"
llm = ChatOpenAI(temperature=0.0, model=model)

# Per student: load the wide answers, reshape to one row per question,
# score each row with the LLM, then print and save the report card.
for student_id in student_ids:
    print(f"""scoring student {student_id}""")
    student_answers = load_student_answers(student_id)
    student_answers = student_answer_to_long(student_answers, num_questions)
    exam_scores = score_exam(domain, student_answers, prompt_template_fname, llm)
    report_card(exam_scores,student_id)


scoring student ac7fd72d0e90f4f7569b8eda89e0421c


56it [01:33,  1.67s/it]


mean score 8.285714285714286, median score 9.0
scoring student 2ed9ed20bdf66c2f5e14ca590608d17f


56it [01:25,  1.53s/it]


mean score 8.357142857142858, median score 9.0
scoring student e6d1c57ce083e1bca7d96b508b36c677


56it [01:26,  1.55s/it]


mean score 8.446428571428571, median score 9.0
scoring student fa6888b1b4b750fb3da91a98d1692199


56it [01:29,  1.60s/it]


mean score 8.357142857142858, median score 9.0
scoring student 8604d1b7291b4c12abf27d2082513251


56it [01:29,  1.59s/it]


mean score 7.0, median score 8.0
scoring student a792a4a08103bc2e509300f2a3149069


56it [01:28,  1.58s/it]


mean score 8.285714285714286, median score 9.0
scoring student 93a7dd191c53ba3166f328418f806ed5


56it [01:28,  1.58s/it]


mean score 8.464285714285714, median score 9.0
scoring student 8becbd312e5d194b4a1c223b86168bb1


56it [01:36,  1.71s/it]

mean score 6.946428571428571, median score 8.0





In [55]:
# Rebuild the classroom report for every student folder.
exam_answers_path = os.getenv("EXAM_ANSWERS")
student_ids  = os.listdir(exam_answers_path)
# keep only entries whose name is 32 characters long (the student-id length used throughout this notebook)
student_ids = [student_id for student_id in  student_ids if len(student_id) == 32]
classroom_data = classroom_report(student_ids)

In [56]:
# Show whatever classroom_report returned in the previous cell.
classroom_data

In [54]:
# NOTE(review): this cell repeats the classroom-report cell above and its
# execution count (54) precedes that cell's (55); one of the two can
# probably be deleted.
exam_answers_path = os.getenv("EXAM_ANSWERS")
student_ids  = os.listdir(exam_answers_path)
# keep only entries whose name is 32 characters long
student_ids = [student_id for student_id in  student_ids if len(student_id) == 32]
classroom_data = classroom_report(student_ids)

['ac7fd72d0e90f4f7569b8eda89e0421c',
 'f9e00b77f118b041da89e803091cce78',
 '46c996fe3d9c85c8f719d000bdee455d',
 '2ed9ed20bdf66c2f5e14ca590608d17f',
 'e6d1c57ce083e1bca7d96b508b36c677',
 '13cc4000c382b5668120e67969b4d378',
 '25a973408e531d90b48c8621a6184aea',
 'fa6888b1b4b750fb3da91a98d1692199',
 '7f87ce89256e2222c5d26691ddec4848',
 '8604d1b7291b4c12abf27d2082513251',
 'a792a4a08103bc2e509300f2a3149069',
 '22fee230421fb54af61f3eb4eb6feeef',
 'dc0d047d53f69887389775a88b81f84f',
 '93a7dd191c53ba3166f328418f806ed5',
 '6ee4dbd9441adb27e878e366deba2073',
 '8becbd312e5d194b4a1c223b86168bb1',
 '29d56042cfc1bd6008d88a03fc89e8ab']