In [1]:
# I had to do this adaptation to make rpy2 work:
# https://github.com/rpy2/rpy2/issues/1018
# C:\Users\pedro\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\rpy2

from rpy2 import situation
import os

print("Testing that R works from within Python using rpy2...")

try:
    import rpy2.robjects as robjects
except OSError as e:
    # Windows workaround (see https://github.com/rpy2/rpy2/issues/1018):
    # look up the R installation in the registry, expose it through R_HOME / PATH
    # and the DLL search path, then retry the import once.
    try:
        r_home = situation.r_home_from_registry()
        if r_home is None:
            # Without this guard the path concatenation below fails with an
            # unrelated TypeError that the handler underneath does not catch.
            raise OSError("R_HOME could not be determined from the Windows registry.")

        r_bin = os.path.join(r_home, 'bin', 'x64')
        os.environ['R_HOME'] = r_home
        os.environ['PATH'] = r_bin + ";" + os.environ['PATH']
        os.add_dll_directory(r_bin)

        print('R_HOME', r_home)
        import rpy2.robjects as robjects
    except OSError as retry_error:
        # Report the original import failure, keeping the retry failure as its cause.
        raise e from retry_error

print("R Worked from within Python using rpy2! You're good to go.")

Testing that R works from within Python using rpy2...
R_HOME C:\Program Files\R\R-4.3.1
R Worked from within Python using rpy2! You're good to go.


In [2]:
import os

# Root folder expected to hold one "microdados_enem_<year>" directory per ENEM edition.
enem_microdata_path = "../../data/raw-enem-exams"

print("To compute IRT thetas, we need to have the original ENEM microdata data available.")
print("Download ENEM microdata from https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/enem.")

# Editions to verify; the wider alternative is kept for reference:
#years = [2019, 2020, 2021, 2022]
years = [2022]
for year in years:
    # Stop right away when the microdata folder for an edition is missing.
    year_directory = f"{enem_microdata_path}/microdados_enem_{year}"
    year_is_available = os.path.exists(year_directory)
    assert year_is_available, f"ENEM microdata for year {year} NOT found!"
    print(f"ENEM microdata for year {year} found!")


To compute IRT thetas, we need to have the original ENEM microdata data available.
Download ENEM microdata from https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/enem.
ENEM microdata for year 2022 found!


In [3]:
import pandas as pd

# This is the function you want to call.
# The file at 'response_pattern_filepath' must have a RESPONSE_PATTERN column containing 0's and 1's,
# and also a CO_PROVA column containing the ENEM exam code.
# 'itens_prova_filepath' must point to ENEM's official file that contains the IRT params, e.g., ITENS_PROVA_2022.csv

def run_R_IRT_script(response_pattern_filepath, itens_prova_filepath):
    """Evaluate the ``fit_irt.R`` script for one response-pattern file.

    The script source is read from ``fit_irt.R`` (resolved against the current
    working directory), both paths are printed, and they are then exposed to
    the embedded R session as the variables ``response_pattern_filepath`` and
    ``file_itens_prova`` before the script is evaluated.

    Args:
        response_pattern_filepath: Path handed to R as
            ``response_pattern_filepath``; expected to be a CSV with the
            RESPONSE_PATTERN and CO_PROVA columns.
        itens_prova_filepath: Path handed to R as ``file_itens_prova``;
            expected to be ENEM's official item-parameter file
            (e.g. ITENS_PROVA_2022.csv).
    """
    # Load the R source first, so a missing script fails before anything is printed.
    with open("fit_irt.R", 'r') as script_handle:
        script_source = script_handle.read()

    for shown_path in (response_pattern_filepath, itens_prova_filepath):
        print(shown_path)

    # Publish the inputs under the variable names used by the R side, then run it.
    r_session = robjects.r
    r_session.assign('response_pattern_filepath', response_pattern_filepath)
    r_session.assign('file_itens_prova', itens_prova_filepath)
    r_session(script_source)



In [4]:

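# This function takes the directory into which an experiment archive such as
# exp_MT_2022_mistral_simple-zero-shot_bits_4_count_24.zip was extracted,
# runs the IRT script on its aggregated CSVs and returns the theta (IRT score) tables.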
def run_R_IRT_from(filepath):
    """Compute IRT scores for one extracted experiment directory.

    The ENEM year is taken from ``filepath`` (first of 2022, 2021, 2020, 2019
    that occurs anywhere in the string). ``run_R_IRT_script`` is then run on
    ``majority_sample.csv``, ``samples.csv`` and ``random_samples.csv`` under
    ``{filepath}/aggregated``. Afterwards ``samples_with_irt.csv`` and
    ``random_samples_with_irt.csv`` (presumably produced by the R script --
    confirm against fit_irt.R) are read back and rewritten in place, sorted by
    IRT_SCORE and CTT_SCORE in descending order.

    Args:
        filepath: Directory of one experiment; must contain one of the
            supported years somewhere in its path.

    Returns:
        A tuple ``(thetas_df, average_df, thetas_random_df, average_random_df)``:
        the two ``*_with_irt.csv`` tables as read (not re-sorted) and, for
        each, the mean IRT_SCORE per CTT_SCORE.

    Raises:
        ValueError: If none of the supported years appears in ``filepath``.
    """
    # Checked in this order so the precedence of the former if/elif chain is kept.
    supported_years = ("2022", "2021", "2020", "2019")
    year = next((candidate for candidate in supported_years if candidate in filepath), None)
    if year is None:
        raise ValueError(f"YEAR not found in {filepath}")

    file_itens_prova = f"{enem_microdata_path}/microdados_enem_{year}/DADOS/ITENS_PROVA_{year}.csv"
    aggregated_dir = f"{filepath}/aggregated"

    for sample_file in ("majority_sample.csv", "samples.csv", "random_samples.csv"):
        run_R_IRT_script(f"{aggregated_dir}/{sample_file}", file_itens_prova)

    ranking_columns = ['IRT_SCORE', 'CTT_SCORE']

    # NOTE(review): to_csv() writes the DataFrame index as an extra unnamed
    # column by default; confirm that downstream readers expect it.
    thetas_df = pd.read_csv(f"{aggregated_dir}/samples_with_irt.csv")
    thetas_df.sort_values(by=ranking_columns, ascending=False).to_csv(f"{aggregated_dir}/samples_with_irt.csv")

    thetas_random_df = pd.read_csv(f"{aggregated_dir}/random_samples_with_irt.csv")
    thetas_random_df.sort_values(by=ranking_columns, ascending=False).to_csv(f"{aggregated_dir}/random_samples_with_irt.csv")
    print("thetas.csv finished! rows:", thetas_df.shape[0])

    # Mean theta for every raw (CTT) score, for the real and the random samples.
    average_df = thetas_df.groupby('CTT_SCORE')['IRT_SCORE'].mean().reset_index()
    average_random_df = thetas_random_df.groupby('CTT_SCORE')['IRT_SCORE'].mean().reset_index()

    return thetas_df, average_df, thetas_random_df, average_random_df

In [5]:
import pandas as pd
# Load the processed experiment results and show them as this cell's output.
results_parquet_path = "../../enem-experiments-results-processed.parquet"
df = pd.read_parquet(results_parquet_path)
df

Unnamed: 0,MODEL_NAME,MODEL_SIZE,TEMPERATURE,SYSTEM_PROMPT_TYPE,ENEM_EXAM,ENEM_EXAM_TYPE,QUESTION_ORDER,LANGUAGE,NUMBER_OPTIONS,SEED,CTT_SCORE,TX_RESPOSTAS,TX_GABARITO,RESPONSE_PATTERN,TOTAL_RUN_TIME_SEC,AVG_RUN_TIME_PER_ITEM_SEC,CO_PROVA
0,gpt-3.5-turbo-1106,,0.6,few-shot,ENEM_2019_CH_CO_PROVA_520,default,original,pt-br,5,2724839799,37,CBABADBBCEAEACBBDCBEDDBBEACBEACDBBABCCCEDACAC,CBABADBBCEEEBCBADCBEEDBBEADBBACDBBACCCCADACAC,111111111101011011110111110101111110111011111,97.623811,2.169418,520
1,gpt-3.5-turbo-1106,,0.6,few-shot,ENEM_2019_CN_CO_PROVA_519,default,original,pt-br,5,2724839799,24,AADAEEEBBDDDCEDDEEDACECDDCDDEBBEBCBCEADEADEAE,DADCCEBBCCACBEEBEEBACBCDDDDADBCBBCEAEADEADAAE,011001010000010011011011101001001100111111011,113.411820,2.520263,519
2,gpt-3.5-turbo-1106,,0.6,few-shot,ENEM_2019_LC_CO_PROVA_521,default,original,pt-br,5,2724839799,33,BDAAECDABAECEECCADCDEDDBCEDEBAAADDBECDCCAADCD,BDACACBABAECBBCCADCEBDBBCDDEEAAADDBECDECAAECD,111001011111001111100101101101111111110111011,128.157876,2.563158,521
3,gpt-3.5-turbo-1106,,0.6,few-shot,ENEM_2019_MT_CO_PROVA_522,default,original,pt-br,5,2724839799,13,DEDCDCECBCCDAEADECCCABECEBCECDADDEDACDCADADEC,DBEBACABCDBABECEEEDCBDCCEDCDABEDAADDDECACAECB,100001000000010010010001101000010010001101000,226.005417,5.022343,522
4,gpt-3.5-turbo-1106,,0.6,few-shot,ENEM_2020_CH_CO_PROVA_574,default,original,pt-br,5,2724839799,40,BDDBECECBACDDDEEBDEDDADCDDCCADECBCCCBBECDCCDE,BDDBECECBACDBDEEBDEDDAECDDCCADECBCCCBBEDABCDE,111111111111011111111101111111111111111000111,111.579915,2.479554,574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,mistral,7b,0.6,few-shot,ENEM_2022_CN_CO_PROVA_1092,default,original,pt-br,5,2724839799,23,DDDCDBCDDCDEBDDDCBCCDDADCDACDEDADCBDDADADADBD,DDECDBEACCAEBEAEBBCCDDCBDDACBEACEABCEABEDADBA,110111000101100001111100011101000010010011110,111.338998,2.474200,1092
76,mistral,7b,0.6,few-shot,ENEM_2022_LC_CO_PROVA_1072,default,original,en,5,2724839799,21,AAACECCBDACADBDBBEEEDBDACADABCEDEDCAADBDEDEEA,AADCECCCDCCEBACBBAAAEECABABEECEBEEDCADBDBDBAA,110111101010000110000001010001101000111101001,23.977936,0.479559,1072
77,mistral,7b,0.6,few-shot,ENEM_2022_LC_CO_PROVA_1072,default,original,pt-br,5,2724839799,22,ACDCDCCBDACADBDBBEDABDDABEDACDEDBDDAADBDBDBEA,AADCECCCDCCEBACBBAAAEECABABEECEBEEDCADBDBDBAA,101101101010000110010001100000100010111111101,33.911020,0.678220,1072
78,mistral,7b,0.6,few-shot,ENEM_2022_MT_CO_PROVA_1082,default,original,en,5,2724839799,4,DDBBBDDABDADDCCDCDBDDanuladaBABDDCDDBDDDCDCDBD...,BEEDAEABDDCEBDBAAAAACVCBCCCBCCDBDEECBDCABEECD,000000000100000000000100000000001000010000000,154.068816,3.423751,1082


In [6]:
import os
import zipfile
import tempfile
import random
import string
from pathlib import Path
import shutil

def compute_everything(source_directory, target_directory):
    """Extract the "all" experiment archives and compute their IRT scores.

    Every file in ``source_directory`` whose name starts with ``exp_``, ends
    with ``.zip`` and contains ``all`` is expected to be named like
    ``exp_all_LC_pt-br_2022_mistral_simple-zero-shot_bits_4_count_14.zip``.
    Each one is extracted into
    ``{target_directory}/{exam}/{language}/{year}/{llm}/{prompt}``. Unless
    that path contains "SUBSET", ``run_R_IRT_from`` is then run on it and the
    average theta per score is written to the ``aggregated`` sub-folder.

    Archives whose target folder already contains
    ``aggregated/samples_with_irt.csv`` are skipped.

    Args:
        source_directory: Folder containing the ``exp_*.zip`` archives.
        target_directory: Root folder that receives the extracted experiments.

    Raises:
        ValueError: If a selected file name does not consist of exactly 11
            underscore-separated fields.
    """
    archive_suffix = ".zip"
    expected_field_count = 11

    for file_name in os.listdir(source_directory):
        # Only experiment archives are of interest.
        if not (file_name.startswith("exp_") and file_name.endswith(archive_suffix)):
            continue

        # exp_all_LC_pt-br_2022_mistral_simple-zero-shot_bits_4_count_14.zip
        if 'all' not in file_name:
            continue

        zip_path = os.path.join(source_directory, file_name)

        # Drop the extension before splitting so the last field is the bare
        # shuffle count ("14") rather than "14.zip".
        fields = file_name[:-len(archive_suffix)].split("_")
        if len(fields) != expected_field_count:
            raise ValueError(
                f"Unexpected experiment file name {file_name!r}: expected "
                f"{expected_field_count} '_'-separated fields, got {len(fields)}"
            )
        (_exp, _exp_type, exam, language, year, llm, prompt, _b, bits, _c, shuffle_count) = fields
        print(file_name, ':', 'exam', exam, 'language', language, 'year', year, 'prompt', prompt, 'bits', bits, 'shuffle count', shuffle_count)

        # parents=True creates every intermediate level in a single call.
        full_target_directory = f"{target_directory}/{exam}/{language}/{year}/{llm}/{prompt}"
        Path(full_target_directory).mkdir(parents=True, exist_ok=True)

        if os.path.exists(f"{full_target_directory}/aggregated/samples_with_irt.csv"):
            print(full_target_directory, ' already exists, skipping.\n')
            continue

        # The archive only needs to stay open while it is being extracted.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(path=full_target_directory)
        print(f"Files from {zip_path} extracted to: {full_target_directory}")

        if "SUBSET" not in full_target_directory:
            thetas_df, average_df, thetas_random_df, average_random_df = run_R_IRT_from(full_target_directory)
            average_df.to_csv(full_target_directory + "/aggregated/average_theta_by_score_sample.csv")
            average_random_df.to_csv(full_target_directory + "/aggregated/average_theta_by_score_random_sample.csv")


In [7]:
# Archives of the full ("ALL") experiments and the folder that receives their extracted results.
# NOTE: both are absolute paths on the original author's machine; adjust them before running elsewhere.
source_directory_path = "C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL"
target_directory_path = "C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP"

start_message = f"Reading LLM responses from {source_directory_path} and generating IRT scores into {target_directory_path}\n"
print(start_message)

compute_everything(
    source_directory=source_directory_path,
    target_directory=target_directory_path,
)

print("FINISHED!")

Reading LLM responses from C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL and generating IRT scores into C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP

exp_all_CH_pt-br_2020_llama2_simple-zero-shot_bits_4_count_1.zip : exam CH language pt-br year 2020 prompt simple-zero-shot bits 4 shuffle count 1.zip
C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP/CH/pt-br/2020/llama2/simple-zero-shot  already exists, skipping.

exp_all_CH_pt-br_2020_mistral_paper-nunes-2023-zero-shot_bits_4_count_10.zip : exam CH language pt-br year 2020 prompt paper-nunes-2023-zero-shot bits 4 shuffle count 10.zip
C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP/CH/pt-br/2020/mistral/paper-nunes-2023-zero-shot  already exists, skipping.

exp_all_CH_pt-br_2021_llama2_simple-zero-shot_bits_4_count_1.zip : exam CH language pt-br year 2021 prompt simple-zero-shot bits 4 shuffle count 1.zip
C:/Users/pedro/Downloads/TRI/test_responses_llms/EXP/CH/pt-br/2021/llama2/simple-zero-shot  already exist

In [8]:
# Same pipeline for the "SUBSET" archives. Because the target path contains "SUBSET",
# compute_everything only extracts these archives and does not run the IRT step on them.
# NOTE: both are absolute paths on the original author's machine; adjust them before running elsewhere.
source_directory_path = "C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/SUBSET"
target_directory_path = "C:/Users/pedro/Downloads/TRI/test_responses_llms/SUBSET"

start_message = f"Reading LLM responses from {source_directory_path} and generating IRT scores into {target_directory_path}\n"
print(start_message)

compute_everything(
    source_directory=source_directory_path,
    target_directory=target_directory_path,
)

print("FINISHED!")

Reading LLM responses from C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/SUBSET and generating IRT scores into C:/Users/pedro/Downloads/TRI/test_responses_llms/SUBSET

FINISHED!


In [9]:
#df = pd.read_csv("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/GABRIEL/enem-experiments-results-processed.csv")
#theta_df = pd.read_csv("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/GABRIEL/thetas-enem-experiments-results-processed.csv")

#result_df = pd.concat([df, theta_df], axis=1, verify_integrity=False)
#result_df = result_df.T.drop_duplicates().T

#result_df.to_csv("C:/Users/pedro/Downloads/TRI/test_responses_llms/ZIPS/ALL/GABRIEL/experiments-with-irt.csv")

In [10]:
# 1062 CH
# 1082 MT
# 1092 CN


#result_df[result_df['CO_PROVA'] == 1092][['ENEM_EXAM', 'MODEL_NAME', 'LANGUAGE', 'MODEL_SIZE','CTT_SCORE','IRT_SCORE', 'MEAN_CORRECT_B', 'MEAN_INCORRECT_B ']].sort_values(by='CTT_SCORE', ascending=False)

