In [1]:
import itertools
import pickle

import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score

In [2]:
# Read the whole dataset once, then take an independent copy of the rows of
# each split, selected by the value in the `subset` column.
df = pd.read_csv('sigcse_2024.csv')

train_df, validate_df, test_df = (
    df[df.subset == split_name].copy()
    for split_name in ('train', 'validate', 'test')
)

In [3]:
# Kernel names and C values (the powers of two 2**0 .. 2**13).
# NOTE(review): presumably the SVC hyperparameter grid behind the saved
# metrics; neither list is referenced in the cells visible here - confirm.
kernels = ['linear', 'poly', 'rbf']
cs = [2 ** exponent for exponent in range(14)]

In [4]:
# Load the saved mapping: label suffix -> config -> metrics.
# NOTE(security): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes
# (the bare `open(...)` call left it open until garbage collection).
with open('embeddings/suffix_to_config_to_metrics.pkl', 'rb') as metrics_file:
    suffix_to_config_to_metrics = pickle.load(metrics_file)

In [5]:
# For each label suffix, find the config that maximises each of the first
# three metrics and require that all three metrics agree on the same config.
suffix_to_best_config = {}
for suffix, config_to_metrics in suffix_to_config_to_metrics.items():
    winners = []
    for metric_index in range(3):
        winner = max(
            config_to_metrics,
            key=lambda config: config_to_metrics[config][metric_index],
        )
        winners.append(winner)

    # Sanity check: a single config must be best on every metric.
    assert len(set(winners)) == 1
    suffix_to_best_config[suffix] = winners[0]

In [6]:
# Load the precomputed embeddings (indexed later by dataframe row index).
# NOTE(security): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
# The context manager closes the file handle as soon as loading finishes;
# the needless f-string prefix on the constant path is dropped.
with open('embeddings/openai_api.pkl', 'rb') as embeddings_file:
    embeddings = pickle.load(embeddings_file)

In [7]:
# For every label column (suffix) and every question (qid), predict that label
# for the question's validate/test rows and store the predictions in the
# `embedding_openai_api_{suffix}_svc` column of validate_df / test_df.
#
# - If all training rows of a question share one label, that label is written
#   directly (an SVC cannot be fitted on a single class).
# - Otherwise an SVC with the suffix's best config is fitted on the training
#   embeddings of that question and used to predict the validate/test rows.


def _embedding_matrix(sub_df):
    """Return a (len(sub_df), embedding_dim) array with the embedding of each row of sub_df.

    Rows are looked up in `embeddings` by the dataframe index label, in the
    order they appear in `sub_df`.
    """
    matrix = np.zeros((len(sub_df), np.shape(embeddings)[1]))
    for position, row_index in enumerate(sub_df.index):
        matrix[position, :] = embeddings[row_index]
    return matrix


for suffix in suffix_to_best_config:
    best_config = suffix_to_best_config[suffix]
    # The config string is split on '_' into kernel name and integer C;
    # parse it once per suffix instead of once per question.
    config_parts = best_config.split('_')
    kernel, c_value = config_parts[0], int(config_parts[1])
    prediction_column = f'embedding_openai_api_{suffix}_svc'

    for qid, sub_train_df in train_df.groupby('qid'):
        train_labels = sub_train_df[suffix].unique()

        if len(train_labels) == 1:
            svc = None
        else:
            svc = SVC(kernel=kernel, C=c_value)
            svc.fit(_embedding_matrix(sub_train_df), sub_train_df[suffix].to_numpy(dtype=int))

        # Same treatment for both evaluation splits; validate first, then test.
        for target_df in (validate_df, test_df):
            sub_target_df = target_df[target_df.qid == qid]

            if svc is None:
                # Single-class question: predict the only label seen in training.
                target_df.loc[sub_target_df.index, prediction_column] = train_labels[0]
            elif len(sub_target_df) > 0:
                # Skip empty splits: SVC.predict rejects an input with zero samples.
                predicted = svc.predict(_embedding_matrix(sub_target_df))
                target_df.loc[sub_target_df.index, prediction_column] = predicted

In [8]:
# Print accuracy, Cohen's kappa and F1 of the SVC predictions on the test
# split, one label column at a time.
for suffix in suffix_to_best_config:
    label_truth = test_df[suffix]
    label_predicted = test_df[f'embedding_openai_api_{suffix}_svc']
    scores = (
        accuracy_score(label_truth, label_predicted),
        cohen_kappa_score(label_truth, label_predicted),
        f1_score(label_truth, label_predicted),
    )
    print(suffix)
    print(*scores)

c
0.8838709677419355 0.7640966020187074 0.8968481375358166
una
0.7725806451612903 0.4262876043471412 0.8438538205980065
hl
0.9258064516129032 0.7342774620329824 0.9554263565891473


In [9]:
# Combine the three per-label values of every validate row into one
# '<c>_<una>_<hl>' string, for the ground truth and for the SVC predictions
# (predictions are cast to int before formatting).
validate_df['c_una_hl'] = validate_df.apply(
    lambda row: '_'.join(str(value) for value in (row.c, row.una, row.hl)),
    axis=1,
)
validate_df['c_una_hl_predicted'] = validate_df.apply(
    lambda row: '_'.join(
        str(int(row[f'embedding_openai_api_{label}_svc']))
        for label in ('c', 'una', 'hl')
    ),
    axis=1,
)

# Collapse each combined string to a binary value: 1 only for '1_1_1'.
for combined_column, binary_column in (
    ('c_una_hl', 'converted_binary_ground_truth'),
    ('c_una_hl_predicted', 'converted_binary_predicted'),
):
    validate_df[binary_column] = validate_df[combined_column].map(
        lambda combined: int(combined == '1_1_1')
    )

In [10]:
# Combine the three per-label values of every test row into one
# '<c>_<una>_<hl>' string, for the ground truth and for the SVC predictions
# (predictions are cast to int before formatting).
test_df['c_una_hl'] = test_df.apply(
    lambda row: '_'.join(str(value) for value in (row.c, row.una, row.hl)),
    axis=1,
)
test_df['c_una_hl_predicted'] = test_df.apply(
    lambda row: '_'.join(
        str(int(row[f'embedding_openai_api_{label}_svc']))
        for label in ('c', 'una', 'hl')
    ),
    axis=1,
)

# Collapse each combined string to a binary value: 1 only for '1_1_1'.
for combined_column, binary_column in (
    ('c_una_hl', 'converted_binary_ground_truth'),
    ('c_una_hl_predicted', 'converted_binary_predicted'),
):
    test_df[binary_column] = test_df[combined_column].map(
        lambda combined: int(combined == '1_1_1')
    )

In [11]:
# Accuracy, Cohen's kappa and F1 of the converted binary prediction against
# the converted binary ground truth on the test split.
binary_truth = test_df['converted_binary_ground_truth']
binary_predicted = test_df['converted_binary_predicted']
for metric in (accuracy_score, cohen_kappa_score, f1_score):
    print(metric(binary_truth, binary_predicted))

0.8661290322580645
0.7254002134471718
0.838206627680312


In [12]:
def order_predict(row, order):
    """Score one row by walking the per-label predictions in the given order.

    Parameters
    ----------
    row : mapping-like with attribute access (e.g. a DataFrame row)
        Must expose `converted_binary_ground_truth`, `converted_binary_predicted`,
        and, for every suffix in `order`, both `row[suffix]` and
        `row['embedding_openai_api_{suffix}_svc']`.
    order : iterable of str
        Label suffixes, in the order they are checked.

    Returns
    -------
    - `row.converted_binary_predicted` when the converted binary ground truth is 1.
    - Otherwise the walk stops at the first suffix whose prediction is not 1:
      -1 if the true label for that suffix is 1, 0 if it is not.
    - 1 if every prediction in `order` is 1.
    """
    if row.converted_binary_ground_truth == 1:
        return row.converted_binary_predicted

    for suffix in order:
        truth_is_one = row[suffix] == 1
        if row[f'embedding_openai_api_{suffix}_svc'] == 1:
            # Predicted 1 for this label: move on to the next one.
            continue
        # First label not predicted as 1 decides the outcome.
        return -1 if truth_is_one else 0

    return 1

In [13]:
# All six orderings of the three label suffixes, as lists, in the order
# itertools.permutations yields them for the input ['c', 'hl', 'una'].
orders = [list(ordering) for ordering in itertools.permutations(['c', 'hl', 'una'])]

In [14]:
# Evaluate every label ordering on the validate split.
for order in orders:
    order_string = '_'.join(order)
    truth_column = f'{order_string}_ground_truth'

    validate_df[order_string] = validate_df.apply(order_predict, axis=1, args=(order,))
    validate_df[truth_column] = validate_df['converted_binary_ground_truth']

    # Rows flagged -1 by order_predict are scored as ground truth 1 / predicted 0.
    flagged = validate_df[order_string] == -1
    validate_df.loc[flagged, truth_column] = 1
    validate_df.loc[flagged, order_string] = 0

    print(order)
    for metric in (accuracy_score, cohen_kappa_score, f1_score):
        print(metric(validate_df[truth_column], validate_df[order_string]))

['c', 'hl', 'una']
0.826797385620915
0.647358121330724
0.7999999999999999
['c', 'una', 'hl']
0.8284313725490197
0.6505481597494127
0.8015122873345935
['hl', 'c', 'una']
0.8186274509803921
0.6314453125
0.7925233644859814
['hl', 'una', 'c']
0.8006535947712419
0.5966549980552315
0.7765567765567766
['una', 'c', 'hl']
0.8055555555555556
0.606113707165109
0.7808471454880296
['una', 'hl', 'c']
0.803921568627451
0.6029583495523549
0.7794117647058822


In [15]:
# Multi-class comparison of the combined 'c_una_hl' label strings on the test
# split; F1 is averaged with the 'weighted' scheme.
combined_truth = test_df['c_una_hl']
combined_predicted = test_df['c_una_hl_predicted']
print(accuracy_score(combined_truth, combined_predicted))
print(cohen_kappa_score(combined_truth, combined_predicted))
print(f1_score(combined_truth, combined_predicted, average='weighted'))

0.6612903225806451
0.5503755503755503
0.6452956635729279


In [16]:
# Evaluate the single ordering c -> una -> hl on the test split.
order = ['c', 'una', 'hl']
order_string = '_'.join(order)
truth_column = f'{order_string}_ground_truth'

test_df[order_string] = test_df.apply(order_predict, axis=1, args=(order,))
test_df[truth_column] = test_df['converted_binary_ground_truth']

# Rows flagged -1 by order_predict are scored as ground truth 1 / predicted 0.
flagged = test_df[order_string] == -1
test_df.loc[flagged, truth_column] = 1
test_df.loc[flagged, order_string] = 0

print(order)
for metric in (accuracy_score, cohen_kappa_score, f1_score):
    print(metric(test_df[truth_column], test_df[order_string]))

['c', 'una', 'hl']
0.8145161290322581
0.6236434272201343
0.7889908256880733


In [17]:
# Agreement between the dataset's `binary_ground_truth` column and the binary
# ground truth derived from the three labels, on the test split.
reference_truth = test_df['binary_ground_truth']
converted_truth = test_df['converted_binary_ground_truth']
for metric in (accuracy_score, cohen_kappa_score, f1_score):
    print(metric(reference_truth, converted_truth))

0.8887096774193548
0.7661016949152543
0.8571428571428572
