In [None]:
import glob
from collections import Counter

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Do not truncate wide DataFrames when they are displayed: show every column.
pd.set_option('display.max_columns', None)

## Process GPT Grades

In [None]:
def try_parse_gpt(s):
    """Best-effort conversion of a raw GPT-4o grade cell to an integer score.

    Parameters
    ----------
    s : str, number, or missing value
        A string is scored by its first character only (so a grade followed
        by free text still parses); any other non-missing value is passed to
        ``int``.

    Returns
    -------
    int
        The parsed score, or 0 when the value is missing or cannot be parsed
        (empty string, non-digit first character, infinite float, ...).
    """
    try:
        if isinstance(s, str):
            # Only the leading character is used as the grade.
            return int(s[0])
        if pd.isna(s):
            return 0
        return int(s)
    except (TypeError, ValueError, IndexError, OverflowError):
        # Only parsing failures fall back to 0; KeyboardInterrupt/SystemExit
        # are deliberately NOT swallowed (a bare ``except:`` would hide them).
        return 0

In [None]:
# Build a {Transaction Id -> parsed GPT-4o score} lookup from every grade file.
# Files are visited in sorted order: glob.glob returns paths in arbitrary order,
# and when a Transaction Id appears in more than one file the later file
# overwrites the earlier one, so an unsorted walk could change the result
# from one run/machine to the next.
d_gpt_grade = dict()
for f in sorted(glob.glob('gpt4o-grades/*.csv')):
    tmp = pd.read_csv(f)
    # Walk the two needed columns directly instead of materialising a Series
    # per row with iterrows().
    for transaction_id, raw_score in zip(tmp['Transaction Id'], tmp['GPT-4o Score']):
        d_gpt_grade[transaction_id] = try_parse_gpt(raw_score)

## Condition References

In [None]:
# Condition reference table: one row per (student, lesson).
# Note: the source header for the AI condition is matched with its trailing space.
condition_column_names = {
    'Tutors Assigned AI Condition ': 'ai_condition_merged',
    'Lesson': 'Level (Lesson)',
}
df_condition = (
    pd.read_csv('PLUS Lesson Versions by Anon Id - All Lessons.csv')
    # rows without a student id or lesson cannot be joined to the logs
    .dropna(subset=['Anon Student Id', 'Lesson'])
    .rename(columns=condition_column_names)
    # keep the first record when a (student, lesson) pair is listed repeatedly
    .drop_duplicates(subset=['Anon Student Id', 'Level (Lesson)'], keep='first')
)

## Lesson Logs

In [None]:
# Load every lesson-log export and stack them into a single frame.
# - sorted(): glob.glob returns paths in arbitrary order; sorting makes the row
#   order of the combined frame (and of anything exported from it) reproducible.
# - ignore_index=True: each CSV arrives with its own 0..n-1 index, so a plain
#   concat would repeat index labels across files; a fresh unique index keeps
#   later index-aligned operations unambiguous.
# pd.concat raises ValueError if no file matches the pattern.
dfs = [
    pd.read_csv(f, low_memory=False)
    for f in sorted(glob.glob('lesson-data/*.csv'))
]
df = pd.concat(dfs, ignore_index=True)

In [None]:
# Look up each log row's parsed GPT-4o grade by its Transaction Id.
# Rows whose Transaction Id is not a key of d_gpt_grade get NaN.
df['GPT4o Score'] = df['Transaction Id'].map(d_gpt_grade)

In [None]:
def _outcome_to_score(outcome):
    """Score a logged Outcome: 1 for 'CORRECT', 0 for any other string, NaN otherwise."""
    if not isinstance(outcome, str):
        # missing / non-text outcomes carry no score
        return np.nan
    return 1 if outcome == 'CORRECT' else 0


df['Log Score'] = df['Outcome'].map(_outcome_to_score)

In [None]:
# Final score: use the GPT-4o score where one exists, otherwise fall back to
# the score derived from the logged Outcome.
# NOTE(review): try_parse_gpt yields 0 (not NaN) for a grade it cannot parse, so
# such rows keep 0 here rather than falling back to 'Log Score' — confirm intended.
df['score'] = df['GPT4o Score'].combine_first(df['Log Score'])

In [None]:
# Attach the per-(student, lesson) condition record to every log row.
# Left join: log rows without a matching condition record are kept.
df = pd.merge(
    df,
    df_condition,
    on=['Anon Student Id', 'Level (Lesson)'],
    how='left',
)

## Searching and Removing Test Accounts

In [None]:
def _text_length(value):
    """Length of a text response; NaN when the value is not a string."""
    return len(value) if isinstance(value, str) else np.nan


# Only rows that received a GPT-4o score are inspected.
graded_rows = df[df['GPT4o Score'].notna()].copy()

# A graded response shorter than 10 characters flags its author for removal
# (non-string inputs have NaN length and are never flagged).
is_too_short = graded_rows['Input'].map(_text_length) < 10
remove_ids = set(graded_rows.loc[is_too_short, 'Anon Student Id'])

# Drop every row belonging to a flagged student.
df = df[df['Anon Student Id'].map(lambda student: student not in remove_ids)].copy()

## RQ3 Tutor Rating of Feedback

In [None]:
# Final-attempt rows in which the AI feedback was rated.
is_feedback_rating = df['Action'] == 'RateAIFeedback'
is_last_attempt = df['Is Last Attempt'] == 1
df_rq3 = df[is_feedback_rating & is_last_attempt]

In [None]:
# Number of feedback ratings
len(df_rq3)

In [None]:
# How often each rating value was given
df_rq3['Input'].value_counts()

In [None]:
# Share of each rating value as a percentage of all ratings, to 2 decimals
(df_rq3['Input'].value_counts() * 100 / len(df_rq3)).round(2)

## Export for Analysis

In [None]:
# Keep only the last attempt of each step.
df = df[df['Is Last Attempt'] == 1].copy()

# Leading component of 'Problem Name' (the text before the first '.'),
# computed once and reused for the three derived flags below.
problem_number = df['Problem Name'].map(lambda name: name.split('.')[0])

# Problems 9-12 are flagged as posttest items.
df['posttest'] = problem_number.map(lambda number: number in ['9', '10', '11', '12'])

# Analysis-friendly aliases of existing columns.
df['lesson'] = df['Level (Lesson)']
df['student_id'] = df['Anon Student Id']
df['ai_condition'] = df['ai_condition_merged']
df['response_condition'] = df['CF (Condition)']

# 9/10 -> 'predict', everything else -> 'explain'.
df['question_type'] = problem_number.map(
    lambda number: 'predict' if number in ['9', '10'] else 'explain'
)
# 9/11 -> 'generated', everything else -> 'selected'.
df['response_type'] = problem_number.map(
    lambda number: 'generated' if number in ['9', '11'] else 'selected'
)

In [None]:
# Write the last-attempt log rows, with the derived analysis columns, to CSV
# (the row index is not written).
df.to_csv('full_sample_logs_ectel25.csv', index=False)

## ToT Reference

In [None]:
def try_parse_status(s):
    """Coerce a status value to ``int`` where possible, else return it unchanged.

    Two attempts are made, in order:

    1. ``int(s[0])`` — the first element/character of ``s`` (so ``'1'`` -> 1;
       note that a multi-digit string such as ``'10'`` also yields 1).
    2. ``int(s)`` — the whole value (covers ``'-1'``, whose first character
       ``'-'`` is not a number, and plain numeric values such as ``-1.0``).

    Parameters
    ----------
    s : any
        Raw cell value.

    Returns
    -------
    int or original value
        The parsed integer, or ``s`` itself when neither attempt succeeds
        (e.g. NaN, empty string, arbitrary text).
    """
    # Only conversion/indexing failures are treated as "not parseable";
    # KeyboardInterrupt/SystemExit propagate instead of being swallowed.
    parse_errors = (TypeError, ValueError, LookupError, OverflowError)

    try:
        return int(s[0])
    except parse_errors:
        pass

    try:
        return int(s)
    except parse_errors:
        return s

In [None]:
# Normalise the AI-evaluation status column: values that try_parse_status can
# convert become ints, everything else is left as it was.
df['CF (AI_Evaluation)'] = df['CF (AI_Evaluation)'].map(try_parse_status)

In [None]:
# Count, per (student, lesson), how often each AI-evaluation status occurs.
# The counts column is named 'count' explicitly: the name pandas gives the
# value_counts() result differs between pandas versions, and the pivot that
# follows selects the column by that name.
df_wide = (
    df.groupby(['student_id', 'lesson'])['CF (AI_Evaluation)']
    .value_counts()
    .reset_index(name='count')
)

In [None]:
# Reshape to one row per (student_id, lesson), with one column per distinct
# 'CF (AI_Evaluation)' value holding that value's 'count'.
df_wide = df_wide.pivot(index=['student_id', 'lesson'], columns='CF (AI_Evaluation)', values='count').reset_index()
# Give the row index an empty-string name.
df_wide.index.name = ''
# Status values that never occurred for a (student, lesson) pair are NaN after
# the pivot; treat them as a count of 0.
df_wide = df_wide.fillna(0)

In [None]:
# Renaming the columns: status 1 -> feedback received, status -1 -> error
df_wide = df_wide.rename(columns={1: 'n_received_ai', -1: 'n_error_ai'})

# A status that never occurs anywhere in the data yields no column at all in
# the pivoted frame; create it as an all-zero count so the arithmetic below
# does not fail with a KeyError.
for count_column in ('n_received_ai', 'n_error_ai'):
    if count_column not in df_wide.columns:
        df_wide[count_column] = 0

# Adding calculated columns
df_wide['n_requested_ai'] = df_wide['n_received_ai'] + df_wide['n_error_ai']
# Rows with zero requests evaluate 0/0 and get NaN here.
df_wide['ratio_error'] = df_wide['n_error_ai'] / df_wide['n_requested_ai']

In [None]:
# Write the per-(student, lesson) AI usage summary to CSV (the row index is
# not written).
df_wide.to_csv('ai_usage-ectel2025.csv', index=False)