In [18]:
import os
import pickle

import pandas as pd

# Load the intermediate pickles and CSVs from data/intermediate/.
#
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only run this cell against pickle files you produced yourself or otherwise trust.
def _load_pickle(path):
    """Return the object unpickled from the file at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


interest = _load_pickle('data/intermediate/interest.pkl')
# NOTE(review): `stats` is not referenced by any cell visible here; kept in
# case another cell depends on it.
stats = _load_pickle('data/intermediate/stats.pkl')
meta_complete = _load_pickle('data/intermediate/metadata.pkl')
# NOTE(review): this name would shadow the stdlib `time` module if it were
# ever imported in this notebook. Kept because the merge cell reads `time`.
time = _load_pickle('data/intermediate/term.pkl')

# Drop the stray 'Unnamed: 0.1' column and expose 'Unnamed: 0' as 'Question'.
# Presumably both are index columns left over from earlier CSV round-trips -- TODO confirm.
prof_q_df = (
    pd.read_csv('data/intermediate/prof_q.csv')
    .drop(columns=['Unnamed: 0.1'])
    .rename(columns={'Unnamed: 0': 'Question'})
)

# Only the drop is needed here: once 'Unnamed: 0' is removed, a rename keyed
# on that same label can never match anything.
# NOTE(review): the pivot on 'Question' further down implies challenge_q.csv
# already ships a 'Question' column -- confirm against the file.
challenge_df = (
    pd.read_csv('data/intermediate/challenge_q.csv')
    .drop(columns=['Unnamed: 0'])
)

In [19]:
# Substrings identifying the per-skill "...-Statistics" rows to exclude.
keywords_to_remove = [
    'Listening-Statistics',
    'Reading-Statistics',
    'Speaking-Statistics',
    'Grammar-Statistics',
    'Vocabulary-Statistics',
    'Orthography-Statistics',
]

# The keywords are OR-ed into a single case-insensitive regex. Rows whose
# Question is missing count as non-matching (na=False), so they are kept.
statistics_pattern = '|'.join(keywords_to_remove)
is_statistics_row = prof_q_df['Question'].str.contains(
    statistics_pattern, case=False, na=False
)

# .copy() detaches the result from prof_q_df.
prof_q_df_filtered = prof_q_df.loc[~is_statistics_row].copy()



def _pivot_questions_wide(long_df):
    """Pivot `long_df` to one row per 'source' and flatten the column labels.

    Each resulting column label is the pieces of the pivoted label joined
    with '_' (every piece is passed through str first).
    """
    wide = long_df.pivot(index='source', columns='Question')
    flat_labels = []
    for label in wide.columns.to_flat_index():
        flat_labels.append("_".join(str(part) for part in label))
    wide.columns = flat_labels
    return wide


wide_prof = _pivot_questions_wide(prof_q_df_filtered)
challenge_wide = _pivot_questions_wide(challenge_df)

# Turn the loaded dicts into frames; orient='index' makes each top-level
# dict key a row label.
meta_complete_df = pd.DataFrame.from_dict(meta_complete, orient='index')
time_df = pd.DataFrame.from_dict(time, orient='index')
interest_df = pd.DataFrame.from_dict(interest, orient='index')

# Expand the two interest questions into one column per answer key.
# 'prior_*' comes from the "Prior to starting..." question and 'after_*' from
# the "Now that this class is over..." question, so each name and prefix
# matches the question it was built from.
# NOTE(review): neither frame is used by the merge in this cell, and the
# flattening loop that follows reads 'after.png' / 'prior.png' keys from the
# same `interest` dict -- confirm these question-text columns exist.
prior_flat = pd.json_normalize(interest_df['Prior to starting this class, your interest level was?'])
after_flat = pd.json_normalize(interest_df['Now that this class is over, your interest is?'])

prior_flat.columns = [f"prior_{col}" for col in prior_flat.columns]
after_flat.columns = [f"after_{col}" for col in after_flat.columns]

def _flatten_interest(source, imgs):
    """Build one flat record for `source` from its nested `imgs` mapping.

    Entries found under 'after.png' and 'prior.png' (in that order) are
    lifted to top-level keys named '<that key>_<inner key>'. A key that is
    absent from `imgs` is skipped.
    """
    flat = {'source': source}
    for img_type in ('after.png', 'prior.png'):
        if img_type not in imgs:
            continue
        flat.update({f"{img_type}_{k}": v for k, v in imgs[img_type].items()})
    return flat


records = [_flatten_interest(source, imgs) for source, imgs in interest.items()]

# One row per source, indexed by it so the frame can be joined on the index.
flat_df = pd.DataFrame(records).set_index('source')

# Outer-join each remaining frame onto the professor-question frame by index,
# one at a time and in this order. how='outer' keeps every index label that
# appears in any of the inputs.
merged_df = wide_prof
for extra_frame in (challenge_wide, meta_complete_df, time_df, flat_df):
    merged_df = merged_df.join(extra_frame, how='outer')

In [21]:
# Write the merged table to data/final/. to_csv does not create missing
# parent directories, so make sure the folder exists first; otherwise a fresh
# checkout fails on this cell.
output_path = 'data/final/course_evals.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
merged_df.to_csv(output_path)