In [8]:
%load_ext autoreload
%autoreload 2
import os
import matplotlib.pyplot as plt
import seaborn as sns
from os.path import join
from tqdm import tqdm
import pandas as pd
import sys
from typing import List
import numpy as np
from copy import deepcopy
import joblib
from pprint import pprint
from sklearn.preprocessing import normalize
from math import ceil
import cortex
from neuro import config
from collections import defaultdict
from scipy.stats import norm
from statsmodels.stats.multitest import multipletests
from neuro import flatmaps_helper
from neuro.flatmaps_helper import load_flatmaps
import neuro.sasc.viz
import neuro.viz
from neuro import analyze_helper
import nibabel as nib
neurosynth_compare = __import__('04_neurosynth_compare')
import neurosynth
from neuro.features.questions.gpt4 import QS_35_STABLE
from neuro.features import qa_questions
import dvu
import viz
# Apply the plotting style provided by the `dvu` package.
# NOTE(review): presumably this sets matplotlib rc defaults — confirm in dvu.
dvu.set_style()

# Project helper from `neuro.config`.
# NOTE(review): presumably configures the FreeSurfer environment used by the
# cortex/flatmap tooling — confirm against neuro.config.
config.setup_freesurfer()
# Number of survey respondents; used further down as n when converting the
# survey's 'Standard Deviation' column into a standard error (SD / sqrt(n)).
N_SURVEY_RESPONSES = 12

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Ensemble (non-GPT-4) features: each question run one at a time

In [None]:
# Load the pickled results table from the local results directory.
rr_all = pd.read_pickle(
    join(config.RESULTS_DIR_LOCAL, 'results_full_oct17.pkl'))

# Narrow down to the single-question runs of interest. The filters are applied
# one after another (in this order) so that each comparison only ever sees the
# rows that survived the previous ones.
row_filters = [
    ('ndelays', lambda col: col == 4),
    ('pc_components', lambda col: col == 100),
    ('feature_space', lambda col: col == 'qa_embedder'),
    ('qa_questions_version', lambda col: col == 'v3_boostexamples_merged'),
    ('qa_embedding_model', lambda col: col == 'ensemble2'),
    ('single_question_idx', lambda col: col >= 0),
    ('feature_selection_alpha', lambda col: col == -1),
]
r = rr_all
for column, keep in row_filters:
    r = r[keep(r[column])]

# Question text, indexed by `single_question_idx`.
qs = qa_questions.get_merged_questions_v3_boostexamples()

# One row per question: mean test correlation plus the question text, whether
# it belongs to the stable 35-question set, and its abbreviated label.
ravg = (
    r.groupby(['single_question_idx'])[['corrs_test_mean']]
    .mean()
    .reset_index()
    .assign(
        question=lambda df: df['single_question_idx'].apply(
            lambda idx: qs[idx]),
        q_selected=lambda df: df['question'].apply(
            lambda text: text in QS_35_STABLE),
        question_abbrev=lambda df: df['question'].apply(
            analyze_helper.abbrev_question),
    )
)

## Visualize top and bottom questions

In [None]:
# ravg.to_pickle('corrs_df/single_question_corrs.pkl')

In [None]:
# Split the per-question averages by membership in the selected-35 set.
selected_mask = ravg['q_selected']
r_selected, r_unselected = ravg[selected_mask], ravg[~selected_mask]

plt.figure(figsize=(6, 3))
# Unselected questions are drawn first, then the selected 35 on top of them.
for subset, label, color in (
    (r_unselected, 'Unselected', 'C1'),
    (r_selected, 'Selected 35', 'C0'),
):
    sns.histplot(subset['corrs_test_mean'], label=label, color=color)
plt.legend()
plt.xlabel('Test correlation using single-question model')
plt.ylabel('Question count')

# Mean test correlation of each group (selected first, then unselected).
print(
    'means',
    r_selected['corrs_test_mean'].mean(),
    r_unselected['corrs_test_mean'].mean(),
)
neuro.viz.savefig(
    'monosemantic/single_question_perf_hists.pdf', bbox_inches='tight')

In [None]:
# Unselected questions ranked from highest to lowest mean test correlation,
# reduced to the two columns that go into the LaTeX tables.
unselected_ranked = (
    ravg[~ravg.q_selected]
    .sort_values('corrs_test_mean', ascending=False)
    [['question_abbrev', 'corrs_test_mean']]
)
latex_kwargs = dict(float_format="%.3f", index=False)

# Best 15 unselected questions, then the worst 15, as LaTeX tabulars.
print(unselected_ranked.head(15).to_latex(**latex_kwargs))
print(unselected_ranked.tail(15).to_latex(**latex_kwargs))

### load survey results

In [36]:
# Read the survey export; its first column holds the question text.
survey_results = pd.read_csv('survey_results.csv')

# Name that first column 'question_abbrev' so it can serve as the merge key
# against `ravg` later on.
first_col = survey_results.columns[0]
survey_results = survey_results.rename(columns={first_col: 'question_abbrev'})

# Cut every column name at its first '(' (dropping the parenthetical and any
# whitespace before it); names without a '(' are kept untouched.
survey_results.columns = [
    name.split('(', 1)[0].strip() if '(' in name else name
    for name in survey_results.columns
]
# add question mark to question_abbrev

def remove_parens(s):
    """Remove the first parenthesized group from ``s``.

    Deletes everything from the first ``(`` through the first ``)`` that
    follows it (both inclusive). Surrounding whitespace is left as-is.

    Args:
        s: Input string.

    Returns:
        ``s`` without its first ``(...)`` group, or ``s`` unchanged when it
        contains no ``(`` or no ``)`` after that ``(``.
    """
    start = s.find('(')
    if start == -1:
        return s
    # Look for the closing paren only *after* the opening one. Searching from
    # the start of the string would pick up a stray ')' that precedes '(' and
    # make the two slices overlap, duplicating part of the text.
    end = s.find(')', start)
    if end == -1:
        return s
    return s[:start] + s[end + 1:]

# Normalize the survey's question labels so they can be matched against
# `ravg.question_abbrev`: drop the parenthetical, trim surrounding whitespace,
# and make sure every label ends with a question mark. The label is always
# stripped, including when it already ends with '?', so stray whitespace can
# never leak into the merge key.
survey_results['question_abbrev'] = (
    survey_results['question_abbrev']
    .apply(remove_parens)
    .apply(str.strip)
    .apply(lambda label: label if label.endswith('?') else label + '?')
)

# Survey wording -> wording used in `ravg.question_abbrev`.
# Both sides are matched verbatim (apparent typos included), so do not
# "correct" a string here without checking the survey file and the abbreviations.
RENAME_DICT = {
    '...contain numbers?': '...contain a number?',
    "...are reflective, involving self-analysis or introspection?":
        "...reflective, involving self-analysis or introspection?",
    "...are related to a specific industry or profession?":
        "...related to a specific industry or profession?",
    "...is abstract rather than concrete?":
        "...abstract rather than concrete?",
    "...describe an interpersonal misunderstanding or dispute?":
        "...describe a an interpersonal misunderstanding or dispute?",
    "...cointain first-person pronoun?":
        "...first-person pronoun in the input?",
    "...are part of a legal document or text?":
        "...part of a legal document or text?",
    '...include a description about dialogue?':
        '...include dialogue?',
    '...describe an educational lesson or class?':
        '...educational lesson or class described?',
}
# Rewrite the question labels (row values, not column names) via RENAME_DICT;
# labels without an entry are kept as they are.
survey_results['question_abbrev'] = survey_results['question_abbrev'].map(
    lambda label: RENAME_DICT.get(label, label))

# Right merge: keep every survey question, even if `ravg` has no match for it
# (such rows get NaN in all columns coming from `ravg`).
merged = ravg.merge(survey_results, on='question_abbrev', how='right')

# For any duplicate question_abbrev, keep the row where q_selected is True,
# then restore the post-merge row order.
merged = merged.sort_values('q_selected', ascending=False).drop_duplicates(
    'question_abbrev').sort_index()

# Surface survey questions that failed to match instead of carrying them
# along silently with NaN performance values.
unmatched = merged.loc[merged['question'].isna(), 'question_abbrev']
if len(unmatched) > 0:
    print(f'Warning: {len(unmatched)} survey question(s) have no match in '
          f'ravg: {list(unmatched)}')

merged['Standard Error'] = (
    merged['Standard Deviation'] / np.sqrt(N_SURVEY_RESPONSES))

# Compare against True explicitly: unmatched rows carry NaN in `q_selected`,
# and NaN is truthy, so a plain `if x` would label them 'Selected 35'.
merged['legend'] = np.where(
    merged['q_selected'].eq(True), 'Selected 35', 'Unselected')

merged = merged.sort_values('corrs_test_mean', ascending=False)

# Map each full question to its category name; questions without a mapping
# keep their own text as the category.
merged['category'] = merged['question'].apply(
    lambda q: viz.REMAP_QUESTIONS_TO_CATEGORY_NAMES.get(q, q))

In [39]:
# Mean survey rating ('Average') over the questions whose single-question
# model reaches a test correlation below 0.01.
merged.loc[merged['corrs_test_mean'] < 0.01, 'Average'].mean()

np.float64(1.416)

In [43]:
from scipy.stats import pearsonr

# Keep only questions whose single-question test correlation exceeds 0.01.
above_floor = merged['corrs_test_mean'] > 0.01
vals = merged.loc[above_floor, ['Average', 'corrs_test_mean']]

# Pearson correlation (and its p-value) between the survey rating and the
# test correlation over those questions.
corr, pval = pearsonr(vals['Average'], vals['corrs_test_mean'])
print(f'Correlation: {corr:.3f}, p-value: {pval:.3f}')

Correlation: 0.374, p-value: 0.017


In [26]:
# # merged = merged[['question_abbrev', 'Average', 'Standard Deviation', 'corrs_test_mean']].sort_values(by='corrs_test_mean', ascending=False)
# with pd.option_context('display.max_colwidth', None,
#                        'display.max_rows', None):
#     display(merged[merged.corrs_test_mean.isna()]['question_abbrev'])
#     # display(ravg.question_abbrev)

In [31]:
plt.figure(figsize=(6, 3))

# Standard-error bars of the survey rating, drawn faintly and behind the
# markers (negative zorder) so they do not obscure the points.
plt.errorbar(
    merged['corrs_test_mean'], merged['Average'],
    yerr=merged['Standard Error'], fmt='none', capsize=2,
    elinewidth=1, alpha=0.35, color='gray', zorder=-11,
)

# Colour encodes the question category; marker style encodes whether the
# question belongs to the selected-35 set.
# NOTE(review): categories missing from `hue_order` appear to be left out of
# the plot by seaborn — confirm every value in merged['category'] is listed.
sns.scatterplot(
    data=merged, x='corrs_test_mean', y='Average',
    hue='category',
    style='legend',
    hue_order=[
        'Visuospatial', 'Communication',
        'Beliefs, values, emotions', 'Numeric',
        'Tactile', 'Other',
    ],
    style_order=['Selected 35', 'Unselected'],
    s=100, alpha=0.9,
)
plt.xlabel('Test correlation using single-question model')
plt.ylabel('Expert rating')

# One legend call only, placed outside the axes on the right. An earlier,
# default-positioned legend would be replaced by this one anyway.
plt.legend(title='', frameon=False, handletextpad=0.1, loc='upper left',
           bbox_to_anchor=(1, 1), ncol=1)
neuro.viz.savefig(
    'monosemantic/single_question_perf_vs_survey.png',
    bbox_inches='tight', dpi=400)
plt.show()