In [1]:
%load_ext autoreload
%autoreload 2
import os
from os.path import join, expanduser
from tqdm import tqdm
import pandas as pd
import joblib
import sys
import numpy as np
from copy import deepcopy
from neuro.features.feat_select import get_alphas
from neuro.features.qa_questions import get_questions

In [2]:
# Shared root that holds one sub-directory of sparse-feature files per run.
sparse_feats_dir = expanduser('~/mntv1/deep-fMRI/qa/sparse_feats_shared/')
# The listing itself is discarded (not the cell's last expression); the call
# still fails fast if the directory is missing or unreadable.
os.listdir(sparse_feats_dir)

# Sub-directory of the run analysed below.
# Alternative run:
#   'qa_embedder___qa_questions_version=v3_boostexamples___ensemble1'
run_name = 'eng1000___qa_questions_version=v1___mistralai-Mistral-7B-Instruct-v0.2'
qa_sparse_feats_dir = join(sparse_feats_dir, run_name)

# Alpha values used to build the file names that are looked up below.
# NOTE(review): the key passed to get_alphas presumably has to match the run
# selected above -- confirm when switching runs.
# Alternatives: np.logspace(0, -3, 20), get_alphas('qa_embedder')
alphas = get_alphas('eng1000')

In [3]:
# Load, for every (seed, alpha) pair, the saved coefficient array and reduce it
# to a boolean per-feature selection mask.
#   enets.loc[seed, alpha]    -> boolean mask (left as NaN if the file is missing)
#   nonzeros.loc[seed, alpha] -> number of True entries in that mask (NaN if missing)
seeds = range(5)
nonzeros = pd.DataFrame(index=seeds, columns=alphas).astype(float)
enets = pd.DataFrame(index=seeds, columns=alphas)
fnames = os.listdir(qa_sparse_feats_dir)
for seed in tqdm(seeds):
    # leave=False clears the inner bar once a seed is finished, so the cell
    # output keeps a single outer progress bar instead of one bar per seed.
    for alpha in tqdm(alphas, leave=False):
        template = f'seed={seed}___feature_selection_frac=0.50___feature_selection_alpha={alpha:.2e}.joblib'
        if template not in fnames:
            # Runs that were never written are skipped; their cells stay NaN.
            continue
        # joblib.load unpickles the file -- only point this at trusted files.
        coef_enet = joblib.load(join(qa_sparse_feats_dir, template))
        # A feature counts as selected if any entry along axis 0 is nonzero.
        # NOTE(review): presumably axis 0 indexes fitted targets and the last
        # axis indexes features -- confirm against the code that writes these files.
        # np.any already returns a fresh array, so no extra copy is needed.
        coef_enet_selected = np.any(np.abs(coef_enet) > 0, axis=0).squeeze()
        enets.loc[seed, alpha] = coef_enet_selected
        nonzeros.loc[seed, alpha] = coef_enet_selected.sum()

100%|██████████| 9/9 [00:00<00:00, 12.11it/s]
100%|██████████| 9/9 [00:00<00:00, 12.40it/s]
100%|██████████| 9/9 [00:00<00:00, 13.42it/s]
100%|██████████| 9/9 [00:00<00:00, 12.41it/s]
100%|██████████| 9/9 [00:00<00:00, 12.38it/s]
100%|██████████| 5/5 [00:03<00:00,  1.38it/s]


In [4]:
# Feature counts noted for reference: v3_boost_examples has 674, eng1000 has 985
nonzeros.columns.name = 'alpha'
nonzeros.index.name = 'seed'

# For every alpha, combine the per-seed boolean masks stored in `enets`:
#   coefs_stable_dict[alpha] -> True where the feature is selected in EVERY loaded seed
#   coefs_all_dict[alpha]    -> True where the feature is selected in AT LEAST ONE seed
# An alpha for which no seed was loaded gets NaN in the 'stable' row and is left
# out of both dicts. (Previously that case either raised a NameError or silently
# stored the masks of the preceding alpha under the wrong key.)
coefs_stable_dict = {}
coefs_all_dict = {}
for col in nonzeros.columns:
    coefs_list = enets[col]
    coefs_list = coefs_list[coefs_list.notna()]

    if len(coefs_list) == 0:
        nonzeros_stable = np.nan
        nonzeros.loc['stable', col] = nonzeros_stable
        continue

    # Stack to (n_loaded_seeds, n_features); on boolean data the column-wise
    # max is a logical OR and the column-wise min is a logical AND.
    coefs_arr = np.vstack(coefs_list.values)
    coefs_all = coefs_arr.max(axis=0)
    coefs_stable = coefs_arr.min(axis=0)
    nonzeros_stable = np.sum(coefs_stable)

    # Extra summary row: number of features selected across all loaded seeds.
    nonzeros.loc['stable', col] = nonzeros_stable
    coefs_all_dict[col] = deepcopy(coefs_all)
    coefs_stable_dict[col] = deepcopy(coefs_stable)

display(
    nonzeros
    .style
    .background_gradient(cmap='viridis', axis=None)
    .format('{:.0f}')
)

alpha,0.335982,0.233572,0.190000,0.162378,0.112884,0.078476,0.054556,0.037927,0.026367
seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,10,62,115,155,227,344,560,762,896.0
1,13,53,111,148,229,343,562,759,892.0
2,14,58,107,144,233,351,550,764,
3,10,56,106,145,230,346,554,759,899.0
4,11,60,115,153,224,347,560,760,897.0
stable,5,20,53,83,171,290,501,729,883.0


### Top questions

In [9]:
# For the first few alphas (in the insertion order of the mask dicts), show:
#   STABLE   -- questions selected in every seed that were not already stable at
#               the previous alpha, plus any that were stable before and got DROPPED
#   UNSTABLE -- questions selected in at least one seed but not in all of them
# NOTE(review): the question set is hard-coded to 'v3_boostexamples', while the
# setup cell loads masks from an 'eng1000 ... v1' directory -- confirm the two
# match before trusting this listing.
questions = np.array(get_questions('v3_boostexamples', full=True))

# Materialise the mask lists once instead of rebuilding them on every access.
stable_masks = list(coefs_stable_dict.values())
all_masks = list(coefs_all_dict.values())
# Show at most four alphas, but never index past the masks that exist.
n_shown = min(4, len(stable_masks))

qs_prev = []
for i in range(n_shown):
    stable_mask = stable_masks[i]
    all_mask = all_masks[i]
    if len(stable_mask) != len(questions):
        # Without this check NumPy fails with an opaque boolean-index error.
        raise ValueError(
            f'mask has {len(stable_mask)} entries but there are '
            f'{len(questions)} questions; the selected question set does not '
            'match the loaded features')

    qs = sorted(questions[stable_mask].tolist())
    qs_unstable = sorted(questions[all_mask].tolist())
    # Sets give constant-time membership tests; the sorted lists fix the display order.
    qs_set = set(qs)
    qs_prev_set = set(qs_prev)

    print(i, 'num questions:', len(qs))

    print('----STABLE---')
    display([q for q in qs if q not in qs_prev_set])
    for q in qs_prev:
        if q not in qs_set:
            print('[DROPPED]', q)

    print('----UNSTABLE---')
    display([q for q in qs_unstable if q not in qs_set])

    qs_prev = qs

0 num questions: 10
----STABLE---


['Does the input include a philosophical or reflective thought?',
 'Does the sentence contain a proper noun?',
 'Does the sentence describe a physical action?',
 'Does the sentence describe a relationship between people?',
 'Does the sentence include dialogue or thoughts directed towards another character?',
 'Does the sentence involve a description of an interpersonal misunderstanding or dispute?',
 'Does the sentence involve a description of physical environment or setting?',
 'Does the sentence involve the mention of a specific object or item?',
 'Does the sentence reference a specific location or place?',
 'Is time mentioned in the input?']

----UNSTABLE---


['Does the input involve planning or organizing?',
 'Does the sentence describe a personal or social interaction that leads to a change or revelation?',
 'Does the sentence describe a visual experience or scene?',
 'Does the sentence include technical or specialized terminology?',
 'Does the sentence involve a discussion about personal or social values?',
 'Does the sentence involve an expression of personal values or beliefs?',
 'Does the sentence mention a specific location or place?',
 'Is the input about a relationship with another person?']

1 num questions: 21
----STABLE---


['Does the input involve planning or organizing?',
 'Does the sentence describe a personal or social interaction that leads to a change or revelation?',
 'Does the sentence describe a specific sensation or feeling?',
 'Does the sentence describe a visual experience or scene?',
 'Does the sentence include dialogue?',
 'Does the sentence include technical or specialized terminology?',
 'Does the sentence involve a discussion about personal or social values?',
 'Does the sentence involve an expression of personal values or beliefs?',
 'Does the sentence mention a specific location or place?',
 'Is the input related to a specific industry or profession?',
 "Is the sentence conveying the narrator's physical movement or action in detail?"]

----UNSTABLE---


['Does the input describe a sensory experience?',
 'Does the input describe a specific texture or sensation?',
 'Does the sentence contain a cultural reference?',
 'Does the sentence describe a sensory experience?',
 "Does the sentence express the narrator's opinion or judgment about an event or character?",
 'Does the sentence include numerical information?',
 'Does the sentence involve spatial reasoning?',
 'Is the input about a relationship with another person?']

2 num questions: 28
----STABLE---


['Does the input describe a specific texture or sensation?',
 'Does the sentence contain a cultural reference?',
 'Does the sentence contain a negation?',
 'Does the sentence express a sense of belonging or connection to a place or community?',
 "Does the sentence express the narrator's opinion or judgment about an event or character?",
 'Does the sentence include numerical information?',
 'Does the sentence reference a specific time or date?',
 'Is the sentence in the passive voice?']

[DROPPED] Does the sentence involve a description of an interpersonal misunderstanding or dispute?
----UNSTABLE---


['Does the input describe a sensory experience?',
 'Does the input include a comparison or metaphor?',
 'Does the input involve a debate or controversial topic?',
 'Does the sentence describe a physical sensation?',
 'Does the sentence describe a sensory experience?',
 'Does the sentence include a direct speech quotation?',
 'Does the sentence include a number or statistic?',
 'Does the sentence involve a description of an emotional reaction to a surprise or unexpected event?',
 'Does the sentence involve a description of an interpersonal misunderstanding or dispute?',
 'Does the sentence involve moral reasoning?',
 'Does the sentence involve spatial reasoning?',
 'Does the sentence use irony or sarcasm?',
 'Does the text describe a journey?',
 'Does the text include a planning or decision-making process?',
 'Is the input about a relationship with another person?',
 'Is the sentence conveying a strategic or tactical thought by the narrator?',
 'Is the sentence expressing skepticism or 

3 num questions: 44
----STABLE---


['Does the input discuss a societal issue or social justice topic?',
 'Does the input include a comparison or metaphor?',
 'Does the sentence describe a physical sensation?',
 'Does the sentence describe a routine activity?',
 'Does the sentence describe a sensory experience?',
 'Does the sentence express a philosophical or existential query or observation?',
 'Does the sentence include a direct speech quotation?',
 'Does the sentence include a number or statistic?',
 'Does the sentence include a personal anecdote or story?',
 'Does the sentence involve moral reasoning?',
 'Does the sentence involve spatial reasoning?',
 'Does the sentence use a unique or unusual word?',
 'Does the sentence use irony or sarcasm?',
 'Does the text describe a journey?',
 'Is the input about a practical joke or prank?',
 'Is the sentence a command?',
 'Is the sentence designed to persuade or convince?',
 'Is there mention of a city, country, or geographic feature?']

[DROPPED] Does the sentence express the narrator's opinion or judgment about an event or character?
[DROPPED] Does the sentence include dialogue or thoughts directed towards another character?
----UNSTABLE---


['Does the input contain a measurement?',
 'Does the input describe a sensory experience?',
 'Does the input describe a specific emotion in detail?',
 'Does the input involve a debate or controversial topic?',
 'Does the input involve a philosophical question or theory?',
 'Does the input mention a routine or daily activity?',
 'Does the sentence convey a decision or choice made by the narrator?',
 'Does the sentence convey a sense of urgency or haste?',
 'Does the sentence describe a moment of relief or resolution of tension?',
 'Does the sentence describe a personal reflection or thought?',
 'Does the sentence describe a physical sensation (e.g., touch, taste)?',
 'Does the sentence describe a significant personal interaction?',
 'Does the sentence describe an activity related to daily life or routine?',
 'Does the sentence describe an experience of learning or gaining new knowledge?',
 "Does the sentence express the narrator's opinion or judgment about an event or character?",
 'Doe