# A notebook for loading and plotting SDK validation results, plus an example of viewing detailed error logs
Doug Ollerenshaw, 4/13/2020

# imports

In [None]:
import visual_behavior.data_access.loading as loading
import visual_behavior.validation.sdk as sdk_validation
from visual_behavior.validation.sdk import ValidateSDK
from visual_behavior import database as db

import datetime


%widescreen
%standard_imports

# load data

## get behavior session table from cache, merge in some other columns

In [None]:
# Cache object returned by the validation module; the behavior session table is read from it.
cache = sdk_validation.get_cache()
behavior_session_table = cache.get_behavior_session_table()
# LIMS lookups: (id, code) for every project and (id, project_id) for every ophys session.
project_table = db.lims_query("select id,code from projects")
# NOTE(review): ophys_session_table is not referenced again in the visible cells — confirm it is still needed.
ophys_session_table = db.lims_query("select id,project_id from ophys_sessions")
# Experiment table from the loading module; its ophys_session_id / specimen_id columns
# are used further down to decide which sessions are plotted.
filtered_ophys_experiment_table = loading.get_filtered_ophys_experiment_table()

## get project code using behavior sessions and specimens
(thanks Nick M!)

In [None]:
# Pair each behavior session id with the project id of a specimen that shares its donor
# (specimens -> donors -> behavior_sessions).
query = '''SELECT behavior_sessions.id, specimens.project_id FROM specimens
JOIN donors ON specimens.donor_id=donors.id
JOIN behavior_sessions ON donors.id=behavior_sessions.donor_id'''

# Look up the project code for each project id, keep one row per behavior session,
# and index the result by behavior_session_id.
behavior_id_project_id_map = (
    db.lims_query(query)
    .rename(columns={'id': 'behavior_session_id'})
    .merge(project_table, how='left', left_on='project_id', right_on='id')
    .drop(columns=['id'])
    .rename(columns={'code': 'project_code'})
    .drop_duplicates('behavior_session_id')
    .set_index('behavior_session_id')
)

In [None]:
# Resolve every distinct donor id to a specimen id (first row returned by the lookup) ...
donor_to_specimen_map = dict(
    (donor, db.get_mouse_ids('donor_id', donor)['specimen_id'].iloc[0])
    for donor in behavior_session_table['donor_id'].unique()
)
# ... then add the matching specimen id to each behavior session (new column, assigned in place).
behavior_session_table['specimen_id'] = behavior_session_table['donor_id'].map(
    lambda donor: donor_to_specimen_map[donor]
)

## Get cached validation results from Mongo database

In [None]:
## get validation results from mongo
validation_results = sdk_validation.get_validation_results().sort_index()

# compare length of validation results and length of behavior session table to get percent complete
# this was useful when monitoring progress of cluster jobs used to do validation
print('Jobs are {:0.2f}% complete'.format(100*len(validation_results)/len(behavior_session_table)))

# merge in behavior session table
# (left join: validation rows with no matching behavior session keep NaN in the merged columns)
validation_results = validation_results.merge(
    behavior_session_table,
    left_index=True,
    right_index=True,
    how='left',
).sort_values(by=['is_ophys','session_type','behavior_session_id'])

# filter out the NP (rows whose equipment_name contains 'NP')
# na=False: a missing equipment_name counts as "does not contain 'NP'", so the mask stays
# strictly boolean; without it a NaN in the mask makes `~` / boolean indexing raise
validation_results = validation_results[
    ~validation_results['equipment_name'].str.contains('NP', na=False)
]

# merge in project code
validation_results = validation_results.merge(
    behavior_id_project_id_map,
    left_index=True,
    right_index=True,
    how='left'
)

# specimen_id is read by the row-wise QC filter applied in a later cell; fail early with a
# clear message if it did not come through the merges (replaces a bare, discarded expression)
assert 'specimen_id' in validation_results.columns, \
    "specimen_id missing from validation_results; run the cell that adds it to behavior_session_table"

# get filtered ophys session IDs and specimen IDs (sessions that have passed QC)
filtered_ophys_session_ids = list(np.sort(filtered_ophys_experiment_table['ophys_session_id'].unique()))
filtered_specimen_ids = list(np.sort(filtered_ophys_experiment_table['specimen_id'].unique()))

In [None]:
def in_filtered_list(row):
    """Return True if a validation-results row belongs in the filtered (QC-passed) set.

    Reads the module-level lists ``filtered_ophys_session_ids`` and
    ``filtered_specimen_ids``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'ophys_session_id' and 'specimen_id'.

    Returns
    -------
    bool
        Ophys rows (non-null 'ophys_session_id'): whether that session id is in
        ``filtered_ophys_session_ids``.
        Behavior-only rows (null 'ophys_session_id'): whether the animal's
        specimen id is in ``filtered_specimen_ids``, i.e. the animal has at
        least one session in the filtered table.
    """
    session_id = row['ophys_session_id']
    if pd.isnull(session_id):
        # behavior only: judge by the animal rather than by the session
        return row['specimen_id'] in filtered_specimen_ids
    # ophys: the session itself must be in the filtered list
    return session_id in filtered_ophys_session_ids
        

# Flag (rather than drop) each row with whether it passes the QC-based filter:
# in_filtered_list is evaluated once per row and its result is stored in 'in_filtered_table'.
# Rows are only excluded later, where the plots select in_filtered_table == True.
validation_results['in_filtered_table'] = validation_results.apply(in_filtered_list, axis=1)

## For behavior-only and ophys sessions, what is the breakdown of the QC filter (True/False)?

In [None]:
# Rows where is_ophys is False: how many are / are not in the filtered table
validation_results.query('is_ophys == False')['in_filtered_table'].value_counts()

In [None]:
# Rows where is_ophys is True: how many are / are not in the filtered table
validation_results.query('is_ophys == True')['in_filtered_table'].value_counts()

# Generate and show the figure (interactive plotly figure is displayed inline)

In [None]:
# Toggle: also write the figure to disk as a standalone HTML file.
savefig = True

# Row order used for the heatmap.
sort_by = ['is_ophys', 'project_code', 'session_type', 'behavior_session_id']

# Plot only rows that are in the filtered table and have a session type.
fig = sdk_validation.make_sdk_heatmap(
    validation_results
    .loc[lambda df: df['in_filtered_table'].eq(True) & df['session_type'].notnull()]
    .sort_values(by=sort_by)
)
if savefig:
    # NOTE(review): absolute, user-specific output path — will fail on machines without this directory.
    fig.write_html("/home/dougo/code/dougollerenshaw.github.io/figures_to_share/sdk_validation_matrix.html")
fig.show()


In [None]:
# Toggle: also write each figure to disk as a standalone HTML file.
savefig = True

# Row order used for the heatmaps.
sort_by = ['is_ophys', 'project_code', 'session_type', 'behavior_session_id']

# One heatmap per session class: ophys sessions first, then behavior-only sessions.
for title, is_ophys in [('ophys_only', True), ('behavior_only', False)]:
    fig = sdk_validation.make_sdk_heatmap(
        # same selection as the combined figure, further restricted to the current class
        validation_results
        .loc[
            lambda df: df['in_filtered_table'].eq(True)
            & df['session_type'].notnull()
            & df['is_ophys'].eq(is_ophys)
        ]
        .sort_values(by=sort_by),
        title_addendum=' - {} - '.format(title),
    )
    if savefig:
        # NOTE(review): absolute, user-specific output path — will fail on machines without this directory.
        fig.write_html("/home/dougo/code/dougollerenshaw.github.io/figures_to_share/sdk_validation_matrix_{}.html".format(title))
    fig.show()

# View some error logs

### start with one of the mesoscope sessions, 974634733

In [None]:
# Session whose validation results and error logs are inspected in the following cells.
# NOTE(review): the heading above this cell mentions 974634733, which is not the id used
# here — confirm which session is intended.
# behavior_session_id = 886424031
behavior_session_id = 873813922

# Look up the validation results for this behavior_session_id only
sdk_validation.get_validation_results(behavior_session_id)

In [None]:
# Error logs for the selected session, with the '_id' column dropped
# (presumably the Mongo document id — confirm).
error_log = sdk_validation.get_error_logs(behavior_session_id).drop(columns=['_id'])
error_log

In [None]:
# Print the full traceback text of one log entry.
# NOTE(review): row position 26 is hard-coded, so this only works when the selected
# session's error log has at least 27 rows.
print(error_log.iloc[26]['traceback'])

In [None]:
# Mongo filter: case-insensitive regex match against the stored traceback text.
# NOTE(review): the '.' characters in the pattern are regex wildcards, so the match is
# slightly looser than a literal string comparison.
query = {
    "traceback": {
        "$regex": 'ValueError: No photodiode events found. Please check the input data for errors. ',
        "$options": 'i',  # case-insensitive
    }
}

conn = db.Database('visual_behavior_data')
try:
    # errors='ignore': when nothing matches, the frame is empty and has no '_id' column to drop
    matching_errors = pd.DataFrame(
        list(conn['sdk_validation']['error_logs'].find(query))
    ).drop(columns='_id', errors='ignore')
finally:
    # always release the connection, even if the query or the frame construction raises
    conn.close()

In [None]:
# Keep only the matching error-log rows whose sdk_version is "1.7.1"
matching_errors.query('sdk_version == "1.7.1"')