In [44]:
import os
import time
from multiprocessing import Pool

import visual_behavior.database as db
from visual_behavior.utilities import EyeTrackingData

%widescreen
%standard_imports
%matplotlib notebook

imported:
	os
	pandas as pd
	numpy as np
	matplotlib.pyplot as plt


## custom functions

In [38]:
def get_eye_data_fraction_na(ophys_experiment_id):
    '''
    fraction of NA pupil-area rows in the processed eye tracking object

    the experiment ID is converted to its ophys session ID first, because
    EyeTrackingData is constructed from the session ID
    '''
    session_id = db.convert_id(
        {'ophys_experiment_id': ophys_experiment_id},
        'ophys_session_id',
    )
    pupil_area = EyeTrackingData(session_id).ellipse_fits['pupil']['area']
    # mean of the boolean NA mask == fraction of rows that are NA
    is_missing = pupil_area.isna()
    return is_missing.mean()

In [39]:
def get_ellipse_fit_fraction_na(ophys_experiment_id):
    '''
    get fraction of rows = NA in ellipse fit file

    looks up the 'EyeTracking Ellipses' well known file for the session that
    the experiment belongs to, reads its '/pupil' table and returns the
    fraction of rows whose 'center_x' value is NA

    Parameters
    ----------
    ophys_experiment_id :
        experiment ID, converted to an ophys session ID via db.convert_id

    Returns
    -------
    fraction (between 0 and 1) of '/pupil' rows with NA in 'center_x'

    Raises
    ------
    KeyError
        if the session has no well known file named 'EyeTracking Ellipses'
    '''
    ophys_session_id = db.convert_id({'ophys_experiment_id': ophys_experiment_id}, 'ophys_session_id')
    well_known_files = db.get_well_known_files(ophys_session_id).set_index('name')
    eye_tracking_ellipses_entry = well_known_files.loc['EyeTracking Ellipses']
    # os.path.join adds a separator only when one is missing, so the path is
    # valid whether or not storage_directory ends with a trailing slash
    # (plain string concatenation silently produced a wrong path without it)
    eye_tracking_ellipses_path = os.path.join(
        eye_tracking_ellipses_entry['storage_directory'],
        eye_tracking_ellipses_entry['filename'],
    )
    eye_tracking_ellipses = pd.read_hdf(eye_tracking_ellipses_path, key='/pupil')
    # mean of the boolean NA mask == fraction of rows that are NA
    return eye_tracking_ellipses['center_x'].isna().mean()

In [43]:
def get_both_fractions(ophys_experiment_id):
    '''
    compute both NA fractions for a single experiment

    prints a message when work on the experiment starts and another, with the
    elapsed wall-clock time, when it finishes; returns a dict holding the
    experiment ID and the two fractions
    '''
    print('on {}'.format(ophys_experiment_id))
    start_time = time.time()

    # processed eye data first, then the ellipse fit file
    processed_fraction = get_eye_data_fraction_na(ophys_experiment_id)
    ellipse_file_fraction = get_ellipse_fit_fraction_na(ophys_experiment_id)

    elapsed = time.time() - start_time
    print('done with {}, that took {} seconds'.format(ophys_experiment_id, elapsed))

    return {
        'ophys_experiment_id': ophys_experiment_id,
        'fraction_na_in_processed_eye_data': processed_fraction,
        'fraction_na_in_ellipse_fit_file': ellipse_file_fraction,
    }

## ophys experiment IDs (from Peter L via Slack)

In [48]:
# ophys experiment IDs to check, in the order they are processed and reported
oeids = [
    503772253, 509580400, 509841198, 510021399, 510390912, 510699005,
    510814438, 510524416, 512326618, 503324629, 502483554, 504642019,
    506144725, 506773185, 507304910, 510214538, 511458599, 511573879,
]

## get fraction NA in processed result and ellipse files (parallelized)

In [49]:

# never start more workers than there are experiments (the extra processes
# would sit idle); keep at least one because Pool rejects a count below 1
n_workers = max(1, min(32, len(oeids)))
with Pool(n_workers) as pool:
    # pool.map returns results in the same order as oeids
    ans = pool.map(get_both_fractions, oeids)
# one row per experiment, one column per key of the returned dicts
summary_df = pd.DataFrame(ans)

on 509841198
on 503772253
on 510699005
on 509580400
on 510390912
on 510021399
on 512326618
on 510214538
on 503324629
on 510814438
on 504642019
on 502483554
on 506773185
on 507304910
on 510524416
on 506144725
on 511458599
on 511573879


  return (a - mns) / sstd
  return (a - mns) / sstd
  return (a - mns) / sstd
  return (a - mns) / sstd


done with 510814438, that took 15.507771492004395 seconds


  return (a - mns) / sstd
  return (a - mns) / sstd
  return (a - mns) / sstd


done with 503772253, that took 15.595768690109253 seconds
done with 504642019, that took 16.197500228881836 seconds
done with 510021399, that took 16.6600661277771 seconds
done with 503324629, that took 16.66206645965576 seconds
done with 506144725, that took 16.666584014892578 seconds
done with 510524416, that took 16.718434810638428 seconds


  return (a - mns) / sstd
  return (a - mns) / sstd


done with 502483554, that took 17.660895586013794 seconds


  return (a - mns) / sstd
  return (a - mns) / sstd
  return (a - mns) / sstd
  return (a - mns) / sstd


done with 509841198, that took 17.824145555496216 seconds


  return (a - mns) / sstd
  return (a - mns) / sstd


done with 511573879, that took 17.88774561882019 seconds
done with 510214538, that took 18.199673652648926 seconds
done with 510390912, that took 18.202727794647217 seconds
done with 511458599, that took 18.1720449924469 seconds
done with 509580400, that took 18.212042570114136 seconds
done with 512326618, that took 18.271979808807373 seconds


  return (a - mns) / sstd
  return (a - mns) / sstd


done with 510699005, that took 18.90243887901306 seconds


  return (a - mns) / sstd


done with 506773185, that took 18.96406054496765 seconds
done with 507304910, that took 19.119776248931885 seconds


## compare
(the fraction of missing rows is the same in the processed eye data and in the ellipse fit file)

In [50]:
# display the summary table (one row per experiment) so the two NA fractions can be compared side by side
summary_df

Unnamed: 0,ophys_experiment_id,fraction_na_in_processed_eye_data,fraction_na_in_ellipse_fit_file
0,503772253,0.425012,0.425012
1,509580400,0.031192,0.031192
2,509841198,0.033101,0.033101
3,510021399,0.020532,0.020532
4,510390912,0.095661,0.095661
5,510699005,0.187732,0.187732
6,510814438,0.022044,0.022044
7,510524416,0.013853,0.013853
8,512326618,0.00013,0.00013
9,503324629,0.237887,0.237887
