In [None]:
from ibl_pipeline import ephys, acquisition, subject, data
from tqdm import tqdm

In [None]:
# Load the eids of all brainwidemap sessions; every non-blank line of the
# text file is treated as one eid.
with open('notebooks_qc/brainwidemap_sessions.txt', 'r') as file:
    eids_raw = file.readlines()

# Strip surrounding whitespace/newlines and drop blank lines, so that a
# trailing empty line in the file never ends up in the set as a bogus '' eid.
eids = {eid.strip() for eid in eids_raw if eid.strip()}

In [None]:
# Number of unique session eids currently held in `eids`
len(eids)

In [None]:
# Check whether required datasets are missing
required_datasets = ephys.CompleteClusterSession.required_datasets

# Sessions without any DefaultCluster entry are split into two groups:
#   * complete   -> every required dataset has an existing FileRecord in a
#                   repository whose name matches "flatiron_%"
#   * incomplete -> at least one required dataset has no such record; the
#                   dict maps eid -> list of the missing dataset names
missing_sessions_with_complete_datasets = []
missing_sessions_with_incomplete_datasets = dict()
for eid in tqdm(eids):
    key = acquisition.Session & {'session_uuid': eid}
    if ephys.DefaultCluster & key:
        # cluster data already present, nothing to report for this session
        continue

    # Restriction shared by all required datasets; built once per session
    # instead of once per dataset.
    existing_records = (
        data.FileRecord & key & 'repo_name LIKE "flatiron_%"' & {'exists': 1})
    missing_datasets = [
        dataset for dataset in required_datasets
        if not existing_records & {'dataset_name': dataset}]

    if missing_datasets:
        missing_sessions_with_incomplete_datasets[eid] = missing_datasets
    else:
        missing_sessions_with_complete_datasets.append(eid)

# Adjacent string literals are used instead of a backslash continuation
# inside the f-string, which leaked the source indentation into the output.
print('Number of sessions with complete datasets but missing cluster data: '
      f'{len(missing_sessions_with_complete_datasets)}')

print('Number of sessions with incomplete datasets: '
      f'{len(missing_sessions_with_incomplete_datasets)}')

In [None]:
import json

import numpy as np

# np.save writes exactly one array per call: its third positional parameter
# is `allow_pickle`, so passing the dict there never stored it. Save the two
# results separately instead.

# eids of sessions with complete datasets but no cluster data; this is the
# file read back later with np.load('missing_sessions.npy').
np.save('missing_sessions', missing_sessions_with_complete_datasets)

# eid -> missing dataset names, kept as human-readable JSON.
# default=str guards against values that are not natively JSON-serializable.
with open('missing_sessions_incomplete_datasets.json', 'w') as json_file:
    json.dump(missing_sessions_with_incomplete_datasets, json_file,
              indent=2, default=str)

In [None]:
import numpy as np
# Read the saved array back from disk. NOTE: this rebinds `eids`, which
# earlier in the notebook held the full set of session eids read from the
# text file; from here on `eids` is whatever 'missing_sessions.npy' contains.
eids = np.load('missing_sessions.npy')

In [None]:
# Populate DefaultCluster for the sessions listed in `eids` only.
# A list of dicts used as a restriction selects sessions matching any entry.
# suppress_errors=True: per the DataJoint docs, errors are collected and
# returned rather than raised.
session_restrictions = [{'session_uuid': eid} for eid in eids]
ephys.DefaultCluster.populate(
    acquisition.Session & session_restrictions,
    display_progress=True,
    suppress_errors=True,
)

In [None]:
# check with ONE whether these missing datasets are really missing
from oneibl.one import ONE
one = ONE()

# For each session flagged as incomplete, list the datasets Alyx knows about
# and report every one whose name was recorded as missing.
for eid, missing_names in tqdm(missing_sessions_with_incomplete_datasets.items()):
    for d in one.alyx.rest('datasets', 'list', session=eid):
        if d['name'] not in missing_names:
            continue
        print(f'File {d["name"]} exists for session {eid}')

In [None]:
import alf.io

# check data length consistency of the 17 sessions

# Dataset types requested from ONE for every probe; the session-specific
# spikes.times dataset type is appended inside the loop.
ephys_dtypes = [
    'clusters.amps',
    'clusters.channels',
    'clusters.depths',
    'clusters.metrics',
    'clusters.peakToTrough',
    'clusters.uuids',
    'clusters.waveforms',
    'clusters.waveformsChannels',
    'spikes.amps',
    'spikes.clusters',
    'spikes.depths',
    'spikes.samples',
    'spikes.templates'
]

# Only the first session is processed for now (slice [:1]).
for eid in missing_sessions_with_complete_datasets[:1]:
    session_key = acquisition.Session & {'session_uuid': eid}
    probe_keys = (ephys.ProbeInsertion & session_key).fetch('KEY')

    for key in probe_keys:
        # Name of the spikes.times dataset for this probe, without the
        # '.npy' extension. fetch1 requires exactly one matching record.
        spikes_times_records = (
            data.FileRecord & key & 'dataset_name like "%spikes.times%.npy"')
        spikes_times_dtype_name = spikes_times_records.fetch1(
            'dataset_name').split('.npy')[0]
        dtypes = ephys_dtypes + [spikes_times_dtype_name]

        # Download the files (clobber=True) and derive the local session
        # path from the first returned file.
        files = one.load(
            eid, dataset_types=dtypes, download_only=True, clobber=True)
        ses_path = alf.io.get_session_path(files[0])

        probe_name = (ephys.ProbeInsertion & key).fetch1('probe_label')

        # Both ALF objects live in the probe's folder under <session>/alf
        probe_alf_path = ses_path.joinpath('alf', probe_name)
        clusters = alf.io.load_object(probe_alf_path, 'clusters')
        spikes = alf.io.load_object(probe_alf_path, 'spikes')

        time_fnames = [k for k in spikes.keys() if 'times' in k]

        # TODO: check clusters.* data, report if length does not match the
        # length of clusters.uuids (not implemented yet)

In [None]:
# check what's happening with the 17 sessions
# Errors are not suppressed here, so the first failing session raises and
# shows its traceback.
complete_session_restrictions = [
    {'session_uuid': eid} for eid in missing_sessions_with_complete_datasets
]
ephys.DefaultCluster.populate(
    acquisition.Session & complete_session_restrictions,
    display_progress=True,
)

In [None]:
# Load and display the 'spikes' ALF object for the probe folder given by
# `ses_path` / `probe_name`, i.e. whatever the most recently run loading
# loop left in those variables.
alf.io.load_object(ses_path.joinpath('alf', probe_name), object='spikes')

In [None]:
import datajoint as dj
# Show the session_uuid values of sessions that have DefaultCluster entries
# (dj.U restricted by a query yields the distinct values, per DataJoint docs).
dj.U('session_uuid') & (acquisition.Session & ephys.DefaultCluster)

In [None]:
# Single session picked for a closer look; this overwrites the `eid` left
# behind by the loops in earlier cells.
eid = 'f354dc45-caef-4e3e-bd42-2c19a5425114'

In [None]:
# Load clusters/spikes ALF objects for every probe of the session in `eid`.
# Relies on `ephys_dtypes`, `one` and `alf` defined in earlier cells.
session_key = acquisition.Session & {'session_uuid': eid}
probe_keys = (ephys.ProbeInsertion & session_key).fetch('KEY')

for key in probe_keys:
    # Name of the spikes.times dataset for this probe, without '.npy'.
    # NOTE(review): the consistency-check cell earlier in the notebook matches
    # "%spikes.times%.npy" while this one uses "%spikes.times.npy" - confirm
    # which pattern is intended.
    spikes_times_records = (
        data.FileRecord & key & 'dataset_name like "%spikes.times.npy"')
    spikes_times_dtype_name = spikes_times_records.fetch1(
        'dataset_name').split('.npy')[0]
    dtypes = ephys_dtypes + [spikes_times_dtype_name]

    # Download the files (clobber=True) and derive the local session path
    # from the first returned file.
    files = one.load(
        eid, dataset_types=dtypes, download_only=True, clobber=True)
    ses_path = alf.io.get_session_path(files[0])

    probe_name = (ephys.ProbeInsertion & key).fetch1('probe_label')

    # Both ALF objects live in the probe's folder under <session>/alf
    probe_alf_path = ses_path.joinpath('alf', probe_name)
    clusters = alf.io.load_object(probe_alf_path, 'clusters')
    spikes = alf.io.load_object(probe_alf_path, 'spikes')

    time_fnames = [k for k in spikes.keys() if 'times' in k]

In [None]:
# Display the clusters object loaded for the last probe processed above
clusters

In [None]:
from ibl_pipeline import subject, acquisition, ephys, behavior, data
from ibl_pipeline.plotting import ephys as ephys_plotting
from ibl_pipeline.group_shared import wheel
import datajoint as dj
from tqdm import tqdm

In [None]:
# Fetch the keys of all sessions that have DefaultCluster entries
keys = (acquisition.Session & ephys.DefaultCluster).fetch('KEY')

In [None]:
# Populate the Raster plotting table with a progress display.
# suppress_errors=True: per the DataJoint docs, errors are collected and
# returned rather than raised - inspect the cell output for failures.
ephys_plotting.Raster.populate(display_progress=True, suppress_errors=True)

In [None]:
import datajoint as dj

In [None]:
# Call connect() on the current DataJoint connection object
# (presumably to re-establish a dropped database connection - confirm).
dj.conn().connect()

In [None]:
# NOTE(review): per the DataJoint docs, dj.kill() lists active database
# processes and interactively prompts for one to terminate, so this cell
# blocks on user input and should not be part of an unattended "Run All".
dj.kill()