## Learning overview 

### Import functions

In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from pathlib import Path
import uuid

from brainbox.task.trials import find_trial_ids
from brainbox.behavior.training import get_sessions, get_training_status

from one.api import ONE
from one.alf.files import add_uuid_string
from one.remote import aws

# Shared ONE client for the Alyx instance at this URL. The helper functions defined
# below (query_subjects_interest, subjects_interest_data) read `one` as a global.
one = ONE(base_url='https://alyx.internationalbrainlab.org')

### Custom functions

In [2]:
"""
GET TRIALS INFO INTO ONE TABLE
"""

# Function written by Julia 
def download_subjectTables(one, subject=None, trials=True, training=True,
                           target_path=None, tag=None, overwrite=False, check_updates=True):
    """
    Download the aggregate _ibl_subjectTrials.table.pqt and/or _ibl_subjectTraining.table.pqt
    datasets from AWS and return the local paths of the files that exist afterwards.

    Parameters
    ----------
    one: one.api.ONE
        Instance to be used to connect to database.
    subject: str, uuid.UUID or None
        Nickname or UUID (string or uuid.UUID instance) of the subject to download tables for.
        If None, all available tables are listed (restricted to 'tag' if one is given).
        When a subject is given, 'tag' is not used in the query.
    trials: bool
        Whether to download the subjectTrials.table.pqt, default is True
    training: bool
        Whether to download the subjectTraining.table.pqt, default is True
    target_path: str or pathlib.Path
        Existing directory to which files should be downloaded. If None, downloads to
        one.cache_dir/aggregates (created if missing).
    tag: str
        Data release tag used to filter the datasets when no subject is given. Default is None.
    overwrite : bool
        Passed through to aws.s3_download_file. If True, will re-download files even if file
        exists locally and file sizes match.
    check_updates : bool
        If True, every listed file goes through aws.s3_download_file. If False, AWS is not
        contacted at all and the expected local paths are returned when they exist.

    Returns
    -------
    trials_tables: list of pathlib.Path
        Paths to the local subjectTrials files
    training_tables: list of pathlib.Path
        Paths to the local subjectTraining files

    Raises
    ------
    AssertionError
        If a target_path is passed that does not exist.
    IndexError
        If 'subject' is a nickname that matches no subject in the database.
    """

    if target_path is None:
        target_path = Path(one.cache_dir).joinpath('aggregates')
        target_path.mkdir(parents=True, exist_ok=True)
    else:
        # Accept plain strings as documented; a str has no .exists()
        target_path = Path(target_path)
        assert target_path.exists(), 'The target_path you passed does not exist.'

    # Build the one extra REST filter that distinguishes the three query flavours
    # (by subject, by tag, or unrestricted)
    filters = {}
    if subject:
        if isinstance(subject, uuid.UUID):
            # uuid.UUID() only parses strings, so an instance is used as is
            subject_id = subject
        else:
            try:
                subject_id = uuid.UUID(subject)
            except ValueError:
                # Not a UUID string: treat it as a nickname and look up its id
                matches = one.alyx.rest('subjects', 'list', nickname=subject)
                if not matches:
                    raise IndexError(f'No subject found with nickname {subject!r}.')
                subject_id = matches[0]['id']
        filters['django'] = f'object_id,{subject_id}'
    elif tag:
        filters['tag'] = tag

    # Get the datasets
    trials_ds = []
    training_ds = []
    if trials:
        trials_ds.extend(one.alyx.rest('datasets', 'list', name='_ibl_subjectTrials.table.pqt', **filters))
    if training:
        training_ds.extend(one.alyx.rest('datasets', 'list', name='_ibl_subjectTraining.table.pqt', **filters))

    # The bucket is only set up once a download is actually requested, so that listing
    # nothing (or check_updates=False) does not open an S3 connection
    s3 = bucket_name = None

    all_out = []
    for ds_list in (trials_ds, training_ds):
        out_paths = []
        for ds in ds_list:
            # The last 36 characters of the dataset url are inserted into the file name as its UUID
            relative_path = add_uuid_string(ds['file_records'][0]['relative_path'], ds['url'][-36:])
            src_path = 'aggregates/' + str(relative_path)
            dst_path = target_path.joinpath(relative_path)
            if check_updates:
                if s3 is None:
                    s3, bucket_name = aws.get_s3_from_alyx(alyx=one.alyx)
                out = aws.s3_download_file(src_path, dst_path, s3=s3, bucket_name=bucket_name, overwrite=overwrite)
            else:
                out = dst_path

            if out and out.exists():
                out_paths.append(out)
            else:
                print(f'Downloading of {src_path} table failed.')
        all_out.append(out_paths)

    return all_out[0], all_out[1]


def query_subjects_interest(cutoff_date, lab, protocol='training', ibl_project='ibl_neuropixel_brainwide_01'):
    """
    Find the subjects of one lab that have at least one matching session after a given date.

    Uses the module-level `one` instance to search sessions by task protocol and project.

    Parameters
    ----------
    cutoff_date:
        Only sessions whose 'date' is strictly greater than this value are considered.
        Must be comparable with the 'date' entries of the session details
        (the notebook passes a datetime.date).
    lab: str
        Only sessions whose 'lab' equals this value are considered.
    protocol: str
        Passed to one.search as task_protocol. Default is 'training'.
    ibl_project: str
        Passed to one.search as project. Default is 'ibl_neuropixel_brainwide_01'.

    Returns
    -------
    subjects_interest: numpy.ndarray
        Sorted, unique subject nicknames (string dtype); empty if nothing matches.
    """
    # With details=True the search result is indexable and its second item holds the
    # per-session detail records
    sessions = one.search(task_protocol=protocol, project=ibl_project, details=True)
    session_details = sessions[1]

    # Collect nicknames in a list first: appending to a numpy array inside the loop
    # re-allocates the whole array on every match
    nicknames = [ses['subject'] for ses in session_details
                 if ses['lab'] == lab and ses['date'] > cutoff_date]

    return np.unique(np.asarray(nicknames, dtype=str))


# Characters 14:18 of task_protocol that identify the session type kept for each
# protocol option of the 'proficient' phase
_PROFICIENT_PROTOCOL_TAGS = {'biased': 'bias', 'ephys': 'ephy'}


def _first_status_date(training, status):
    """Return the first 'date' entry with the given training_status, or None if there is none."""
    rows = training.loc[training['training_status'] == status].reset_index()
    if len(rows) == 0:
        return None
    return rows['date'].iloc[0]


def _select_phase_trials(trials, training, phase, protocol):
    """Return the trials of one subject that belong to `phase`, or None if the subject does not qualify."""
    date_1a = _first_status_date(training, 'trained 1a')
    date_1b = _first_status_date(training, 'trained 1b')

    if phase == 'learning':
        # 'trained 1a' takes precedence; fall back to 'trained 1b' when 1a was never recorded
        trained_date = date_1a if date_1a is not None else date_1b
        if trained_date is None:
            return None
        # NOTE(review): if the date carries no time of day it parses to midnight, so sessions
        # starting later on the trained day itself are left out - confirm this is intended
        return trials.loc[trials['session_start_time'] <= pd.to_datetime(trained_date)]

    # phase == 'proficient': only sessions after 'trained 1b' count
    if date_1b is None:
        return None
    is_after_1b = trials['session_start_time'] > pd.to_datetime(date_1b)
    is_protocol = trials['task_protocol'].str.slice(14, 18) == _PROFICIENT_PROTOCOL_TAGS[protocol]
    return trials.loc[is_after_1b & is_protocol]


def subjects_interest_data(subjects_interest, phase='learning', protocol='training'):
    """
    Load the trials of several subjects, restricted to one training phase, into a single table.

    For every subject the aggregate trials and training tables are fetched with
    download_subjectTables (using the module-level `one` instance). Subjects without both
    tables are reported with a printed message and skipped. Subjects that never reached the
    training status required by `phase` are skipped silently.

    Parameters
    ----------
    subjects_interest: iterable of str
        Subject nicknames (or UUID strings) to load.
    phase: str
        'learning' keeps sessions starting no later than the first 'trained 1a' date
        ('trained 1b' if 1a was never recorded). 'proficient' keeps sessions starting after
        the first 'trained 1b' date. Default is 'learning'.
    protocol: str
        Only used when phase is 'proficient': 'biased' or 'ephys' selects the sessions whose
        task_protocol[14:18] is 'bias' or 'ephy' respectively. Default is 'training'.

    Returns
    -------
    all_data: pandas.DataFrame
        Selected trials of all subjects, with an added 'subject_nickname' column.
        An empty DataFrame if no subject contributed data.

    Raises
    ------
    ValueError
        If `phase` is not 'learning' or 'proficient', or if phase is 'proficient' and
        `protocol` is not 'biased' or 'ephys'.
    """
    # Validate up front: an unhandled option would otherwise leave nothing (or the previous
    # subject's data) to add for the current subject
    if phase not in ('learning', 'proficient'):
        raise ValueError(f"phase must be 'learning' or 'proficient', got {phase!r}")
    if phase == 'proficient' and protocol not in _PROFICIENT_PROTOCOL_TAGS:
        raise ValueError(f"Protocol not contemplated yet: {protocol!r} (expected 'biased' or 'ephys')")

    frames = []
    # Loop through subjects and get data and training status for each
    for subject in subjects_interest:

        subject_trials, subject_training = download_subjectTables(
            one, subject=subject, trials=True, training=True,
            target_path=None, tag=None, overwrite=False, check_updates=True)

        # Check if there is data for this mouse
        if len(subject_trials) == 0 or len(subject_training) == 0:
            print('No available data for subject ' + str(subject))
            continue

        # download_subjectTables already returns paths located under the cache directory
        trials = pd.read_parquet(subject_trials[0])
        training = pd.read_parquet(subject_training[0])
        trials['subject_nickname'] = subject

        subject_data = _select_phase_trials(trials, training, phase, protocol)
        if subject_data is not None:
            frames.append(subject_data)

    # Concatenate once at the end (DataFrame.append no longer exists in pandas 2)
    non_empty = [frame for frame in frames if len(frame) > 0]
    if non_empty:
        return pd.concat(non_empty)
    if frames:
        # Every selection was empty: return one of them so the columns are preserved
        return frames[-1]
    return pd.DataFrame()

### Example use case

In [4]:
# Load the trials of a single mouse, restricted to one training phase
phase = 'learning'  # 'learning' keeps sessions up to the date the mouse reached 'trained'; 'proficient' keeps sessions after 'trained 1b'
protocol = 'training'  # only consulted for phase 'proficient', where 'biased' and 'ephys' are handled
data = subjects_interest_data(['ZFM-06440'], phase, protocol)

No available data for subject ZFM-06440


In [56]:
# Find subjects based on lab
cutoff_date = datetime.date(2022, 7, 27)  # Find mice with sessions after this date
lab = 'mainenlab'
subjects_of_interest = query_subjects_interest(cutoff_date, lab, protocol='training', ibl_project='ibl_neuropixel_brainwide_01')
# Then query their data
phase = 'learning'  # 'learning' keeps sessions up to the date the mouse reached 'trained'; 'proficient' keeps sessions after 'trained 1b'
protocol = 'training'  # only consulted for phase 'proficient', where 'biased' and 'ephys' are handled
data = subjects_interest_data(subjects_of_interest, phase, protocol)

No available data for subject ZFM-05229


/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05231/_ibl_subjectTrials.table.b9f8176f-b05d-443c-b090-4a740b20cbf5.pqt: 100%|██████████| 6.64M/6.64M [00:07<00:00, 890kB/s] 
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05231/_ibl_subjectTraining.table.1b703052-41dd-4615-abd5-75fe3e96bb15.pqt: 100%|██████████| 3.14k/3.14k [00:00<00:00, 9.95kB/s]
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05232/_ibl_subjectTrials.table.5b01a0a2-b21c-4043-9817-fa2c2538b287.pqt: 100%|██████████| 1.12M/1.12M [00:01<00:00, 852kB/s] 
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05232/_ibl_subjectTraining.table.a8ae9ca0-3bdd-49db-84ad-b944a3c42031.pqt: 100%|██████████| 2.94k/2.94k [00:00<00:00, 9.08kB/s]
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05234/_ibl_subjectTrials.table.d1101b6e-46

No available data for subject ZFM-05923


/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05925/_ibl_subjectTrials.table.3230000b-0029-4c7d-a0e6-ebb22b882eb0.pqt: 100%|██████████| 160k/160k [00:00<00:00, 193kB/s]
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05925/_ibl_subjectTraining.table.ed6a226d-5d28-417a-94c0-993babf63e16.pqt: 100%|██████████| 2.94k/2.94k [00:00<00:00, 9.08kB/s]
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05926/_ibl_subjectTrials.table.f1f7c770-10b0-481c-b178-8a53e6d5d15f.pqt: 100%|██████████| 2.03M/2.03M [00:01<00:00, 1.51MB/s]
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05926/_ibl_subjectTraining.table.e42f6843-9f68-420b-a1b9-81e60a397e2f.pqt: 100%|██████████| 3.25k/3.25k [00:00<00:00, 10.8kB/s]
  all_data = all_data.append(subject_data)


No available data for subject ZFM-05927


/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05928/_ibl_subjectTrials.table.9cd3193e-8a3f-4e9d-b32d-69d9223f0e1c.pqt: 100%|██████████| 86.6k/86.6k [00:00<00:00, 131kB/s]
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-05928/_ibl_subjectTraining.table.e2719a4c-e219-42a9-a311-a898f45b0347.pqt: 100%|██████████| 2.94k/2.94k [00:00<00:00, 8.13kB/s]


No available data for subject ZFM-06171
No available data for subject ZFM-06190
No available data for subject ZFM-06193


/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-06195/_ibl_subjectTrials.table.3fdfab2f-5c1c-462c-a68d-9bbfc7d1f14f.pqt: 100%|██████████| 1.20M/1.20M [00:02<00:00, 458kB/s]
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/aggregates/Subjects/mainenlab/ZFM-06195/_ibl_subjectTraining.table.4ee1c5b5-e72f-4ba5-804f-277538e8651a.pqt: 100%|██████████| 2.94k/2.94k [00:00<00:00, 8.98kB/s]


No available data for subject ZFM-06438
No available data for subject ZFM-06439
No available data for subject ZFM-06440


In [None]:
# Alternatively, directly query data from a specific mouse.
# phase and protocol are the values set in the cells above; they are passed
# explicitly so the call matches the function's (subjects, phase, protocol) signature.
data = subjects_interest_data(['KS014'], phase, protocol)

### Now look at the data!