In [26]:
import os
import numpy as np
import pandas as pd
import skimage
import sys

In [2]:
def list_scans(data_folder, keyword):
    # Find folders containing the keyword
    scan_folders = [folder for folder in os.listdir(data_folder) if os.path.isdir(os.path.join(data_folder, folder)) and keyword in folder]

    # Print list of found folders
    print("Folders containing '{}' keyword:")
    for i, folder in enumerate(scan_folders):
        print(f"{i + 1}. {folder}")

    # Prompt user to choose a folder
    while True:
        choice = input("Enter the number of the scan you want to choose: ")
        if choice.isdigit() and 1 <= int(choice) <= len(scan_folders):
            chosen_folder = scan_folders[int(choice) - 1]
            break
        else:
            print("Invalid input. Please enter a valid number.")
    print("Selected",chosen_folder)
    # Return the path to the chosen folder
    return os.path.join(data_folder, chosen_folder), chosen_folder


In [3]:
def estimate_similarity_transform(ref,points):
    '''
    
    ref = np.vstack([landmarks_im['x'],landmarks_im['y']]).T
    match = point_stream.data    
    cor = np.vstack([match['x'],match['y']]).T
    
    M = estimate_similarity_transform(ref, cor)
    
    Joao Couto - wfield (2020)
    '''
    from skimage.transform import SimilarityTransform
    M = SimilarityTransform()
    M.estimate(ref,points)
    return M

In [38]:
def atlas_from_landmarks_file(landmarks_file=None,
                              reference='dorsal_cortex',
                              dims = [540,640],
                              do_transform = None):
    '''
    Load the atlas regions, and area names and brain mask

    Joao Couto - wfield 2020
    '''
    lmarks = load_allen_landmarks(landmarks_file)
    ccf_regions,proj,brain_outline = allen_load_reference('dorsal_cortex')
    # transform the regions into the image
    if not 'transform' in lmarks.keys():
        lmarks['transform'] = None
    transform = lmarks['transform']
    if not do_transform:
        transform = None
    nccf_regions = allen_transform_regions(transform,
                                           ccf_regions,
                                           resolution=lmarks['resolution'],
                                           bregma_offset=lmarks['bregma_offset'])
    nbrain_outline = apply_affine_to_points(brain_outline[:,0]/lmarks['resolution'] + lmarks['bregma_offset'][0],
                                            brain_outline[:,1]/lmarks['resolution'] + lmarks['bregma_offset'][1],
                                            transform)


    atlas,areanames = allen_regions_to_atlas(nccf_regions, dims)
    brain_mask = contour_to_mask(*nbrain_outline, dims = dims)
    return atlas, areanames, brain_mask


In [39]:
def allen_load_reference(reference_name, annotation_dir):
    '''
Load allen areas to use as reference.

Example:
    ccf_regions,proj,brain_outline = allen_load_reference('dorsal_cortex')

    Joao Couto - wfield, 2020
    '''
    if annotation_dir == wfield_dir:
        # then it is the reference folder, download if not there
        if not os.path.exists(pjoin(
                annotation_dir,
                '{0}_ccf_labels.json'.format(reference_name))):
            from .utils import _create_wfield_folder
            _create_wfield_folder()
    from pandas import read_json
    ccf_regions = read_json(pjoin(
        annotation_dir,'{0}_ccf_labels.json'.format(reference_name)))
    proj = np.load(pjoin(annotation_dir,
                         '{0}_projection.npy'.format(reference_name),))
    brain_outline = np.load(pjoin(annotation_dir,
                                  '{0}_outline.npy'.format(reference_name)))
    return ccf_regions,proj,brain_outline

NameError: name 'annotation_dir' is not defined

In [5]:
def contour_to_mask(x,y,dims,extent = None,n_up_samples = 2000):
    '''
    Create a mask from a contour
    
    Usage:    
        H = contour_to_mask(x,y,dims,extent = None,n_up_samples = 2000)
    
    Joao Couto - wfield, 2020        
    '''

    H = contour_to_im(x=x, y=y, 
                      dims = dims,
                      extent = extent,
                      n_up_samples = n_up_samples)    
    # fix border cases
    #if np.sum(H[0,:]):
    #    H[0,:] = np.uint8(1)
    #if np.sum(H[-1,:]):
    #    H[-1,:] = np.uint8(1)
    #if np.sum(H[:,0]):
    #    H[:,0] = np.uint8(1)
    #if np.sum(H[:,-1]):
    #    H[:,-1] = np.uint8(1)
    from scipy.ndimage import morphology
    H = morphology.binary_dilation(H)
    H = morphology.binary_fill_holes(H)
    H = morphology.binary_erosion(H)
    H[0,:] = 0
    H[-1,:] = 0
    H[:,0] = 0
    H[:,-1] = 0
    return H.astype(bool)


In [6]:
def allen_regions_to_atlas(ccf_regions,dims,
                           sides = ['left','right'],
                           fillnan = False):
    ''' 
    Atlas as called in locaNMF; it is the masks of allen areas.
    This function returns also the names of the areas

    Joao Couto - wfield 2020
    '''
    atlas = np.zeros(dims,dtype = np.float32)
    if fillnan:
        atlas.fill(np.nan)
    areanames = []
    for ireg,r in ccf_regions.iterrows():
        for iside,side in enumerate(sides):
            mask = contour_to_mask(
                r[side+'_x'],r[side+'_y'],
                dims = dims)
            factor = 1
            if iside==1:
                factor = -1
            atlas[mask==1] = factor*(ireg+1)
            areanames.append([factor*(ireg+1),r['acronym']+'_'+side])
    return atlas,areanames

In [7]:
def apply_affine_to_points(x,y,M):
    '''
    Apply an affine transform to a set of contours or (x,y) points.

    x,y = apply_affine_to_points(x, y, tranform)

    Joao Couto - wfield (2020)
    '''
    if M is None:
        nM = np.identity(3,dtype = np.float32)
    else:
        nM = M.params
    xy = np.vstack([x,y,np.ones_like(y)])
    res = (nM @ xy).T
    return res[:,0],res[:,1]

In [8]:
def allen_transform_from_landmarks(landmarks_im,match):
    '''
    Compute the similarity transform from annotated landmarks. 
    
    transform = allen_transform_from_landmarks(landmarks_im,match)
    
    '''
    ref = np.vstack([landmarks_im['x'],landmarks_im['y']]).T
    cor = np.vstack([match['x'],match['y']]).T
    return estimate_similarity_transform(ref, cor)

In [9]:
def im_apply_transform(im,M,dims = None):
    '''
    Applies an affine transform M to an image.
    nim = im_apply_transform(im,M)

    Joao Couto - wfield, 2020
    '''
    if issparse(im):
        # then reshape before
        if dims is None:
            raise ValueError('Provide dims when warping sparse matrices.')
        shape = im.shape
        tmp  = np.asarray(im.todense()).reshape(dims)
        tmp = warp(tmp,M,
                   order = 1,
                   mode='constant',
                   cval = 0,
                   clip = True,
                   preserve_range = True)
        return csr_matrix(tmp.reshape(shape))
    else:    
        return warp(im,M,
                    order = 1,
                    mode='constant',
                    cval = 0,
                    clip=True,
                    preserve_range=True)

In [10]:
def im_apply_affine(im,transform):
    W,H = im.shape
    M = transform.params[:2,:]
    return cv2.warpAffine(im, M, (H, W),cv2.WARP_INVERSE_MAP)
def get_U_atlas(U,M):
    U = U.copy()
    U[:,0,:] = 1e-10
    U[0,:,:] = 1e-10
    U[-1,:,:] = 1e-10
    U[:,-1,:] = 1e-10

    # transpose U
    return np.stack(runpar(im_apply_affine, U.transpose([2,0,1]),
                           transform = M)).transpose([1,2,0]).astype(np.float32)

In [11]:
def load_allen_landmarks(filename, reference = 'dorsal_cortex'):
    '''
    lmarks = load_allen_landmarks(filename, reference = 'dorsal_cortex'):
    
    Loads an allen landmark file (json) and returns the transform objects if present.
    Joao Couto - wfield 2020
    '''
    if filename is None:
        filename = pjoin(annotation_dir,reference + '_landmarks.json')
    if not os.path.exists(filename):
        if '.wfield' in filename:
            from .utils import _create_wfield_folder
            _create_wfield_folder()
        else:
            raise(OSError('Could not find the reference file {0}.'.format(filename)))

    with open(filename,'r') as fd:
        import json
        lmarks = json.load(fd)
    for k in ['landmarks_im','landmarks','landmarks_match']:
        if k in lmarks.keys():
            from pandas import DataFrame
            lmarks[k] = DataFrame(lmarks[k])[['x','y','name','color']]
    if 'transform' in lmarks.keys():
        if not 'transform_type' in lmarks.keys():
            lmarks['transform_type'] = 'euclidian'
        if lmarks['transform_type'] == 'affine':
            from skimage.transform import AffineTransform
            lmarks['transform'] = AffineTransform(
                np.array(lmarks['transform']))
        else: # use similarity
            from skimage.transform import SimilarityTransform
            lmarks['transform'] = SimilarityTransform(
                np.array(lmarks['transform']))
        if 'transform_inverse' in lmarks.keys():
            if lmarks['transform_type'] == 'affine':
                from skimage.transform import AffineTransform
                lmarks['transform_inverse'] = AffineTransform(
                    np.array(lmarks['transform_inverse']))
            else: # use similarity
                from skimage.transform import SimilarityTransform
                lmarks['transform_inverse'] = SimilarityTransform(
                    np.array(lmarks['transform_inverse']))
    return lmarks

In [12]:
def compute_locaNMF(U,V,atlas,brain_mask,
                    minrank = 1, # rank = how many components per brain region.
                    maxrank = 10, #Set maxrank to around 10 for regular dataset.
                    min_pixels = 100, # minimum number of pixels in Allen map for it to be considered a brain region
                    loc_thresh = 70, # Localization threshold, i.e. percentage of area restricted to be inside the 'atlas boundary'
                    r2_thresh = 0.99, # Fraction of variance in the data to capture with LocaNMF
                    nonnegative_temporal = False, # Do you want nonnegative temporal components? The data itself should also be nonnegative in this case.
                    maxiter_lambda = 300,
                    device = 'auto',
                    verbose = [True, False, False]):
    '''
This function runs locaNMF from wfield analysis outputs.
It uses the original package for LocaNMF, written by Ian Kinsella and Shreya Saxena
Reference: 
    Saxena S, Kinsella I, Musall S, Kim SH, Meszaros J, et al. (2020) 
    Localized semi-nonnegative matrix factorization (LocaNMF) of widefield calcium imaging data. 
    PLOS Computational Biology 16(4): e1007791. https://doi.org/10.1371/journal.pcbi.1007791

Usage:
    
    A,C,regions = compute_locaNMF(U,V,atlas,brain_mask,
                    minrank = 1, 
                    maxrank = 10, 
                    min_pixels = 100,
                    loc_thresh = 70, 
                    r2_thresh = 0.99,
                    device = 'cuda')
    
    Joao Couto - wfield, 2023
    '''
    try:
        from locanmf import LocaNMF
    except Exception as err:
        print(err)
        raise(OSError("This analysis requires the locaNMF package."))
    
    import torch
    if device == 'auto':
        if torch.cuda.is_available():
            device = 'cuda'
        else:
            print('torch could not find a cuda capable GPU, using the CPU (slower).')
            device = 'cpu'
            
    rank_range = (minrank, maxrank, 1)
    if nonnegative_temporal:
        r = V.T
    else:
        q, r = np.linalg.qr(V.T)
    video_mats = (np.copy(U[brain_mask]), r.T)
    del U

    region_mats = LocaNMF.extract_region_metadata(brain_mask, atlas, min_size=min_pixels)
    region_metadata = LocaNMF.RegionMetadata(region_mats[0].shape[0],
                                               region_mats[0].shape[1:],
                                               device=device)

    region_metadata.set(torch.from_numpy(region_mats[0].astype(np.uint8)),
                        torch.from_numpy(region_mats[1]),
                        torch.from_numpy(region_mats[2].astype(np.int64)))

    # Do SVD
    if device=='cuda': torch.cuda.synchronize()
    region_videos = LocaNMF.factor_region_videos(video_mats,
                                                   region_mats[0],
                                                   rank_range[1],
                                                   device=device)
    if device=='cuda': torch.cuda.synchronize()
    low_rank_video = LocaNMF.LowRankVideo(
        (int(np.sum(brain_mask)),) + video_mats[1].shape, device=device)
    low_rank_video.set(torch.from_numpy(video_mats[0].T),
                       torch.from_numpy(video_mats[1]))
    if device=='cuda': torch.cuda.synchronize()
    locanmf_comps = LocaNMF.rank_linesearch(low_rank_video,
                                            region_metadata,
                                            region_videos,
                                            maxiter_rank = maxrank-minrank+1,
                                            maxiter_lambda = maxiter_lambda, 
                                            maxiter_hals = 20,
                                            lambda_step = 1.35,
                                            lambda_init = 1e-6, 
                                            loc_thresh = loc_thresh,
                                            r2_thresh = r2_thresh,
                                            rank_range = rank_range,
                                            nnt = nonnegative_temporal,
                                            verbose = verbose,
                                            sample_prop = (1,1),
                                            device = device)
    if device=='cuda': torch.cuda.synchronize()
    # Get LocaNMF spatial and temporal components
    A = locanmf_comps.spatial.data.cpu().numpy().T
    A_reshape = np.zeros((brain_mask.shape[0],brain_mask.shape[1],A.shape[1])); A_reshape.fill(np.nan)
    A_reshape[brain_mask,:] = A

    if nonnegative_temporal:
        C = locanmf_comps.temporal.data.cpu().numpy()
    else:
        C = np.matmul(q,locanmf_comps.temporal.data.cpu().numpy().T).T

    regions = region_metadata.labels.data[locanmf_comps.regions.data].cpu().numpy()

    if device=='cuda':
        torch.cuda.empty_cache()
    
    return A_reshape,C,regions


In [13]:
def list_scans(data_folder, keyword):
    # Find folders containing the keyword
    scan_folders = [folder for folder in os.listdir(data_folder) if os.path.isdir(os.path.join(data_folder, folder)) and keyword in folder]

    # Print list of found folders
    print("Folders containing '{}' keyword:")
    for i, folder in enumerate(scan_folders):
        print(f"{i + 1}. {folder}")

    # Prompt user to choose a folder
    while True:
        choice = input("Enter the number of the scan you want to choose: ")
        if choice.isdigit() and 1 <= int(choice) <= len(scan_folders):
            chosen_folder = scan_folders[int(choice) - 1]
            break
        else:
            print("Invalid input. Please enter a valid number.")
    print("Selected",chosen_folder)
    # Return the path to the chosen folder
    return os.path.join(data_folder, chosen_folder), chosen_folder


In [16]:
def mmap_dat(filename,
             mode = 'r',
             nframes = None,
             shape = None,
             dtype='uint16'):
    '''
    Loads frames from a binary file as a memory map.
    This is useful when the data does not fit to memory.
    
    Inputs:
        filename (str)       : fileformat convention, file ends in _NCHANNELS_H_W_DTYPE.dat
        mode (str)           : memory map access mode (default 'r')
                'r'   | Open existing file for reading only.
                'r+'  | Open existing file for reading and writing.                 
        nframes (int)        : number of frames to read (default is None: the entire file)
        offset (int)         : offset frame number (default 0)
        shape (list|tuple)   : dimensions (NCHANNELS, HEIGHT, WIDTH) default is None
        dtype (str)          : datatype (default uint16) 
    Returns:
        A memory mapped  array with size (NFRAMES,NCHANNELS, HEIGHT, WIDTH).

    Example:
        dat = mmap_dat(filename)
    '''
    
    if not os.path.isfile(filename):
        raise OSError('File {0} not found.'.format(filename))
    if shape is None or dtype is None: # try to get it from the filename
        dtype,shape,_ = _parse_binary_fname(filename,
                                            shape = shape,
                                            dtype = dtype)
    if type(dtype) is str:
        dt = np.dtype(dtype)
    else:
        dt = dtype
    if nframes is None:
        # Get the number of samples from the file size
        nframes = int(os.path.getsize(filename)/(np.prod(shape)*dt.itemsize))
    dt = np.dtype(dtype)
    return np.memmap(filename,
                     mode=mode,
                     dtype=dt,
                     shape = (int(nframes),*shape))

In [24]:
def join(a, *p):
    """Join two or more pathname components, inserting '/' as needed.
    If any component is an absolute path, all previous path components
    will be discarded.  An empty last part will result in a path that
    ends with a separator."""
    a = os.fspath(a)
    sep = _get_sep(a)
    path = a
    try:
        if not p:
            path[:0] + sep  #23780: Ensure compatible data type even if p is null.
        for b in map(os.fspath, p):
            if b.startswith(sep):
                path = b
            elif not path or path.endswith(sep):
                path += b
            else:
                path += sep + b
    except (TypeError, AttributeError, BytesWarning):
        genericpath._check_arg_types('join', a, *p)
        raise
    return path

def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
        include_hidden=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns by default.

    If `include_hidden` is true, the patterns '*', '?', '**'  will match hidden
    directories.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive,
                      include_hidden=include_hidden))
def _get_sep(path):
    if isinstance(path, bytes):
        return b'/'
    else:
        return '/'

def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
          include_hidden=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
    if root_dir is not None:
        root_dir = os.fspath(root_dir)
    else:
        root_dir = pathname[:0]
    it = _iglob(pathname, root_dir, dir_fd, recursive, False,
                include_hidden=include_hidden)
    if not pathname or recursive and _isrecursive(pathname[:2]):
        try:
            s = next(it)  # skip empty string
            if s:
                it = itertools.chain((s,), it)
        except StopIteration:
            pass
    return it

In [30]:
def _parse_binary_fname(fname,lastidx=None, dtype = 'uint16', shape = None, sep = '_'):
    '''
    Gets the data type and the shape from the filename 
    This is a helper function to use in load_dat.
    
    out = _parse_binary_fname(fname)
    
    With out default to: 
        out = dict(dtype=dtype, shape = shape, fnum = None)
    '''
    fn = os.path.splitext(os.path.basename(fname))[0]
    fnsplit = fn.split(sep)
    fnum = None
    if lastidx is None:
        # find the datatype first (that is the first dtype string from last)
        lastidx = -1
        idx = np.where([not f.isnumeric() for f in fnsplit])[0]
        for i in idx[::-1]:
            try:
                dtype = np.dtype(fnsplit[i])
                lastidx = i
            except TypeError:
                pass
    if dtype is None:
        dtype = np.dtype(fnsplit[lastidx])
    # further split in those before and after lastidx
    before = [f for f in fnsplit[:lastidx] if f.isdigit()]
    after = [f for f in fnsplit[lastidx:] if f.isdigit()]
    if shape is None:
        # then the shape are the last 3
        shape = [int(t) for t in before[-3:]]
    if len(after)>0:
        fnum = [int(t) for t in after]
    return dtype,shape,fnum

In [14]:
data_folder = r'/datajoint-data/data/aeltona/'
# tif_file_path = pjoin(data_folder, 'scan9FN2ANVG_Oddball_AA_ROS-1706_2025_MMStack_Default.ome.tif')
# localdisk = r'C:\datatemp'
localdisk, scan_idx = list_scans(data_folder,"AA")



Folders containing '{}' keyword:
1. Habituation_AA_WEZ-8950_2024-04-16_scan9FNN1Y64_sess9FNN0LFP
2. Habituation_AA_WEZ-8948_2024-04-17_scan9FNNP1N7_sess9FNNO3Z1
3. Oddball_AA_ROS-1706_2024-03-12_scan9FN2BCOS_sess9FN2ANVG
4. AA_ROS-1688_2024_01_27_scan000WQU9_sess000EAEIO
5. Habituation_AA_WEZ-8950_2024-04-18_scan9FNO8CLT_sess9FNO8CLT
6. Habituation_AA_WEZ-8950_2024-04-17_scan9FNNO3Z1_sess9FNNO3Z1
7. AA_ROS-1706_2024-03-12_scan9FN2ANVG_sess9FN2ANVG
8. Habituation_AA_WEZ-8950_2024-04-16_scan9FNN1YXK_sess9FNN1YXK
9. Habituation_AA_WEZ-8948_2024-04-18_scan9FNO99ZE_sess9FNO8CLT
10. AA_ROS-1688_2024_01_27_scan000EAEIO_sess000EAEIO
11. Habituation_AA_WEZ-8950_2024-04-16_scan9FNN2FX7_sess9FNN1YXK
12. Habituation_AA_WEZ-8948_2024-04-16_scan9FNN1M1R_sess9FNN0LFP
Selected AA_ROS-1706_2024-03-12_scan9FN2ANVG_sess9FN2ANVG


In [31]:
dat_path = os.path.join(localdisk,'scan9FN2ANVG_Oddball_AA_ROS-1706_600_600_2_uint16.dat')
dat = mmap_dat(dat_path)

In [36]:
lmarksfile = os.path.join(localdisk,'ccf_transform_landmarks.json')
lmarks = load_allen_landmarks(os.path.join(localdisk,'ccf_transform_landmarks.json'))

if len(lmarks):
    mask = np.zeros(dat.shape[-2::],dtype=bool)
    # from .allen import atlas_from_landmarks_file
    _, _, mask = atlas_from_landmarks_file(lmarksfile,dims = mask.shape, do_transform=True)
    print('Using the mask from the landmarks file for decomposition.')

NameError: name 'allen_load_reference' is not defined