In [None]:
# Reload imported modules automatically before running each cell, so edits to
# source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures with the Qt backend (separate interactive windows).
%matplotlib qt

# Extracting data from mesmerize batches
Reading data from Mesmerize batches into Jupyter so you can work with it directly in Python. 

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os.path
import pandas as pd
import joblib
import ipywidgets as widgets

import caiman as cm
from caiman.source_extraction.cnmf.cnmf import load_CNMF

<a id="batch"></a>
# 1. Extract batch metadata
Each batch directory has a pickle file called `dataframe.batch` that contains all the metadata you will need about each batch that was run: the module (e.g., `CNMFE`), the input parameters, the unique identifier for that batch (`uuid`), and the `name` used for that batch. You can read this data using `pd.read_pickle()`:

**NOTE** for your `batch_dir` variable below you need to:

- copy/paste in the path to your batch directory in your project folder.
- change any backslashes to forward slashes, and end the path with a trailing slash (`/`).

In [None]:
# Paste the full path to your batch directory between the quotes, as a string.
# Use forward slashes and end the path with a trailing slash,
# e.g. "C:/my_project/batches/my_batch/".
batch_dir = ""

# Fail right here with a clear message if the path has not been filled in (or
# is mistyped), rather than with a confusing error in a later cell.
if not os.path.isdir(batch_dir):
    raise NotADirectoryError(
        f"batch_dir is not an existing directory: {batch_dir!r}. "
        "Edit this cell and paste in the full path to your batch directory."
    )
print(batch_dir)

In [None]:
# The batch metadata lives in a pickled DataFrame called "dataframe.batch"
# inside the batch directory. os.path.join adds the path separator only when
# it is missing, so this works whether or not `batch_dir` ends with a slash.
batch_path = os.path.join(batch_dir, "dataframe.batch")

# NOTE(review): unpickling can execute arbitrary code, so only load batch
# files that come from a source you trust.
batch_df = pd.read_pickle(batch_path)
batch_df.head()

To see what caiman modules you used:

In [None]:
# Print each distinct value of the `module` column on its own line.
unique_modules = batch_df.module.unique()
for module_name in unique_modules:
    print(module_name)

To see which names your batch jobs have:

In [None]:
# Number the distinct batch names in order of first appearance and print them.
batch_names = batch_df.name.unique()
for position in range(len(batch_names)):
    print(f"batch {position}: {batch_names[position]}")

<a id="cnmfe"></a>
# 2. Extract CNMFE results
For each cnmfe run, the main files you will likely be interested in include:

- `uuid_results.hdf5`: the results of the analysis that let you reconstruct the estimates object
- `uuid_input.tiff`: input movie (motion corrected movie)
- `uuid_cn_filter.pikl`: correlation image (used for finding cnmfe params, but often useful in plotting)

Here we'll just walk through loading the movie, results, and correlation image.

**Dropdown tester (next three cells)**

In [None]:
# Build a dropdown listing every distinct batch name, with the first name
# selected initially, and show it below this cell.
batch_options = batch_df.name.unique()
batch_dropdown = widgets.Dropdown(
    description='batch1: ',
    options=batch_options,
    value=batch_options[0],
    disabled=False,
)
display(batch_dropdown)

In [None]:
# Look the batch up by the *name* chosen in the dropdown. The dropdown lists
# unique names, so its positional index only lines up with the rows of
# `batch_df` when no name is repeated; matching on the name always selects a
# row that really carries the chosen name.
selected_name = batch_dropdown.value
name_matches = (batch_df["name"] == selected_name).to_numpy()

# Row position (for .iloc) of the first batch item with the selected name.
# NOTE: if several items share this name, only the first one is used.
cnm_ind = int(np.flatnonzero(name_matches)[0])
cnm_batch = batch_df.iloc[cnm_ind]
cnm_uuid = cnm_batch["uuid"]
cnm_uuid_string = str(cnm_uuid)
print(f"Let's look at batch {batch_dropdown.index}: {batch_dropdown.value}")
print(f"Unique id: {cnm_uuid_string}")

### File check
Check that the files mentioned above actually exist. If you pick a batch item where you only ran motion correction, or only computed the correlation/pnr map, for instance, the `hdf5` file will not exist.

In [None]:
# Expected output files for the selected batch item. os.path.join is used
# instead of string concatenation so the paths are correct whether or not
# `batch_dir` ends with a slash.
cnm_results_filepath = os.path.join(batch_dir, cnm_uuid_string + '_results.hdf5')
cn_filepath = os.path.join(batch_dir, cnm_uuid_string + '_cn_filter.pikl')
cnm_movie_filepath = os.path.join(batch_dir, cnm_uuid_string + '_input.tiff')

# Each entry pairs a path with the message to print when that file is missing:
# cnmfe results, correlation image array, motion corrected movie array.
file_checks = [
    (cnm_results_filepath, "No cnmfe results file: did you select a cnmfe batch?"),
    (cn_filepath, "No cn file"),
    (cnm_movie_filepath, "No movie file"),
]

for filepath, missing_message in file_checks:
    if os.path.isfile(filepath):
        print(f"{filepath} exists")
    else:
        print(missing_message)

## Reconstruct CNMF object
The hdf5 file that Mesmerize saves in the batch folder is the exact one that caiman uses for reconstructing the CNMF object.  

In [None]:
# Start a local cluster for parallel processing.
num_cpus = 2

# If this cell has been run before, `dview` already exists: stop that cluster
# first so a fresh one is started below.
if 'dview' in locals():
    cm.stop_server(dview=dview)

c, dview, n_processes = cm.cluster.setup_cluster(
    backend='local',
    n_processes=num_cpus,
    single_thread=False,
    ignore_preexisting=True,
)

# Report the size of the pool (move this to its own cell if you like to
# re-check it from time to time).
print(f"Cluster has {n_processes} processes in the pool {type(dview)}")

Build the CNMF object from the hdf5 file:

In [None]:
# Rebuild the CNMF object from the results file of the selected batch item,
# handing it the process count and pool handle returned by setup_cluster.
cnm = load_CNMF(cnm_results_filepath, n_processes, dview=dview)
# Copy the dims stored on the CNMF object onto its estimates object
# (presumably required by the estimates plotting methods — TODO confirm).
cnm.estimates.dims = cnm.dims

Now you have the full `CNMF` object that you would get if you had run things in caiman. You can access the various traces/components [as discussed in the documentation](https://caiman.readthedocs.io/en/master/Getting_Started.html#result-interpretation). There is also a [list of useful methods](https://caiman.readthedocs.io/en/master/core_functions.html#estimates). 

In [None]:
# Load the correlation image saved for the selected batch item.
# NOTE(review): this unpickles the file, so only open batch files you trust.
with open(cn_filepath, mode='rb') as cn_file:
    corr_img = joblib.load(cn_file)

In [None]:
# Open the notebook component viewer for the components listed in
# `idx_components`, using the correlation image as the grayscale background.
cnm.estimates.nb_view_components(
    img=corr_img,
    cmap='gray',
    idx=cnm.estimates.idx_components,
    denoised_color='red',
    thr=0.99,
);

Those are just a couple of examples. 

## Post-processing and exporting
So let's say we are happy with our components and their fluorescence traces. What then? Can we export them as CSV? What about these weird unitless fluorescence bulk traces? Can we convert them to something more useful? 

In caiman they are stored in `cnm.estimates.C`:

In [None]:
# Show the dimensions of the trace array
# (presumably components x frames — TODO confirm).
cnm.estimates.C.shape

In [None]:
# Export the traces stored in `cnm.estimates.C`, transposed so that each
# component ends up in its own column of the CSV file.
# (`Cz` was never defined in this notebook, so the original line raised a
# NameError on a fresh kernel; the array described in the text is used instead.)
fluor_traces = cnm.estimates.C
# index=False leaves the DataFrame's row index out of the file.
pd.DataFrame(fluor_traces.T).to_csv("fluor_data.csv", index=False)

Now you have the data (one component per column) in a csv file that can be opened in Excel or another program (e.g., Matlab), or easily re-opened in Python with Pandas or Numpy! 

Obviously, you need to know the frame rate to get the time stamps. 