# Polus-data

In [1]:
from polus.data import Collection, collections

To list all `Collection`s present in the storage directory:

In [2]:
collections.list

['BBBC004',
 'BBBC010',
 'BBBC033',
 'BBBC039',
 'Hansen2019Iowa',
 'MaricRatBrain2019',
 'Mark2021NF-kB',
 'Nadia2017ImportTest',
 'SchaubHotaling2020Features',
 'Schwendy',
 'TissueNet',
 'Ty2016Fillipin',
 'Ty2017Alpha1',
 'Ty2017ERDislocation',
 'Ty2018EOSKNkinome',
 'Ty2018Transomic',
 'sod']

To select a `Collection`, there are multiple methods:

### 1. collections.MaricRatBrain2019

In [3]:
# Attribute-style access: the collection name is used as an attribute of `collections`.
# NOTE(review): the original inline note here read "too many"; its meaning is unclear, so confirm or drop it.
m1 = collections.MaricRatBrain2019



### 2. collections['MaricRatBrain2019']

In [4]:
m2 = collections['MaricRatBrain2019']



### 3. Collection("MaricRatBrain2019")

In [5]:
m3 = Collection("MaricRatBrain2019")



In [6]:
# Sanity check: the objects obtained through the three access methods expose equal `.model` values.
m1.model == m2.model == m3.model

True

## List all Datasets in the Collection

In [7]:
m1.datasets

['raw', 'standard', 'subset', 'fovs']

In [8]:
m1.raw

GenericDataScheme(intensity=Data(path=PosixPath('/Users/camilovelezr/polus-storage/images/MaricRatBrain2019/raw/intensity'), description='Original czi files from each of 5 different staining rounds', wipp_type=<WippType.genericData: 'genericData'>, tags=['intensity', 'czi']), metadata=Data(path=PosixPath('/Users/camilovelezr/polus-storage/images/MaricRatBrain2019/raw/metadata'), description='Additional information from Dragan', wipp_type=<WippType.genericData: 'genericData'>, tags=['metadata']))

In [9]:
m1.raw.intensity

Data(path=PosixPath('/Users/camilovelezr/polus-storage/images/MaricRatBrain2019/raw/intensity'), description='Original czi files from each of 5 different staining rounds', wipp_type=<WippType.genericData: 'genericData'>, tags=['intensity', 'czi'])

In [10]:
m1.subset.intensity

WippData(path=PosixPath('/Users/camilovelezr/polus-storage/images/MaricRatBrain2019/subset/intensity'), description='A 5x5 grid of field of views for 11 channels and 2 replicates', wipp_type=<WippType.collection: 'collection'>, tags=['intensity', 'fluorescence', 'phase_contrast', 'stitching', 'bleedthrough', 'flatfield', 'nuclear_segmentation'], patterns={'all': 'S1_R{r}_C1-C11_A1_y0{yy}_x0{xx}_c0{cc}.ome.tif', 'DAPI': 'S1_R{r}_C1-C11_A1_y0{yy}_x0{xx}_c000.ome.tif', 'phase-contrast': 'S1_R{r}_C1-C11_A1_y0{yy}_x0{xx}_c010.ome.tif'})

Since no images have been downloaded yet, the patterns in `subset.intensity` are returned as plain `str` values (once the files are available locally, they are represented as `FilePattern` objects instead).

In [12]:
m1.subset.intensity.patterns

{'all': 'S1_R{r}_C1-C11_A1_y0{yy}_x0{xx}_c0{cc}.ome.tif',
 'DAPI': 'S1_R{r}_C1-C11_A1_y0{yy}_x0{xx}_c000.ome.tif',
 'phase-contrast': 'S1_R{r}_C1-C11_A1_y0{yy}_x0{xx}_c010.ome.tif'}

We can fix this by downloading the dataset:

## Download directly from Python (just like `dvc pull`)

In [21]:
m1.fetch("subset")

A       subset/
1 file added and 550 files fetched


## FilePatterns
When a `Collection` is initialized and its files are present locally in the polus-storage directory, `FilePattern` objects are created for those files:

In [14]:
# Re-create the collection now that the "subset" files are available locally, so that
# its patterns are built as FilePattern objects at initialization.
m_new = collections.MaricRatBrain2019



In [15]:
m_new.subset.intensity.patterns

{'all': <filepattern.classes.FilePattern at 0x7fd3680e6e80>,
 'DAPI': <filepattern.classes.FilePattern at 0x7fd3680e6f40>,
 'phase-contrast': <filepattern.classes.FilePattern at 0x7fd389931130>}

In [16]:
m_new.subset.intensity.patterns['all'].pattern

'S1_R{r}_C1-C11_A1_y0{yy}_x0{xx}_c0{cc}.ome.tif'

In [17]:
m_new.subset.intensity.patterns['all'].path

PosixPath('/Users/camilovelezr/polus-storage/images/MaricRatBrain2019/subset/intensity')

## Paths

Full paths are returned, so there is no need to join a root path with relative paths.

In [18]:
m_new.standard.intensity.path

PosixPath('/Users/camilovelezr/polus-storage/images/MaricRatBrain2019/standard/intensity')

In [19]:
m_new.subset.intensity.path

PosixPath('/Users/camilovelezr/polus-storage/images/MaricRatBrain2019/subset/intensity')

## Tags as list

In [20]:
m_new.standard.intensity.tags

['intensity', 'fluorescence', 'phase_contrast']

## Automatic clean-up

# NOTE(review): this cell has no recorded output and the notebook does not describe
# what `clean()` removes; add a one-line explanation before sharing.
m_new.clean()