In [1]:
import numpy as np
import pandas as pd
import datashader as ds
import datashader.transfer_functions as tf
import holoviews as hv
import bz2
import copy
import pickle
import time
hv.extension('bokeh')

from holoviews import opts
from holoviews import streams
from holoviews.operation.datashader import datashade
from lxml import etree
from pathlib import Path
from shapely.geometry import MultiPolygon, Polygon, Point, LinearRing
from shapely.geometry.polygon import orient
from shapely.prepared import prep

import analysis_last_state as als
from ev_model import persistency, environment, elements
from ev_model.utilities import geometry as evmetry
%matplotlib inline

# Data analysis

Creating the topological elements we need for the analysis is not a trivial task. [...]  
This is done in an offline preprocessing step.  

## Preparation

Here we load the elements prepared for analytical and testing purposes.

| section | expected file size (MB) | target distance | target buffer|
|---:|:---:|:---:|:---:|
|`ampulla`|65|150|334|
|`infundibulum`|46|132|77|
|`ia-junction`|33|79|81|
|`utj`|7|37|34|
|`isthmus`|6| 15| 14|

From the list of user-created polygons, select the desired to load

In [20]:
targets = {}
targets['ampulla'] = {'buffer':334}
targets['infundibulum'] = {'buffer':77}
targets['ia-junction'] = {'buffer':80}
targets['utj'] = {'buffer':34}
targets['isthmus'] = {'buffer':14}
targets['edges'] = [v for v in np.arange(0.04, 0.17, 0.01)]
targets['sizes'] = [v for v in np.arange(0.04, 0.17, 0.01)]
polygons_available = als.display_polygons_available()

0 - user_polygons_ampulla_2020-01-22_full_regions_v1.pickle
1 - user_polygons_ia-junction_2020-01-20_21-27-47.pickle
2 - user_polygons_isthmus_2020-01-20_11-24-07.pickle
3 - user_polygons_isthmus_2020-01-19_09-25-32.pickle
4 - user_polygons_utj_2020-01-27_12-35-49.pickle
5 - user_polygons_ampulla_2020-01-27_full_regions_v2.pickle
6 - user_polygons_ampulla_2020-01-27_14-50-42.pickle


## Oviduct sections
Each of the key regions of the oviduct have specific parameters for analysis. These must be selected in the next step

### UTJ

In [54]:
section = 'utj'
# the distances provide will be used to identify EVs within (i-1, i]
distances_selected = [0, 5, 10, 15, 20, 25, 30, 33]
#target_iteration, replicates, base_path = 2880000, 5, '/home/cmoralesmx/ev_iters/v32/utj/apop_2h01,sec_09,dt01'
#target_iteration, replicates, base_path = 5760000, 5, '/home/cmoralesmx/ev_iters/v32/utj/apop_2h01,sec_09,dt01'
target_iteration, replicates, base_path = 2880000, 2, '/home/cmoralesmx/ev_iters/v32/utj/apop_2h1,sec_09,dt01'

polygons_to_load = 4

### Isthmus

In [73]:
# done
section = 'isthmus'
# the distances provide will be used to identify EVs within (i-1, i]
distances_selected = [0, 3, 6, 9]
base_path = '/home/cmoralesmx/ev_iters/v32/isth/apop_2h,sec_09,dt01'
replicates = 6
target_iteration = 2880000
#for 16 hours:
#target_iteration = 5760000
polygons_to_load = 2

### IA junction

In [22]:
# done
section = 'ia-junction'
# the distances provide will be used to identify EVs within (i-1, i]
distances_selected = [0, 10, 20, 30, 40, 50, 60, 70, 79]
base_path = '/home/cmoralesmx/ev_iters/v32/iaj/apop_2h,sec_09,dt01'
replicates = 5
target_iteration = 2880000
#for 16 hours: #target_iteration = 5760000
polygons_to_load = 1

### Ampulla

In [21]:
section = 'ampulla'
distances_selected = [0, 20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 240, 280, 320]
replicates, target_iteration, base_path = 6, 2880000, '/home/cmoralesmx/ev_iters/v32/amp/apop_2h01,sec_09,dt01'
#replicates, target_iteration, base_path = 6, 5760000, '/home/cmoralesmx/ev_iters/v32/amp/apop_2h01,sec_09,dt01'
#replicates, target_iteration, base_path = 4, 540000, '/home/cmoralesmx/ev_iters/v32/amp/apop-2h_sec-09_dt-01'
polygons_to_load = 6

# Analysis

## Regions of interest

Display any previously created polygons which can be loaded for analysis

In [22]:
analysis_setup = als.prepare_analysis(section, targets, distances_selected, base_path, replicates, target_iteration)
analysis_setup = als.load_evs(analysis_setup)
analysis_setup['user_polys_loaded'], analysis_setup['user_polys'] = als.select_and_load_polygons_specified(polygons_to_load, polygons_available)

n_evs_per_replicate = []
for l in analysis_setup['evs_d_useful']:
    ll = len(l)
    n_evs_per_replicate = ll
    print(ll)
'Mean evs per replicate',np.mean(n_evs_per_replicate), np.std(n_evs_per_replicate)

Polygons loaded from ./resources/analysis/shrinked_polygons_ampulla_334.pickle
334
334
EVs within [280-333)um from the epithelial tissue
EVs within [240-280)um from the epithelial tissue
EVs within [200-240)um from the epithelial tissue
EVs within [180-200)um from the epithelial tissue
EVs within [160-180)um from the epithelial tissue
EVs within [140-160)um from the epithelial tissue
EVs within [120-140)um from the epithelial tissue
EVs within [100-120)um from the epithelial tissue
EVs within [80-100)um from the epithelial tissue
EVs within [60-80)um from the epithelial tissue
EVs within [40-60)um from the epithelial tissue
EVs within [20-40)um from the epithelial tissue
EVs within [0-20)um from the epithelial tissue
Processing 6 replicates
Done loading data
Total EVs processed 1318662 oob 99
Polygons loaded from: user_polygons_ampulla_2020-01-27_14-50-42.pickle
220290
219263
220139
219821
219460
219596


('Mean evs per replicate', 219596.0, 0.0)

In [23]:
drawable, poly_streams = als.display_polygons_loaded(analysis_setup['user_polys'] if 'user_polys' in analysis_setup else None, analysis_setup['hvPolys_selected'], section)

%output size=400
drawable

## Size distribution per Region of Interest

*Note:* Prior to generating the polygons, we must execute the following command in the terminal.  
Otherwise, the export function will not work.  
```bash
export OPENSSL_CONF=/etc/ssl/
```

In [24]:
analysis_setup['user_polys'], analysis_setup['prep_polys'], analysis_setup['users_polys_loaded'] = als.save_polygons_for_reuse(poly_streams, analysis_setup['user_polys_loaded'], section)

The polygons did not change


In [25]:
output_dir = 'amp_apop2h01_more_regions_8h'

# try to load the pre computed counts from file
d = f'./resources/analysis/output/{output_dir}/evs_per_replicate_in_polygon.pickle.bz2'
with bz2.BZ2File(d, 'rb') as compressed_input_file:
    evs_per_replicate_in_polygon = pickle.load(compressed_input_file)
    print(f'evs_per_replicate_in_polygon loaded from {od}')
d = f'./resources/analysis/output/{output_dir}/evs_in_polygon_per_size.pickle.bz2'
with bz2.BZ2File(d, 'rb') as compressed_input_file:
    evs_in_polygon_per_size = pickle.load(compressed_input_file)
    print(f'evs_in_polygon_per_size loaded from {od}')
d = f'./resources/analysis/output/{output_dir}/evs_at_distance_in_polygons_per_size.pickle.bz2'
with bz2.BZ2File(d, 'rb') as compressed_input_file:
    evs_at_distance_in_polygons_per_size = pickle.load(compressed_input_file)
    print(f'evs_at_distance_in_polygons_per_size loaded from {od}')

FileNotFoundError: [Errno 2] No such file or directory: './resources/analysis/output/amp_apop2h01_more_regions_8h/evs_per_replicate_in_polygon.pickle.bz2'

In [26]:
# if the counts do not existe, compute them and save to file
evs_per_replicate_in_polygon, evs_in_polygon_per_size, evs_at_distance_in_polygons_per_size = als.identify_evs_per_polygon(
    analysis_setup['prep_polys'], analysis_setup['evs_d_useful'], targets['sizes'], distance_polygons=analysis_setup['opt_polys_selected'])

# export the identified evs per polygon for later reuse
with bz2.BZ2File('./resources/analysis/output/evs_per_replicate_in_polygon.pickle.bz2', 'wb') as compressed_output_file:
    pickle.dump(evs_per_replicate_in_polygon, compressed_output_file)
    print('evs_per_replicate_in_polygon saved to ./resources/analysis/output/evs_per_replicate_in_polygon.pickle.bz2')
with bz2.BZ2File('./resources/analysis/output/evs_in_polygon_per_size.pickle.bz2', 'wb') as compressed_output_file:
    pickle.dump(evs_in_polygon_per_size, compressed_output_file)
    print('evs_in_polygon_per_size saved to ./resources/analysis/output/evs_in_polygon_per_size.pickle.bz2')
with bz2.BZ2File('./resources/analysis/output/evs_at_distance_in_polygons_per_size.pickle.bz2', 'wb') as compressed_output_file:
    pickle.dump(evs_at_distance_in_polygons_per_size, compressed_output_file)
    print('evs_at_distance_in_polygons_per_size saved to ./resources/analysis/output/evs_at_distance_in_polygons_per_size.pickle.bz2')

received 13 distance polygons and 28 prepared polygons
There are 6 replicates, 28 polygons
Processing replicate 0 : poly 0 checking 220290 evs, poly 1 checking 220290 evs, poly 2 checking 220290 evs, poly 3 checking 220290 evs, poly 4 checking 220290 evs, poly 5 checking 220290 evs, poly 6 checking 220290 evs, poly 7 checking 220290 evs, poly 8 checking 220290 evs, poly 9 checking 220290 evs, poly 10 checking 220290 evs, poly 11 checking 220290 evs, poly 12 checking 220290 evs, poly 13 checking 220290 evs, poly 14 checking 220290 evs, poly 15 checking 220290 evs, poly 16 checking 220290 evs, poly 17 checking 220290 evs, poly 18 checking 220290 evs, poly 19 checking 220290 evs, poly 20 checking 220290 evs, poly 21 checking 220290 evs, poly 22 checking 220290 evs, poly 23 checking 220290 evs, poly 24 checking 220290 evs, poly 25 checking 220290 evs, poly 26 checking 220290 evs, poly 27 checking 220290 evs,
Processing replicate 1 : poly 0 checking 219263 evs, poly 1 checking 219263 evs, p

In [27]:
# compute relevants stats per polygon
counts_per_polygon, max_freq_per_polygon = als.compute_basic_stats_per_polygon(evs_in_polygon_per_size)
counts_per_distance_per_polygon, max_freq_per_distance_per_polygon = als.compute_stats_per_distance_per_polygon(evs_at_distance_in_polygons_per_size)

In [28]:
%output size=150
als.produce_size_distribution_histograms_per_polygon(counts_per_polygon, targets['sizes'], targets['edges'], max_freq_per_polygon, columns=4, section_name=section)

## EV size distribution within X distances in the Regions of Interest

In [29]:
%output size=150
als.produce_size_distribution_histograms_per_polygon_at_distance(counts_per_distance_per_polygon, 
                                                                 targets['sizes'], targets['edges'],
                                                                 max_freq_per_distance_per_polygon, 
                                                                 analysis_setup['from_to'], columns=4, 
                                                                 section_name=section)

# Deprecated code

# needs to be updated to show an analysis of the selected elements
def show_positions_and_sizes2(pts, exterior_path, prefix='', dshader=True, highlight_ilocs=None, roi=None):
    %opts Scatter [tools=['hover']]
    all_points = hv.Scatter(pts, kdims=[('x', prefix + 'x'), ('y', prefix + 'y')], vdims=[('radius_um', prefix + 'radius_um'), ('id', prefix + 'id')])

    # user specified elements selected
    if highlight_ilocs:
        highlight_points = hv.Scatter(all_points.iloc[highlight_ilocs])
    
    if dshader:
        if highlight_ilocs:
            %opts RGB [width=300 height=300 bgcolor="white"]
            ds_all_points = datashade(all_points, x_sampling=1, y_sampling=1)
            ds_highlight_points = datashade(highlight_points, x_sampling=1, y_sampling=1)
        else:
            %opts RGB [width=300 height=300 bgcolor="white"]
            ds_all_points = datashade(all_points, x_sampling=1, y_sampling=1)

    # Declare a Bounds stream and DynamicMap to get the box select geometry and draw it
    box = streams.BoundsXY(source=highlight_points if highlight_ilocs else all_points, bounds=(0,0,0,0))
    bounds = hv.DynamicMap(lambda bounds: hv.Bounds(bounds), streams=[box])

    # Declare a DynamicMap to apply bounds selection
    dmap = hv.DynamicMap(lambda bounds: (highlight_points if highlight_ilocs else all_points).select(
        x=(bounds[0], bounds[2]), y=(bounds[1], bounds[3])), streams=[box])
    # Compute histograms of selection
    radius_um_hist_sel = hv.operation.histogram(dmap, bin_range=(highlight_points if highlight_ilocs else all_points).range('radius_um'), 
                                                dimension='radius_um', dynamic=True,
                                                normed=False, num_bins=10,
                                                frequency_label= prefix + 'frequency'
                                               ).relabel('EV size distribution in selected region')
    if highlight_ilocs:
        return exterior_path * all_points * (ds_highlight_points if dshader else highlight_points) * bounds + radius_um_hist_sel
    else:
        return exterior_path * (ds_all_points if dshader else all_points) * bounds + radius_um_hist_sel