In [3]:
import bokeh
from bokeh.plotting import show
import matplotlib.pyplot as plt
import numpy as np

import flowkit as fk
from pathlib import Path

# Send Bokeh figures to the notebook output area.
bokeh.io.output_notebook()
# Render matplotlib figures inline in the notebook.
%matplotlib inline

# Turn off matplotlib interactive mode; binding the result to `_` keeps it from being echoed as cell output.
_ = plt.ioff()

In [2]:
# Report the FlowKit version in use.
fk.__version__

'0.9.3'

In [4]:
# Load the example FCS file into a FlowKit Sample; `sample` is reused by the cells that follow.
fcs_path = Path('../example_data/101_DEN084Y5_15_E01_008_clean.fcs')
sample = fk.Sample(fcs_path_or_data=fcs_path)
# Show the per-channel table (channel_number, pnn, pns, png, pne, pnr columns).
sample.channels

Unnamed: 0,channel_number,pnn,pns,png,pne,pnr
0,1,FSC-A,,1.0,"(0.0, 0.0)",262144.0
1,2,FSC-H,,1.0,"(0.0, 0.0)",262144.0
2,3,FSC-W,,1.0,"(0.0, 0.0)",262144.0
3,4,SSC-A,,1.0,"(0.0, 0.0)",262144.0
4,5,SSC-H,,1.0,"(0.0, 0.0)",262144.0
5,6,SSC-W,,1.0,"(0.0, 0.0)",262144.0
6,7,TNFa FITC FLR-A,,1.0,"(0.0, 0.0)",262144.0
7,8,CD8 PerCP-Cy55 FLR-A,,1.0,"(0.0, 0.0)",262144.0
8,9,IL2 BV421 FLR-A,,1.0,"(0.0, 0.0)",262144.0
9,10,Aqua Amine FLR-A,,1.0,"(0.0, 0.0)",262144.0


In [13]:
# The file did not have this field and it is needed, so we just define it here.
sample.original_filename = 'Some name'

In [14]:
# Scatter plot of the raw (untransformed) events, channel 12 on x and channel 10 on y.
raw_scatter = sample.plot_scatter(
    x_label_or_number=12,
    y_label_or_number=10,
    source='raw',
    subsample=True,
)
show(raw_scatter)

In [15]:
# Define a Logicle transform; param_t equals the pnr value (262144) listed in the channel table.
logicle_xform = fk.transforms.LogicleTransform(
    transform_id='logicle',
    param_t=262144,
    param_m=4.5,
    param_w=0.5,
    param_a=0,
)
# Transform the sample's data with Logicle.
sample.apply_transform(transform=logicle_xform)

In [17]:
# Plot the Logicle-transformed data for the same channel pair.
logicle_scatter = sample.plot_scatter(
    x_label_or_number=12,
    y_label_or_number=10,
    source='xform',
    subsample=True,
)
show(logicle_scatter)

In [18]:
# Define an inverse hyperbolic sine (asinh) transform.
asinh_xform = fk.transforms.AsinhTransform(
    transform_id='asinh',
    param_t=262144,
    param_m=4.,
    param_a=0,
)
# Transform the sample's data with asinh.
sample.apply_transform(transform=asinh_xform)

In [19]:
# Plot the asinh-transformed data.
asinh_scatter = sample.plot_scatter(
    x_label_or_number=12,
    y_label_or_number=10,
    source='xform',
    subsample=True,
)
show(asinh_scatter)

In [20]:
# Define FlowJo's WSP biexponential transform.
# These are the default values in FlowKit and FlowJo.
biex_xform = fk.transforms.WSPBiexTransform(
    transform_id='biex',
    max_value=262144,
    positive=4.418540,
    width=-10,
    negative=0,
)
# Apply the WSPBiexponential transform to the sample.
sample.apply_transform(biex_xform)

In [21]:
# Plot the WSPBiexponential-transformed data (channel numbers passed positionally).
biex_scatter = sample.plot_scatter(
    12,
    10,
    source='xform',
    subsample=True,
)
show(biex_scatter)

In [26]:
# Load a second example file (two channels) as its own Sample.
diamond_fcs_path = Path("../example_data/test_data_diamond_01.fcs")
diamond_sample = fk.Sample(fcs_path_or_data=diamond_fcs_path)
# Set the filename field by hand, as was done for the first sample.
diamond_sample.original_filename = 'diamond'

In [27]:
# Show the channel table of the diamond sample.
diamond_sample.channels

Unnamed: 0,channel_number,pnn,pns,png,pne,pnr
0,1,channel_A,,1.0,"(0.0, 0.0)",262144.0
1,2,channel_B,,1.0,"(0.0, 0.0)",262144.0


In [28]:
# Scatter plot of the diamond sample's raw events: channel 1 (channel_A) vs channel 2 (channel_B).
diamond_raw_fig = diamond_sample.plot_scatter(
    x_label_or_number=1,
    y_label_or_number=2,
    source='raw',
)
show(diamond_raw_fig)

In [29]:
# Reuse the Logicle transform defined earlier on the diamond sample, then plot the transformed events.
diamond_sample.apply_transform(logicle_xform)
diamond_logicle_fig = diamond_sample.plot_scatter(
    x_label_or_number=1,
    y_label_or_number=2,
    source='xform',
)
show(diamond_logicle_fig)

In [30]:
# Example data that does not live in a Sample object:
# 20 evenly spaced values starting at 0, arranged as 10 rows x 2 columns.
step = 100000/20
data = np.arange(0, 100000, step).reshape(2, 10).transpose()
data

array([[    0., 50000.],
       [ 5000., 55000.],
       [10000., 60000.],
       [15000., 65000.],
       [20000., 70000.],
       [25000., 75000.],
       [30000., 80000.],
       [35000., 85000.],
       [40000., 90000.],
       [45000., 95000.]])

In [31]:
# Define a new Logicle transform; param_t equals the largest value in `data` (95000).
xform = fk.transforms.LogicleTransform(
    transform_id='logicle2',
    param_t=95000.,
    param_w=0.5,
    param_m=4.5,
    param_a=0,
)
# The data is a plain array, not a Sample, so it is passed to the transform's own
# apply() rather than the transform being passed to Sample.apply_transform().
xform_data = xform.apply(data)


In [32]:
# Display the Logicle-transformed array.
xform_data

array([[0.11111111, 0.93801936],
       [0.71515638, 0.9472245 ],
       [0.78240245, 0.95562761],
       [0.82165336, 0.9633573 ],
       [0.84947795, 0.97051356],
       [0.87105003, 0.97717562],
       [0.88867034, 0.98340735],
       [0.90356495, 0.98926098],
       [0.91646525, 0.99477978],
       [0.92784278, 1.        ]])

In [33]:
# Apply the inverse transform to the transformed array (round trip back toward the original values).
inv_xform_data = xform.inverse(xform_data)

In [34]:
# Display the inverse-transformed array for comparison with the original `data`.
inv_xform_data

array([[    0., 50000.],
       [ 5000., 55000.],
       [10000., 60000.],
       [15000., 65000.],
       [20000., 70000.],
       [25000., 75000.],
       [30000., 80000.],
       [35000., 85000.],
       [40000., 90000.],
       [45000., 95000.]])

In [35]:
# Collect the names (PnN labels) of the fluorescence channels, in fluoro_indices order.
pnn_labels = sample.pnn_labels
detectors = [pnn_labels[idx] for idx in sample.fluoro_indices]

In [36]:
# Display the fluorescence detector names.
detectors

['TNFa FITC FLR-A',
 'CD8 PerCP-Cy55 FLR-A',
 'IL2 BV421 FLR-A',
 'Aqua Amine FLR-A',
 'IFNg APC FLR-A',
 'CD3 APC-H7 FLR-A',
 'CD107a PE FLR-A',
 'CD4 PE-Cy7 FLR-A']

In [37]:
# Create a compensation matrix from the CSV file, labelled with the sample's fluorescence detectors.
comp_file_path = Path('../example_data/den_comp.csv')
comp_mat = fk.Matrix(
    matrix_id='my_spill',
    spill_data_or_file=comp_file_path,
    detectors=detectors,
)


In [39]:
# Display the compensation matrix as a DataFrame (rows and columns labelled by detector name).
comp_mat.as_dataframe()

Unnamed: 0,TNFa FITC FLR-A,CD8 PerCP-Cy55 FLR-A,IL2 BV421 FLR-A,Aqua Amine FLR-A,IFNg APC FLR-A,CD3 APC-H7 FLR-A,CD107a PE FLR-A,CD4 PE-Cy7 FLR-A
TNFa FITC FLR-A,1.0,0.014139,0.0,0.0,0.000458,0.0,0.015546,0.0
CD8 PerCP-Cy55 FLR-A,0.0,1.0,0.0,0.0,0.020596,0.087982,0.0,0.127012
IL2 BV421 FLR-A,0.004253,0.00014,1.0,0.0,6.8e-05,0.0,0.0,0.0
Aqua Amine FLR-A,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
IFNg APC FLR-A,0.0,0.007508,0.0,0.0,1.0,0.178821,0.0,0.020226
CD3 APC-H7 FLR-A,0.000745,0.0,0.0,0.0,0.010806,1.0,0.0,0.12764
CD107a PE FLR-A,0.000342,0.034257,0.0,0.0,0.000455,0.000174,1.0,0.006291
CD4 PE-Cy7 FLR-A,0.0,0.017456,0.0,0.0,0.000134,0.060009,0.0368,1.0


In [40]:
# Compensate the sample data with the matrix loaded from the CSV file.
sample.apply_compensation(comp_mat)

In [41]:
# Plot the same channel pair again after applying compensation.
comp_scatter = sample.plot_scatter(
    x_label_or_number=12,
    y_label_or_number=10,
    source='xform',
    subsample=True,
)
show(comp_scatter)

# Modify a compensation matrix

In [42]:
# Switch to another example file. This rebinds `fcs_path` and `sample`, replacing the sample used above.
fcs_path = Path('../example_data/100715.fcs')
# NOTE(review): subsample=20000 presumably sets how many events are kept for subsampled plots; confirm in the FlowKit Sample docs.
sample = fk.Sample(fcs_path_or_data=fcs_path, subsample=20000)

In [43]:
# Show the channel table of the newly loaded sample (this file has marker names in the pns column).
sample.channels

Unnamed: 0,channel_number,pnn,pns,png,pne,pnr
0,1,FSC-A,,1.0,"(0.0, 0.0)",262207.0
1,2,FSC-H,,1.0,"(0.0, 0.0)",262207.0
2,3,SSC-A,,1.0,"(0.0, 0.0)",261588.0
3,4,B515-A,KI67,1.0,"(0.0, 0.0)",261588.0
4,5,R780-A,CD3,1.0,"(0.0, 0.0)",261588.0
5,6,R710-A,CD28,1.0,"(0.0, 0.0)",261588.0
6,7,R660-A,CD45RO,1.0,"(0.0, 0.0)",261588.0
7,8,V800-A,CD8,1.0,"(0.0, 0.0)",261588.0
8,9,V655-A,CD4,1.0,"(0.0, 0.0)",261588.0
9,10,V585-A,CD57,1.0,"(0.0, 0.0)",261588.0


In [44]:
# Define a Logicle transform for the newly loaded data (this rebinds `xform`).
xform = fk.transforms.LogicleTransform(
    transform_id='logicle3',
    param_t=262144,
    param_m=4.5,
    param_w=0.5,
    param_a=0,
)
# Apply the Logicle transform to the new sample.
sample.apply_transform(xform)

In [45]:
# Plot CD3 (channel 5, R780-A) vs CD8 (channel 8, V800-A).
cd3_cd8_scatter = sample.plot_scatter(
    x_label_or_number=5,
    y_label_or_number=8,
    source='xform',
    subsample=True,
)
show(cd3_cd8_scatter)

In [46]:
# Apply compensation using the spillover matrix stored in the sample's own metadata.
embedded_spill = sample.metadata['spill']
sample.apply_compensation(compensation=embedded_spill)

In [48]:
# Plot CD3 vs CD8 again after applying the sample's stored spillover matrix.
cd3_cd8_comp_scatter = sample.plot_scatter(
    x_label_or_number=5,
    y_label_or_number=8,
    source='xform',
    subsample=True,
)
show(cd3_cd8_comp_scatter)

In [49]:
# Retrieve the compensation matrix attached to the sample (presumably the one set by the earlier apply_compensation call).
# Note: this rebinds `comp_mat`, which previously held the matrix loaded from CSV.
comp_mat = sample.compensation

In [50]:
# Display the sample's compensation matrix, labelled by detector name.
comp_mat.as_dataframe()

Unnamed: 0,B515-A,R780-A,R710-A,R660-A,V800-A,V655-A,V585-A,V450-A,G780-A,G710-A,G660-A,G610-A,G560-A
B515-A,1.0,0.0,0.0,8.8e-05,0.000249,0.000645,0.007198,0.0,0.0,0.000131,6.7e-05,0.000582,0.00252
R780-A,0.0,1.0,0.071188,0.148448,0.338903,0.009717,0.0,0.0,0.30138,0.007478,0.012354,0.0,0.0
R710-A,0.0,0.331405,1.0,0.061965,0.120979,0.004053,0.0,0.0,0.109117,0.100314,0.005832,0.0,0.0
R660-A,0.0,0.088621,0.389424,1.0,0.029759,0.065553,0.0,0.0,0.031294,0.039306,0.091375,0.000396,5.7e-05
V800-A,0.0,0.136618,0.010757,0.0,1.0,0.000156,0.0,0.0,0.483235,0.014858,0.0,0.0,0.0
V655-A,0.0,0.000124,0.019463,0.218206,0.004953,1.0,0.003583,0.0,0.001311,0.029646,0.408902,0.006506,0.000119
V585-A,0.0,0.0,0.0,0.0,0.001056,0.002287,1.0,0.0,0.000389,0.000194,0.0,0.062551,0.132484
V450-A,0.0,0.0,0.0,0.0,0.0,0.008118,0.170066,1.0,0.0,0.0,0.0,0.0,0.0
G780-A,0.003122,0.008526,0.001024,0.001163,0.125401,0.018142,0.193646,0.0,1.0,0.066898,0.161456,0.286823,1.238037
G710-A,0.002015,0.069645,0.194715,0.001008,0.151611,0.00127,0.007133,0.0,1.150032,1.0,0.016077,0.014674,0.055352


In [51]:
# Export the sample's compensation matrix as a DataFrame whose rows and columns use the
# fluorochrome/marker names (e.g. CD3, CD8) instead of the detector names (e.g. R780-A, V800-A).
comp_df = comp_mat.as_dataframe(fluoro_labels=True)

In [52]:
# Display the marker-labelled compensation DataFrame.
comp_df

Unnamed: 0,KI67,CD3,CD28,CD45RO,CD8,CD4,CD57,VIVID / CD14,CCR5,CD19,CD27,CCR7,CD127
KI67,1.0,0.0,0.0,8.8e-05,0.000249,0.000645,0.007198,0.0,0.0,0.000131,6.7e-05,0.000582,0.00252
CD3,0.0,1.0,0.071188,0.148448,0.338903,0.009717,0.0,0.0,0.30138,0.007478,0.012354,0.0,0.0
CD28,0.0,0.331405,1.0,0.061965,0.120979,0.004053,0.0,0.0,0.109117,0.100314,0.005832,0.0,0.0
CD45RO,0.0,0.088621,0.389424,1.0,0.029759,0.065553,0.0,0.0,0.031294,0.039306,0.091375,0.000396,5.7e-05
CD8,0.0,0.136618,0.010757,0.0,1.0,0.000156,0.0,0.0,0.483235,0.014858,0.0,0.0,0.0
CD4,0.0,0.000124,0.019463,0.218206,0.004953,1.0,0.003583,0.0,0.001311,0.029646,0.408902,0.006506,0.000119
CD57,0.0,0.0,0.0,0.0,0.001056,0.002287,1.0,0.0,0.000389,0.000194,0.0,0.062551,0.132484
VIVID / CD14,0.0,0.0,0.0,0.0,0.0,0.008118,0.170066,1.0,0.0,0.0,0.0,0.0,0.0
CCR5,0.003122,0.008526,0.001024,0.001163,0.125401,0.018142,0.193646,0.0,1.0,0.066898,0.161456,0.286823,1.238037
CD19,0.002015,0.069645,0.194715,0.001008,0.151611,0.00127,0.007133,0.0,1.150032,1.0,0.016077,0.014674,0.055352


In [53]:
# Show the two compensation values linking CD3 and CD8 (row CD3 / column CD8, then the reverse).
cd3_into_cd8 = comp_df.loc['CD3', 'CD8']
cd8_into_cd3 = comp_df.loc['CD8', 'CD3']
print(cd3_into_cd8, cd8_into_cd3)

0.3389031912802132 0.13661791418865094


In [54]:
# Edit the CD3/CD8 compensation values directly in comp_df.
# Use a single .loc[row, column] assignment. The chained form (.loc[row][column] = value)
# assigns into an intermediate Series that may be a copy, so the edit can be silently
# lost (SettingWithCopyWarning; always lost under pandas copy-on-write).
comp_df.loc['CD3', 'CD8'] = 0.3
comp_df.loc['CD8', 'CD3'] = 0.1

In [56]:
# Define a modified compensation matrix from the edited DataFrame values,
# reusing the detector and fluorochrome labels of the original matrix.
# NOTE(review): `fluorochomes` (sic) is the attribute spelling this notebook reads from the
# Matrix object; confirm against the installed FlowKit before "correcting" it.
modified_spill = comp_df.values
comp_mat_modified = fk.Matrix(
    matrix_id="custom_spill",
    spill_data_or_file=modified_spill,
    detectors=comp_mat.detectors,
    fluorochromes=comp_mat.fluorochomes,
)
# Apply the new compensation matrix to the sample.
sample.apply_compensation(compensation=comp_mat_modified)

In [57]:
# Plot CD3 vs CD8 after applying the modified compensation matrix.
modified_comp_scatter = sample.plot_scatter(
    x_label_or_number=5,
    y_label_or_number=8,
    source='xform',
    subsample=True,
)
show(modified_comp_scatter)