### Semana 06 - lec02 - flow cytometry

  - FlowRepository
    - https://flowrepository.org/
  - Reading Flow cytometry:
    - https://taborlab.github.io/FlowCal/python_tutorial/read.html


### Distribuição normal centrada em 2.5 com SSD = .5

In [None]:
import os, sys, math
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

import matplotlib.pyplot as plt # matplotlib e seu alias plt
%matplotlib inline

In [None]:
# os.system("pip install FlowCal")
import FlowCal

In [None]:
# Read the FCS file with FlowCal's FCSData reader (path is relative to the notebook's folder).
data = FlowCal.io.FCSData('../data/PBMC_NTC Stim 5hr no Blocker_004.fcs')
# Show the array dimensions -- presumably (events, channels); confirm against data.channels.
data.shape

In [None]:
# Display the channel names stored in the file together with how many there are.
data.channels, len(data.channels)

In [None]:
# First row, all columns -- presumably the values of a single event across every channel (TODO confirm).
data[0,:]

In [None]:
# Take column 1 of every row and preview the first 20 values.
# NOTE(review): the index is hard-coded; presumably this is the forward-scatter (FSC) channel --
# verify against data.channels. The name `fcs` is kept because later cells depend on it.
fcs = data[:, 1]
fcs[:20]

In [None]:
# Take column 4 of every row and preview the first 20 values.
# NOTE(review): the index is hard-coded; presumably this is the side-scatter (SSC) channel --
# verify against data.channels.
ssc = data[:, 4]
ssc[:20]

In [None]:
import plotly
import plotly.graph_objects as go
import plotly.express as px
template='plotly_white'

import matplotlib as mpl
import pylab as plt
%matplotlib inline

In [None]:
# Histogram with a KDE curve overlaid for the raw FCS values
# (plain-matplotlib alternative: plt.hist(fcs, bins=100)).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -- confirm the installed version.
fig = plt.figure(figsize=(10, 6))
ax = sns.distplot(
    fcs,
    hist=True,
    kde=True,
    color='blue',
    hist_kws={'alpha': .4},
)

### Normality Hypothesis Test

https://machinelearningmastery.com/statistical-hypothesis-tests-in-python-cheat-sheet/

Jason Brownlee on August 15, 2018 in Statistics

In [None]:
# Example of the Shapiro-Wilk Normality Test
# (`shapiro` is imported here and reused by the later cells of this notebook.)
from scipy.stats import shapiro

# Draw ONE synthetic normal sample (mean 1, standard deviation 2, n = 1200).
# A dedicated, seeded generator makes the cell reproducible on "Restart & Run All";
# the sample used to be drawn twice, with the first draw thrown away.
rng = np.random.RandomState(42)
xrandom = rng.normal(loc=1, scale=2, size=1200)

# Null hypothesis of the test: the sample comes from a normal distribution.
stat, p = shapiro(xrandom)

print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    # Cannot reject normality at the 5% level.
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')

# Descriptive statistics; np.std uses its default ddof=0 (population formula).
mu  = np.mean(xrandom)
med = np.median(xrandom)
ssd = np.std(xrandom)
vc  = ssd/mu   # coefficient of variation: standard deviation relative to the mean

# Bare last expression -> shown as the cell's output.
'Mean = %.2f median  = %.2f SSD = %.2f, n = %d, and VC = %.2f'%(mu, med, ssd, len(xrandom), vc)

### FCS statistics

In [None]:
# Shapiro-Wilk normality test on the raw FCS values, followed by summary statistics.
# NOTE(review): SciPy documents that the p-value may be inaccurate for very large samples -- check len(fcs).
stat, p = shapiro(fcs)

print('stat=%.3f, p=%.3f' % (stat, p))
print('Probably Gaussian' if p > 0.05 else 'Probably not Gaussian')

# Mean, median and standard deviation (np.std default, ddof=0), then the coefficient of variation.
mu, med, ssd = np.mean(fcs), np.median(fcs), np.std(fcs)
vc = ssd/mu

'Mean = %.2f median  = %.2f SSD = %.2f, n = %d, and VC = %.2f'%(mu, med, ssd, len(fcs), vc)

### Matplotlib

In [None]:
# Plain matplotlib histogram of the SSC values using 100 bins.
fig = plt.figure(figsize=(10,6))
# `ret` keeps what plt.hist returns (counts, bin edges, patches).
ret = plt.hist(ssc, bins=100)

### Seaborn

In [None]:
# Seaborn version: histogram plus KDE curve of the raw FCS values.
# The trailing semicolon suppresses the Axes repr in the cell output.
fig = plt.figure(figsize=(10, 6))
sns.distplot(
    fcs,
    hist=True,
    kde=True,
    color='blue',
    hist_kws={'alpha': .4},
);

### Plotly

In [None]:
# Plotly histogram of the raw FCS values, with the Shapiro-Wilk result and the
# summary statistics written into the figure title.
stat, p = shapiro(fcs)

# ">>type" is a placeholder that is replaced below once the histogram mode is known.
title = 'FCS >>type plot<br>stat=%.3f, p=%.3f'%(stat, p)
if p > 0.05:
    title += ' - Probably Gaussian'
else:
    title += ' - Probably not Gaussian'

mu  = np.mean(fcs)
med = np.median(fcs)
ssd = np.std(fcs)
vc  = ssd/mu

title += '<br>Mean = %.2f median  = %.2f SSD = %.2f, n = %d, and VC = %.2f'%(mu, med, ssd, len(fcs), vc)

# True  -> bars show the percentage of events per bin
# False -> bars show raw counts per bin
in_percent = False

fig = go.Figure()

if in_percent:
    title = title.replace(">>type", "distribution")
    # histnorm='percent' makes the bar heights real percentages, matching the "(%)" axis label
    # ('probability' would produce fractions between 0 and 1).
    fig.add_trace(go.Histogram(x=fcs, histnorm='percent', marker_color='blue', opacity=0.4))
    yaxis_title="probability (%)"
else:
    title = title.replace(">>type", "frequency")
    fig.add_trace(go.Histogram(x=fcs, marker_color='blue', opacity=0.4))
    yaxis_title="frequency"

fig.update_layout(
    # barmode = 'group',
    autosize=True,
    title=title,
    # width=width,
    # height=height,
    # template=template,
    margin=dict( l=40, r=40, b=40, t=100, pad=4),
    font=dict(
        family="Arial, bold, monospace",
        size=14,
        color='black'
    ),
    xaxis_title="FCS",
    yaxis_title=yaxis_title,
    paper_bgcolor="whitesmoke",
    plot_bgcolor= "whitesmoke", # lightgrey ivory gainsboro whitesmoke lightsteelblue 'lightcyan' 'azure', white, lightgrey, snow ivory beige powderblue
    showlegend=False
)
fig.show()

### Lognormal

In [None]:
# log2-transform both scatter channels in one vectorised call each, instead of a
# Python-level loop over every event. np.asarray gives plain ndarrays, so the
# results can be passed straight to scipy, seaborn, plotly and pandas.
# NOTE(review): zero or negative readings would become -inf / NaN here -- confirm the
# channels are strictly positive before relying on the statistics below.
fcsLog = np.log2(np.asarray(fcs))
sscLog = np.log2(np.asarray(ssc))

In [None]:
# Shapiro-Wilk normality test on the log2-transformed FCS values, followed by summary statistics.
stat, p = shapiro(fcsLog)

print('stat=%.3f, p=%.3f' % (stat, p))
print('Probably Gaussian' if p > 0.05 else 'Probably not Gaussian')

# Mean, median and standard deviation (np.std default, ddof=0), then the coefficient of variation.
mu, med, ssd = np.mean(fcsLog), np.median(fcsLog), np.std(fcsLog)
vc = ssd/mu

'Mean = %.2f median  = %.2f SSD = %.2f, n = %d, and VC = %.2f'%(mu, med, ssd, len(fcsLog), vc)

In [None]:
# Shapiro-Wilk normality test on the log2-transformed SSC values, followed by summary statistics.
stat, p = shapiro(sscLog)

print('stat=%.3f, p=%.3f' % (stat, p))
print('Probably Gaussian' if p > 0.05 else 'Probably not Gaussian')

# Mean, median and standard deviation (np.std default, ddof=0), then the coefficient of variation.
mu, med, ssd = np.mean(sscLog), np.median(sscLog), np.std(sscLog)
vc = ssd/mu

'Mean = %.2f median  = %.2f SSD = %.2f, n = %d, and VC = %.2f'%(mu, med, ssd, len(sscLog), vc)

### Seaborn

In [None]:
# Overlay the log2 FCS (blue) and log2 SSC (red) distributions on a single figure.
fig = plt.figure(figsize=(12, 6))

ax = sns.distplot(
    fcsLog, label='fcs', color='blue',
    hist=True, kde=True, hist_kws={'alpha': .2},
)
ax = sns.distplot(
    sscLog, label='ssc', color='red',
    hist=True, kde=True, hist_kws={'alpha': .2},
)

plt.title("FCS and SSC distributions")
# Trailing semicolon hides the Legend repr in the cell output.
plt.legend();

### Plotly

In [None]:
# Plotly histogram of the log2-transformed FCS values, with the Shapiro-Wilk result
# and the summary statistics (all computed on fcsLog) written into the figure title.
stat, p = shapiro(fcsLog)

# ">>type" is a placeholder that is replaced below once the histogram mode is known.
title = 'FCS >>type plot<br>stat=%.3f, p=%.3f'%(stat, p)
if p > 0.05:
    title += ' - Probably Gaussian'
else:
    title += ' - Probably not Gaussian'

mu  = np.mean(fcsLog)
med = np.median(fcsLog)
ssd = np.std(fcsLog)
vc  = ssd/mu

title += '<br>Mean = %.2f median  = %.2f SSD = %.2f, n = %d, and VC = %.2f'%(mu, med, ssd, len(fcsLog), vc)

# True  -> bars show the percentage of events per bin
# False -> bars show raw counts per bin
in_percent = True

fig = go.Figure()

# Both branches plot fcsLog -- the same data the title statistics describe.
# (Plotting the untransformed fcs here would contradict the title.)
if in_percent:
    title = title.replace(">>type", "lognormal distribution")
    # histnorm='percent' makes the bar heights real percentages, matching the "(%)" axis label.
    fig.add_trace(go.Histogram(x=fcsLog, histnorm='percent', marker_color='blue', opacity=0.4))
    yaxis_title="percent (%)"
else:
    title = title.replace(">>type", "lognormal frequency")
    # No histnorm: raw counts, matching the "frequency" axis label.
    fig.add_trace(go.Histogram(x=fcsLog, marker_color='blue', opacity=0.4))
    yaxis_title="frequency"

fig.update_layout(
    # barmode = 'group',
    autosize=True,
    title=title,
    # width=width,
    # height=height,
    # template=template,
    margin=dict( l=40, r=40, b=40, t=100, pad=4),
    font=dict(
        family="Arial, bold, monospace",
        size=14,
        color='black'
    ),
    xaxis_title="log2(FCS)",
    yaxis_title=yaxis_title,
    paper_bgcolor="whitesmoke",
    plot_bgcolor= "whitesmoke", # lightgrey ivory gainsboro whitesmoke lightsteelblue 'lightcyan' 'azure', white, lightgrey, snow ivory beige powderblue
    showlegend=False
)
fig.show()

In [None]:
# Scatter plot of the log2 values: fcsLog on the x axis against sscLog on the y axis.
title = 'cytometry example'
fontsize = 14
fontcolor = 'black'
# width / height are only referenced by the commented-out layout options below.
width = 2000
height = 600

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=fcsLog,
        y=sscLog,
        mode='markers',
        marker_color='blue',
    )
)

fig.update_layout(
    title=title,
    autosize=True,
    # Optional overrides left disabled: barmode='group', width=width, height=height, template=template
    margin={'l': 40, 'r': 40, 'b': 40, 't': 100, 'pad': 4},
    font={
        'family': "Arial, bold, monospace",
        'size': fontsize,
        'color': fontcolor,
    },
    xaxis_title="FSC - forward scatter",
    yaxis_title="SSC = side scatter",
    paper_bgcolor="whitesmoke",
    plot_bgcolor="whitesmoke",
    showlegend=False,
)
fig.show()

In [None]:
# Two-column frame of the log2 values: one row per event, columns 'fscLog' and 'sscLog'.
# The sequences are stacked as labelled rows and then transposed into columns.
df = pd.DataFrame([fcsLog, sscLog], index=['fscLog', 'sscLog']).T
df

### Seaborn

see: http://seaborn.pydata.org/generated/seaborn.jointplot.html

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="white", color_codes=True)

# Joint scatter of the two log2 columns with their marginal distributions.
# Note: despite its name, `ax` holds the grid object returned by jointplot (it exposes .ax_joint).
ax = sns.jointplot(data=df, x="fscLog", y="sscLog");

#-- draw a circle: one large hollow red marker centred at (16.5, 17) on the joint axes
ax.ax_joint.plot(
    [16.5], [17], 'o',
    markersize=100,
    markeredgecolor='red',
    markerfacecolor='none',
)