# MicroArray Data Analysis

```python
# If you have any questions, please run the following command to send me a message :)
from teilab.question import ask
ask(text="I have a question about...")
```

In [1]:
import numpy as np
import pandas as pd
from teilab.datasets import TeiLabDataSets
from teilab.normalizations import quantile
from teilab.plot.plotly import densityplot
from teilab.utils import subplots_create

# Show up to 200 columns when a DataFrame is displayed.
pd.options.display.max_columns = 200

In [7]:
# Small demo of teilab's `update_layout` helper on a 1x2 subplot grid.
# (Doctest prompts removed so the cell is valid plain Python.)
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from teilab.plot.plotly import update_layout

fig = make_subplots(rows=1, cols=2)
# Add the same three-point scatter trace to column 1 and column 2.
for c in range(1, 3):
    fig.add_trace(go.Scatter(x=[1, 2, 3], y=[4, 5, 6]), row=1, col=c)
# Pass the figure through update_layout with a title, y-limits, a column
# selector and a height, then render it.
fig = update_layout(fig=fig, title="Sample", ylim=(4.5, 5.5), col=2, height=400)
fig.show()

In [3]:
import plotly
# Display the path plotly is currently configured to use for the orca executable.
plotly.io.orca.config.executable

'/usr/local/bin/orca'

In [None]:
 = '/path/to/orca'

After updating this executable property, try the export operation again.
If it is successful then you may want to save this configuration so that it
will be applied automatically in future sessions. You can do this as follows:

    >>> plotly.io.orca.config.save(

In [None]:
# NOTE(review): `a` is not assigned anywhere in the visible cells; this looks
# like leftover scratch input — confirm and remove.
a

### 1. Prepare Required Data

Some of the data are **"unpublished"**, so please handle them carefully.

In [None]:
# Create the TeiLabDataSets helper (with verbose=False) that the following
# cells use to download, read and filter the data.
datasets = TeiLabDataSets(verbose=False)

In [None]:
# Request two password-protected data sets and keep whatever get_data returns
# (presumably the local data directories — confirm against teilab).
# NOTE(review): the passwords are hard-coded in the notebook; consider reading
# them from an environment variable or an interactive prompt before sharing it.
data_dir1 = datasets.get_data(password="microarray2020A")
data_dir2 = datasets.get_data(password="microarray2021S")

In [None]:
# Presumably lists the available sample groups so a group number can be chosen
# below — confirm against teilab's show_groups.
datasets.samples.show_groups()

### 2. Read Data & Merge

In [None]:
# Look up the sample numbers that belong to group 1 and print them.
sample_numbers = datasets.samples.get_group_numbers(group_no=1)
print(sample_numbers)

In [None]:
# Load the annotation columns from the first sample; they become the leading
# columns of the merged table.
df_anno = datasets.read_data(no=sample_numbers[0], usecols=datasets.ANNO_COLNAMES)
reliable_index = set(df_anno.index)
print(f"[Before] The number of data: {len(df_anno)}")

# Collect one target column per sample (renamed to that sample's condition)
# and concatenate once at the end instead of re-copying the growing frame on
# every iteration.
frames = [df_anno.copy(deep=True)]
for no in sample_numbers:
    df_data = datasets.read_data(no=no)
    # Keep only the row labels that pass the reliability filter in every sample.
    # NOTE(review): assumes reliable_filter returns an iterable of row labels — confirm.
    reliable_index &= set(datasets.reliable_filter(df=df_data))
    frames.append(
        df_data[[datasets.TARGET_COLNAME]].rename(
            columns={datasets.TARGET_COLNAME: datasets.samples.Condition[no]}
        )
    )
df_combined = pd.concat(frames, axis=1)

# A set is not a valid .loc indexer in current pandas (deprecated in 1.5,
# TypeError since 2.0). A boolean mask selects the same rows and keeps them in
# their original order rather than in set-iteration order.
is_reliable = df_combined.index.isin(reliable_index)
df_combined = df_combined.loc[is_reliable, :].reset_index(drop=True)
print(f"[After] The number of data: {len(df_combined)}")

In [None]:
# Preview the first three rows of the merged table.
df_combined.head(3)

### 3. Normalization

In [None]:

# `plt` is not imported in any other visible cell, so import it here to keep
# this cell runnable on its own.
import matplotlib.pyplot as plt

# Sample `n_samples` evenly spaced colors from a matplotlib colormap
# (name=None asks matplotlib for its default colormap).
n_samples = 8
cmap = plt.get_cmap(name=None)
# Positions are (i + 1) / n_samples, i.e. 1/n_samples ... 1.0.
colors = [cmap((i + 1) / n_samples) for i in range(n_samples)]
colors

In [None]:
# Print a color list with one entry per column of `data`, in "plotly" style.
# NOTE(review): `get_colorList` is not imported in the visible cells and `data`
# is only assigned in a later cell, so this cell depends on out-of-order
# execution — confirm the intended import and cell order.
print(get_colorList(n=data.shape[1], style="plotly"))

In [None]:
from matplotlib.colors import Colormap
# Construct a Colormap object named "bwr".
# NOTE(review): presumably the registered "bwr" colormap was wanted; this
# instantiates the Colormap class directly rather than looking one up — confirm.
Colormap("bwr")

In [None]:
# Request three colors for the "bwr" colormap in "plotly" style.
# NOTE(review): `get_colorList` is not imported in the visible cells — confirm its origin.
get_colorList(n=3, cmap="bwr", style="plotly")

In [None]:
# Extract the selected columns of the merged table as a NumPy array.
# NOTE(review): `expression_colnames` is not defined in the visible cells —
# presumably the per-sample column names added during the merge; confirm.
data = df_combined[expression_colnames].values

In [None]:
# Plot the density of log2(data); the matrix is transposed first, so each row
# handed to densityplot is one column of `data`.
# NOTE(review): both `cmap` and an explicit `colors` list are passed — check
# which one densityplot honors.
densityplot(
    np.log2(data.T),
    cmap="bwr",
    xlabel="$\\log_{2}(\\text{gProcessedSignal})$",
    colors=[
        'rgba(64,64,255,255)',
        'rgba(128,128,255,0.3)',
        'rgba(192,192,255,0.2)',
        'rgba(255,254,254,0.3)',
        'rgba(255,190,190,0.3)',
        'rgba(255,126,126,0.3)',
        'rgba(255,62,62,0.2)',
        'rgba(255,0,0,255)',
    ],
)

In [None]:
# Display the object currently bound to `fig`.
# NOTE(review): the densityplot call in the previous cell does not assign its
# result, so within the visible cells `fig` is still the figure from the
# update_layout demo — confirm this is intended.
fig

In [None]:
import plotly.graph_objects as go