# MicroArray Data Analysis

```python
# If you have any questions, please run the following command to send me a message :)
from teilab.question import ask
ask(text="I have a question about...")
```

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from teilab.datasets import TeiLabDataSets
from teilab.normalizations import quantile
from teilab.plot.plotly import densityplot
from teilab.utils import subplots_create

# Raise pandas' column display limit to 200.
pd.options.display.max_columns = 200

### 1. Prepare Required Data

Some of the data are **"unpublished"**, so please handle them carefully.

In [14]:
# Dataset handle used by the following cells (`get_data`, `samples`, `read_data`, ...).
# presumably verbose=False silences progress messages — TODO confirm
datasets = TeiLabDataSets(verbose=False)

In [15]:
# Request the two password-protected data sets; the return values are kept as
# `data_dir1` / `data_dir2`.
# NOTE(review): the passwords are hard-coded in the notebook even though the data are
# described as unpublished — consider reading them from an environment variable or
# `getpass.getpass()` before sharing this notebook.
data_dir1 = datasets.get_data(password="microarray2020A")
data_dir2 = datasets.get_data(password="microarray2021S")

In [16]:
# Print the sample table (idx, group number, group name, file name) to pick a group from.
datasets.samples.show_groups()

  idx    gn  GroupName                              FileName
-----  ----  -------------------------------------  ---------------------------------------------------
    0     0  SG19378659_257236339458_S001_GE1_1200  SG19378659_257236339458_S001_GE1_1200_Jun14_1_1.txt
    1     0  SG19378659_257236339458_S001_GE1_1200  SG19378659_257236339458_S001_GE1_1200_Jun14_1_2.txt
    2     0  SG19378659_257236339458_S001_GE1_1200  SG19378659_257236339458_S001_GE1_1200_Jun14_1_3.txt
    3     0  SG19378659_257236339458_S001_GE1_1200  SG19378659_257236339458_S001_GE1_1200_Jun14_1_4.txt
    4     0  SG19378659_257236339458_S001_GE1_1200  SG19378659_257236339458_S001_GE1_1200_Jun14_2_1.txt
    5     1  US91503671_253949442637_S01_GE1_105    US91503671_253949442637_S01_GE1_105_Dec08_1_1.txt
    6     1  US91503671_253949442637_S01_GE1_105    US91503671_253949442637_S01_GE1_105_Dec08_2_3.txt
    7     1  US91503671_253949442637_S01_GE1_105    US91503671_253949442637_S01_GE1_105_Dec08_2_4.txt
    8    

### 2. Read Data & Merge

In [37]:
from teilab.utils import subplots_create
# Create the figure object `fig`, which is displayed again by a bare `fig` cell later on.
# presumably style="plotly" selects a plotly figure instead of a matplotlib one — TODO confirm
fig = subplots_create(style="plotly")

In [85]:
from teilab.plot.plotly import MAplot

In [39]:
# Plot the first two sample conditions against each other (x vs. y), using the
# "SystematicName" column for the hover labels.
# The doctest prompt (">>> ") that prefixed this statement was removed: it is only
# tolerated by IPython's prompt stripping and is a syntax error in plain Python.
# NOTE(review): `XYplot` is not imported in the visible cells and `df_combined` is
# built in a later cell — confirm both exist before this cell runs on a fresh kernel.
fig1 = XYplot(
    df=df_combined,
    x=datasets.samples.Condition[0],
    y=datasets.samples.Condition[1],
    hover_name="SystematicName",
    height=600,
    width=600,
)

In [17]:
# Sample indices that belong to group 1; the merge cell below iterates over this list.
sample_numbers = datasets.samples.get_group_numbers(group_no=1)
print(sample_numbers)

[5, 6, 7, 8, 9, 10, 11, 12]


In [18]:
# Merge the target signal column of every sample in the group into one table and keep
# only the rows that pass `datasets.reliable_filter` in *all* samples.

# Annotation columns are read from the first sample of the group.
df_anno = datasets.read_data(no=sample_numbers[0], usecols=datasets.ANNO_COLNAMES)
reliable_index = set(df_anno.index)
print(f"[Before] The number of data: {len(df_anno)}")

# Collect one renamed signal column per sample and concatenate once at the end,
# instead of re-copying the growing frame on every loop iteration.
signal_columns = []
for no in sample_numbers:
    df_data = datasets.read_data(no=no)
    # Intersect so that a row survives only if it is reliable in every sample.
    reliable_index &= set(datasets.reliable_filter(df=df_data))
    signal_columns.append(
        df_data[[datasets.TARGET_COLNAME]].rename(
            columns={datasets.TARGET_COLNAME: datasets.samples.Condition[no]}
        )
    )
df_merged = pd.concat([df_anno, *signal_columns], axis=1)

# A set is not a valid `.loc` indexer in recent pandas (TypeError since 2.0), and its
# iteration order is not a guaranteed row order. A boolean mask works on every pandas
# version and keeps the rows in their original order.
df_combined = df_merged.loc[df_merged.index.isin(reliable_index), :].reset_index(drop=True)
print(f"[After] The number of data: {len(df_combined)}")

[Before] The number of data: 62976
[After] The number of data: 20947


In [23]:
# Inspect a single column of the merged table.
# presumably "mock(1)" is one of the condition names used as signal column labels — TODO confirm
df_combined["mock(1)"]

0         12566.8700
1        122723.7000
2          1234.1460
3           994.2160
4          4842.2340
            ...     
20942       673.4793
20943      3253.3950
20944       262.7825
20945     75533.9700
20946       619.1220
Name: mock(1), Length: 20947, dtype: float64

$\log_2{(da)}$

### 3. Normalization

In [None]:

# Sample `n_samples` evenly spaced RGBA colors from matplotlib's default colormap.
# `plt` is provided by the `import matplotlib.pyplot as plt` line in the import cell;
# without that import this cell fails with a NameError on a fresh kernel.
n_samples = 8
cmap = plt.get_cmap(name=None)  # name=None selects the default colormap (rcParams["image.cmap"])
# Positions 1/n, 2/n, ..., 1.0 — the lowest end of the colormap (0.0) is deliberately skipped.
colors = [cmap((i + 1) / n_samples) for i in range(n_samples)]
colors

In [None]:
# Print one color per column of `data`.
# NOTE(review): `get_colorList` is not imported in the visible cells and `data` is only
# assigned in a later cell — confirm this cell runs on a fresh kernel.
print(get_colorList(n=data.shape[1], style="plotly"))

In [None]:
from matplotlib.colors import Colormap
# Look up the registered "bwr" (blue-white-red) colormap for inspection.
# `Colormap("bwr")` only builds an abstract base-class instance that carries the name
# "bwr"; it has no color table and cannot be rendered or called.
plt.get_cmap("bwr")

In [None]:
# Request three colors from the "bwr" colormap in plotly style.
# NOTE(review): `get_colorList` is not imported in the visible cells — confirm its origin.
get_colorList(n=3, cmap="bwr", style="plotly")

In [None]:
# Extract the selected columns of the merged table as a plain array for plotting.
# NOTE(review): `expression_colnames` is not defined in the visible cells — confirm
# where it is assigned.
data = df_combined[expression_colnames].values

In [None]:
# Density plot of the log2-transformed, transposed `data` array.
# Eight explicit colors are passed, running from blue through near-white to red.
densityplot(
    np.log2(data.T),
    cmap="bwr",
    xlabel="$\\log_{2}(\\text{gProcessedSignal})$",
    colors=[
        'rgba(64,64,255,255)',
        'rgba(128,128,255,0.3)',
        'rgba(192,192,255,0.2)',
        'rgba(255,254,254,0.3)',
        'rgba(255,190,190,0.3)',
        'rgba(255,126,126,0.3)',
        'rgba(255,62,62,0.2)',
        'rgba(255,0,0,255)',
    ],
)

In [None]:
# Display the current state of `fig` as the cell output.
fig

In [None]:
import plotly.graph_objects as go