In [12]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from tqdm import tqdm
import gc

In [154]:
def _clean(x, default='float'):   
    non_default = 'int' if default=='float' else 'float'
    try:
        x.replace([np.inf, -np.inf], np.nan, inplace=True)
        x.dropna(how='all', axis=1, inplace=True)
        if default=='float':
            x = x * 1.0
        else:
            x = x * 1
    except Exception as e:
        print(e)
        for col in tqdm.tqdm(x.columns):
            if 'object' in str(x[col].dtypes):
                try:
                    x[col] = x[col].astype(default)
                except:
                    try:
                        x[col] = x[col].astype(non_default)
                    except:
                        print(col)
                        x[col] = x[col].astype('category')
    return x

def get_transposed(df, NameRow='GenX', prefix='GenX'):
    """Transpose ``df`` and use the values of column ``NameRow`` as column labels.

    After transposing, the row labelled ``NameRow`` supplies the new column
    names (each prepended with ``prefix + '_'`` unless ``prefix`` is ``None``)
    and is then removed from the result.
    """
    flipped = df.T
    # Values of the naming row, in column order.
    labels = flipped.loc[[NameRow]].values[0].tolist()
    flipped.columns = labels
    if prefix is not None:
        flipped.columns = [prefix + '_' + label for label in flipped.columns.tolist()]
    return flipped.drop(index=NameRow)

# TODO: def _impute(df, type='FM'):
#   Use similarity to replace missing values with the values of the most similar patients.

In [14]:
# loading data

In [15]:
# Tab-separated phenotype tables; the large table is indexed by sample id
# ('submitter_id.samples').
pheno_small = pd.read_csv("../_docs/Lung_Phenotype_Metadata.txt", sep="\t")
pheno_large = (
    pd.read_csv("../_docs/Lung_Table_Phenotypes.txt", sep="\t")
    .set_index('submitter_id.samples')
)

In [16]:
# Directory that holds the expression tables.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
sourceDir =  "/media/bramvanes/Extra/DATA/RexR/2018" #"/media/koekiemonster/DATA-FAST/genetic_expression/hackathon_2/"
gene_expression = pd.read_table(sourceDir+"/Lung/Lung_GeneExpression.txt", sep="\t")

# Remove rows whose gene name starts with AFFX.
# The rows are selected with a boolean mask on the 'Gene' column: passing the
# gene names to `drop(axis=0)` would look them up in the row index, which does
# not hold gene names, and raise a KeyError as soon as one AFFX row exists.
is_affx = gene_expression['Gene'].astype(str).str.match(r"^(AFFX.*)")
affx_labels = list(gene_expression.loc[is_affx, 'Gene']) # AFFX
gene_expression = gene_expression.loc[~is_affx]

# Keep only genes whose Start and Stop positions are both finite.
has_position = np.isfinite(gene_expression['Start']) & np.isfinite(gene_expression['Stop'])
gene_expression = gene_expression.loc[has_position]

# Render the identifier parts as strings (positions without a decimal part).
gene_expression['Start'] = gene_expression['Start'].astype(int).astype(str)
gene_expression['Stop'] = gene_expression['Stop'].astype(int).astype(str)
gene_expression['Chr'] = gene_expression['Chr'].astype(str)
gene_expression['Gene'] = gene_expression['Gene'].astype(str)

# GenX = "<Gene>.<Chr>.<Start>.<Stop>", built with vectorised string concatenation.
gene_expression['GenX'] = (gene_expression['Gene'] + '.' + gene_expression['Chr'] + '.'
                           + gene_expression['Start'] + '.' + gene_expression['Stop'])
_map_RNA = gene_expression[['Gene', 'GenX']]
gene_expression = gene_expression.drop(['Gene', 'Chr', 'Start', 'Stop'], axis=1)

# One cleaned frame per strand, transposed so that GenX identifiers become the columns.
dict_RNA = {
    key: _clean(get_transposed(gene_expression
                               .loc[gene_expression['Strand'] == strand]
                               .drop(['Strand'], axis=1)))
    for key, strand in (('RNA_StrandPlus', '+'), ('RNA_StrandMin', '-'))
}


In [17]:
# CLEAN MEMORY
# Drop the raw expression table (it is not used again in the cells shown here)
# and run a garbage-collection pass. Re-running this cell without re-running
# the loading cell raises NameError, because the name no longer exists.
del gene_expression
gc.collect()

7

In [18]:
# merge with phenotypes
# Phenotype columns that are attached to the expression frames.
pheno_features = [
    'batch_number',
    'code.tissue_source_site',
    'sample_type.samples',
    'vial_number',
    'ethnicity.demographic',
    'gender.demographic',
    'race.demographic',
    'year_of_birth.demographic',
    'diagnosis',
]

In [77]:
# Inner-join each strand's expression frame with the selected phenotype
# columns, matching rows on the index of both frames.
merged_RNA_min = pd.merge(
    dict_RNA['RNA_StrandMin'],
    pheno_large[pheno_features],
    how='inner',
    left_index=True,
    right_index=True,
)
merged_RNA_plus = pd.merge(
    dict_RNA['RNA_StrandPlus'],
    pheno_large[pheno_features],
    how='inner',
    left_index=True,
    right_index=True,
)


# Bias correction functions
## L/S (location/scale), cohort-based normalisation

Mean-based $$\mathbf{x}^*=\frac{\mathbf{x}-\overline{\mathbf{x}}}{\sigma}$$


Median-based $$\mathbf{x}^*=\frac{\mathbf{x}-\operatorname{median}(\mathbf{x})}{\mathrm{IQR}(\mathbf{x})}$$






In [163]:
# L/S
def _preprocess(df, cohorts = [], scaler = "standard", bias_removal = False, col_range = (0,30000), debug=False):
        cr = range(col_range[0], col_range[1])
        gene_columns = df.columns[cr]
        
        if len(cohorts)==0:
            cohorts = df.batch_number.unique().tolist()
        
        # MinMaxScaler(), MaxAbsScaler(), RobustScaler(), QuantileTransformer(), Normalizer()
        if scaler == "standard":
            scaler = preprocessing.StandardScaler(with_mean=True, with_std=True)
        elif scaler == "minmax":
            scaler = preprocessing.MinMaxScaler()
        elif scaler == "maxabs":
            scaler = preprocessing.MaxAbsScaler()
        elif scaler == "robust":
            scaler = preprocessing.RobustScaler(quantile_range=(25.0, 75.0), 
                                                    with_scaling=True, with_centering=True)
        elif scaler in ["normalizer", "normaliser"]:
            scaler = preprocessing.Normalizer()

        if bias_removal == True:
            print("- "*30, 'Removing cohort biases')
            for cohort in tqdm(cohorts): # easy to parallelise...
                ch = df['batch_number']==cohort
                # app 1 sklearn
                if debug==False:
                    try:
                        # remove nans
                        #df.loc[ch,gene_columns] = scaler.fit_transform(df.loc[ch,gene_columns])
                        df_temp = df.loc[ch,gene_columns].copy()
                        res = (df_temp-df_temp.mean(skipna=True, axis=0))/\
                                                        df_temp.std(skipna=True, axis=0)
                        df.loc[ch,gene_columns] = res
                    except Exception as e:
                        print("ERROR", e, "cohort:"+cohort)
                else:
                    for _col in gene_columns: # for debugging
                        df_temp = df.loc[ch, _col].copy() 
                        try:
                            df.loc[ch, _col] = (df_temp-df_temp.mean())/df_temp.std()
                        except Exception as e:
                            print("ERROR", e, "gene:"+_col, "cohort:"+cohort)
        else:
            ch = df["array-batch"].isin(cohorts)
            df.loc[ch,gene_columns] = scaler.fit_transform(df.loc[ch,gene_columns])
            

        df = df[df["batch_number"].isin(cohorts)]
        return df       

In [164]:
# Z-score the gene columns of the minus-strand frame within each batch.
# The returned frame is not assigned: _preprocess writes the normalised values
# into merged_RNA_min itself.
# NOTE(review): 29905 is a hard-coded column count — confirm it matches the
# number of gene columns in merged_RNA_min.
_preprocess(
    merged_RNA_min,
    col_range=(0, 29905),  # 29905
    bias_removal=True,
    debug=False,
)

  0%|          | 0/42 [00:00<?, ?it/s]

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  Removing cohort biases


  2%|▏         | 1/42 [00:23<15:43, 23.00s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (18, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

  5%|▍         | 2/42 [00:45<15:16, 22.92s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (39, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

  7%|▋         | 3/42 [01:08<14:51, 22.87s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (56, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 10%|▉         | 4/42 [01:30<14:17, 22.55s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (53, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 12%|█▏        | 5/42 [01:51<13:46, 22.33s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (32, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 14%|█▍        | 6/42 [02:13<13:19, 22.20s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (36, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 17%|█▋        | 7/42 [02:35<12:58, 22.25s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (26, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 19%|█▉        | 8/42 [02:57<12:35, 22.22s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (13, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 21%|██▏       | 9/42 [03:19<12:10, 22.13s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (29, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 24%|██▍       | 10/42 [03:41<11:47, 22.12s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (22, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 26%|██▌       | 11/42 [04:03<11:24, 22.09s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (9, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Gen

 29%|██▊       | 12/42 [04:25<11:03, 22.13s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (48, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

 31%|███       | 13/42 [04:46<10:40, 22.07s/it]

Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'GenX_DPM1.chr20.50934867.50958555',
       'GenX_SCYL3.chr1.169849631.169894267',
       'GenX_FGR.chr1.27612064.27635277',
       'GenX_FUCA2.chr6.143494811.143511690',
       'GenX_GCLC.chr6.53497341.53616970', 'GenX_STPG1.chr1.24356999.24416934',
       'GenX_LAS1L.chrX.65512582.65534775',
       'GenX_CYP51A1.chr7.92112151.92142952',
       'GenX_KRIT1.chr7.92198969.92246166',
       ...
       'GenX_TRPC6P.chrY.57171890.57172769',
       'GenX_DHRSX-IT1.chrY.2334295.2336410',
       'GenX_RPL14P5.chrY.1008503.1010101',
       'GenX_DDX11L16.chrY.57212184.57214397',
       'GenX_TCEB1P24.chrY.57165512.57165845',
       'GenX_KRT18P53.chrY.545236.545352',
       'GenX_LINC00102.chrY.2612988.2615347',
       'GenX_FABP5P13.chrY.523775.524102', 'GenX_AMDP1.chrY.57015105.57016096',
       'GenX_Metazoa_SRP.chrY.388100.388389'],
      dtype='object', length=29911) (18, 29911) Index(['GenX_TSPAN6.chrX.100627109.100639991',
       'Ge

KeyboardInterrupt: 

In [None]:
## PCA-shift


## ANOVA 2-way


## Combat
# Use an R script, called from Python