In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind, rankdata

def bh_adjust(pvalues):
    """Return Benjamini-Hochberg adjusted p-values for a 1-D collection.

    Parameters
    ----------
    pvalues : array-like of float
        Raw p-values. NaN entries (e.g. from tests on constant data) are
        excluded from the number of tests and stay NaN in the result.

    Returns
    -------
    numpy.ndarray
        Adjusted p-values in the same order as the input, each capped at 1.
    """
    raw = np.asarray(pvalues, dtype=float)
    adjusted = np.full(raw.shape, np.nan)

    tested = ~np.isnan(raw)
    n_tests = int(tested.sum())
    if n_tests == 0:
        return adjusted

    valid = raw[tested]
    ascending = np.argsort(valid)
    scaled = valid[ascending] * n_tests / np.arange(1, n_tests + 1)
    # Step-up rule: an adjusted value may never exceed the adjusted value of
    # any larger raw p-value, so take a running minimum from the largest
    # p-value downwards. Without this, adjusted values can be non-monotone.
    stepped = np.minimum.accumulate(scaled[::-1])[::-1]

    restored = np.empty(n_tests)
    restored[ascending] = np.minimum(stepped, 1.0)
    adjusted[tested] = restored
    return adjusted


# Expression table: rows are indexed by the first column of the file,
# remaining columns are per-sample values.
df = pd.read_csv("colon_cancer_tumor_vs_normal_unpaired_FPKM.tsv", sep="\t", index_col=0)

# Sample groups are picked by a substring of the column name
# (presumably TCGA sample-type codes: 01A = tumor, 11A = normal -- confirm).
cancer_columns = [col for col in df.columns if '01A' in col]
normal_columns = [col for col in df.columns if '11A' in col]
if not cancer_columns or not normal_columns:
    raise ValueError(
        "Expected both '01A' and '11A' sample columns, found "
        f"{len(cancer_columns)} and {len(normal_columns)} respectively"
    )

# One two-sample t-test per row, computed in a single vectorised call
# (axis=1 tests across the sample columns of each row). The groups are
# extracted before the result columns are added, so they hold samples only.
df["p-value"] = ttest_ind(
    df[cancer_columns].to_numpy(dtype=float),
    df[normal_columns].to_numpy(dtype=float),
    axis=1,
).pvalue
df["padj_BH"] = bh_adjust(df["p-value"])

df2 = pd.read_csv("DESeq2_results_unpaired.tsv", sep="\t", index_col=0)

# Sort once; the raw p-value breaks ties in the adjusted value so the
# top-10 selection is deterministic.
ranked = df.sort_values(by=["padj_BH", "p-value"])
top_ttest = list(ranked.index[:10])
# NOTE(review): takes the first 10 rows as-is, i.e. assumes the DESeq2 file
# is already ordered by significance -- confirm.
top_deseq2 = list(df2.index[:10])

print(f'Top-10 (t-test): {top_ttest}')
print(f'Top-10 (DESeq2): {top_deseq2}')
print(f'Intersection: {set(top_ttest) & set(top_deseq2)}')
print(f'Difference between Top-10 (t-test) and Top-10 (DESeq2): '
      f'{set(top_ttest).difference(set(top_deseq2))}')
print(f'Difference between Top-10 (DESeq2) and Top-10 (t-test): '
      f'{set(top_deseq2).difference(set(top_ttest))}')


Top-10 (t-test): ['C17orf96', 'FUT1', 'IER5L', 'CDH3', 'FXYD5', 'ZNHIT2', 'GPR4', 'DKC1', 'ETV4', 'MT1H']
Top-10 (DESeq2): ['FABP6', 'ETV4', 'IER5L', 'KRT80', 'FUT1', 'C17orf96', 'CLDN1', 'ATG9B', 'KIAA1257', 'SLC51B']
Intersection: {'ETV4', 'C17orf96', 'FUT1', 'IER5L'}
Difference between Top-10 (t-test) and Top-10 (DESeq2): {'DKC1', 'ZNHIT2', 'CDH3', 'GPR4', 'FXYD5', 'MT1H'}
Difference between Top-10 (DESeq2) and Top-10 (t-test): {'FABP6', 'KRT80', 'CLDN1', 'SLC51B', 'ATG9B', 'KIAA1257'}
