# Accuracy Summaries for DeepSEA

This notebook summarizes DeepSEA accuracies (mean AUC, AUPRC, and AUPRC baseline) for each prediction type (DNase, TFs, and histone marks), followed by cell-type-specific accuracies for the TF predictions.

In [101]:
import pandas as pd

In [102]:
# Load the per-dataset DeepSEA metrics. `skiprows=1` skips the first row of
# the sheet, so the row after it supplies the column names.
# NOTE: `pd.read_excel` needs an Excel reader engine to be installed
# (`xlrd` on older pandas; newer pandas reads .xlsx files via `openpyxl`).
results = pd.read_excel('deepsea_accuracies.xlsx', skiprows=1)

# Some AUC cells hold text rather than a number. Coerce the whole column in a
# single vectorized call so those entries become NaN and the column is numeric.
results['AUC'] = pd.to_numeric(results['AUC'], errors='coerce')
results.head()

Unnamed: 0,Cell Type,TF/DNase/HistoneMark,Treatment,Original File Name,AUC,AUPRC,AUPRC Baseline
0,8988T,DNase,,wgEncodeAwgDnaseDuke8988tUniPk.narrowPeak.gz,0.910351,0.401035,0.039589
1,AoSMC,DNase,,wgEncodeAwgDnaseDukeAosmcUniPk.narrowPeak.gz,0.924486,0.439494,0.041031
2,Chorion,DNase,,wgEncodeAwgDnaseDukeChorionUniPk.narrowPeak.gz,0.896293,0.320603,0.031379
3,CLL,DNase,,wgEncodeAwgDnaseDukeCllUniPk.narrowPeak.gz,0.928262,0.35898,0.02112
4,Fibrobl,DNase,,wgEncodeAwgDnaseDukeFibroblUniPk.narrowPeak.gz,0.83792,0.340633,0.058138


In [103]:
# The split below is purely positional: it assumes the sheet lists
# 125 DNase rows first, then 690 TF rows, then 104 histone rows.
N_DNASE, N_TFS, N_HISTONES = 125, 690, 104

dnase = results.iloc[:N_DNASE]
tfs = results.iloc[N_DNASE:N_DNASE + N_TFS]
histones = results.iloc[-N_HISTONES:]

# sanity check: every slice has the expected size, and the three slices
# together account for every row (so there is no gap between TFs and histones)
assert len(dnase) == N_DNASE
assert len(tfs) == N_TFS
assert len(histones) == N_HISTONES
assert len(results) == N_DNASE + N_TFS + N_HISTONES, (
    "expected %d rows in total, found %d"
    % (N_DNASE + N_TFS + N_HISTONES, len(results))
)

# Accuracies for DNase, TFs, Histones

In [104]:
# One row per prediction type (plus "All"), one column per numeric metric.
# `numeric_only=True` restricts the mean to the numeric columns: pandas >= 2.0
# raises a TypeError on the text columns instead of silently dropping them.
pd.concat([dnase.mean(numeric_only=True).to_frame("DNase"),
           tfs.mean(numeric_only=True).to_frame("TFs"),
           histones.mean(numeric_only=True).to_frame("Histones"),
           results.mean(numeric_only=True).to_frame("All")],
          axis=1).transpose()

Unnamed: 0,AUC,AUPRC,AUPRC Baseline
DNase,0.915492,0.444159,0.042833
TFs,0.947827,0.319175,0.00996
Histones,0.852247,0.367422,0.064047
All,0.932596,0.341635,0.020552


# Cell-type Accuracies for TFs

In [105]:
# Mean of each numeric metric per cell type, computed over the TF rows only.
# `numeric_only=True` keeps the text columns out of the aggregation; pandas >= 2.0
# raises a TypeError for them rather than dropping them silently.
mean_auc_by_celltype = tfs.groupby('Cell Type').mean(numeric_only=True)

In [106]:
# Raise the row-display limit to the table's length so no cell type is elided.
pd.options.display.max_rows = len(mean_auc_by_celltype)
mean_auc_by_celltype

Unnamed: 0_level_0,AUC,AUPRC,AUPRC Baseline
Cell Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A549,0.928284,0.307257,0.010178
AG04449,0.983732,0.618193,0.015305
AG04450,0.976416,0.618812,0.022368
AG09309,0.982669,0.663265,0.021287
AG09319,0.978336,0.648423,0.022623
AG10803,0.977383,0.620232,0.019797
AoAF,0.979361,0.598378,0.016729
BE2_C,0.97479,0.662828,0.02882
BJ,0.977945,0.656495,0.021968
Caco-2,0.988278,0.685641,0.016465
