# Tier Analysis

In [1]:
from datetime import datetime

# Print the current local time in green (ANSI colour code 32), then reset the colour.
print(
    "\033[32m{}\033[0m".format(
        format(datetime.now(), "%B %d, %Y %H:%M:%S")
    )
)

[32mJune 18, 2022 15:36:53[0m


In [2]:
import os

import pandas as pd
from pandas import read_csv

# Confidence value and date stamp that appear in every prediction file name;
# the confidence value is also the name of the directory holding the files.
PREDICTION_CONFIDENCE = "0.80"
PREDICTION_DATE = "2022-06-18"


def _prediction_path(tcga):
    """Return the relative path of the prediction CSV for the label ``tcga``.

    The path is assembled with ``os.path.join`` so the directory separator
    matches the host OS. A literal backslash only acts as a separator on
    Windows; elsewhere it would become part of the file name.
    """
    file_name = "{}_high_confidence_disruptive_{}_cosmic_{}.csv".format(
        tcga, PREDICTION_CONFIDENCE, PREDICTION_DATE
    )
    return os.path.join(PREDICTION_CONFIDENCE, file_name)


BRCA_PREDICTION_PATH = _prediction_path("BRCA")
COAD_PREDICTION_PATH = _prediction_path("COAD")
ESCA_PREDICTION_PATH = _prediction_path("ESCA")
GBM_PREDICTION_PATH = _prediction_path("GBM")
HNSC_PREDICTION_PATH = _prediction_path("HNSC")
OV_PREDICTION_PATH = _prediction_path("OV")

# Load IPython's autoreload extension and set it to mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
def get_summary_table(tcga_to_prediction_data):
    """Summarise each prediction table as one row of counts.

    Parameters
    ----------
    tcga_to_prediction_data : dict
        Maps a label (used as the ``TCGA`` column value) to a DataFrame that
        has ``MOST_SIGNIFICANT_CODON_TIER`` and ``CGC_status`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per dict entry, in iteration order, holding the label, the
        number of rows, the number of rows whose
        ``MOST_SIGNIFICANT_CODON_TIER`` is ``TIER_1``/``TIER_2``/``TIER_3``,
        and the number of rows whose ``CGC_status`` is
        ``Tier 1``/``Tier 2``/``Tier 3``.
    """
    codon_tiers = ["TIER_1", "TIER_2", "TIER_3"]
    cgc_tiers = ["Tier 1", "Tier 2", "Tier 3"]

    # Summing a boolean mask counts its True entries, i.e. the matching rows.
    rows = [
        (
            label,
            len(frame),
            int(frame["MOST_SIGNIFICANT_CODON_TIER"].isin(codon_tiers).sum()),
            int(frame["CGC_status"].isin(cgc_tiers).sum()),
        )
        for label, frame in tcga_to_prediction_data.items()
    ]

    header = [
        "TCGA",
        "Num Total Entries",
        "Num Entries with Tier 1, Tier 2, Tier 3 annotation",
        "Num Entries containing a mutation CGC Gene",
    ]
    return pd.DataFrame(rows, columns=header)

In [4]:
# Read each prediction CSV once, keyed by the label later shown in the TCGA column.
TCGA_TO_PREDICTION_DATA = {
    tcga: read_csv(prediction_path)
    for tcga, prediction_path in (
        ("BRCA", BRCA_PREDICTION_PATH),
        ("COAD", COAD_PREDICTION_PATH),
        ("ESCA", ESCA_PREDICTION_PATH),
        ("GBM", GBM_PREDICTION_PATH),
        ("HNSC", HNSC_PREDICTION_PATH),
        ("OV", OV_PREDICTION_PATH),
    )
}

# Per-cohort names refer to the very same DataFrame objects held in the mapping.
brca_prediction_data = TCGA_TO_PREDICTION_DATA["BRCA"]
coad_prediction_data = TCGA_TO_PREDICTION_DATA["COAD"]
esca_prediction_data = TCGA_TO_PREDICTION_DATA["ESCA"]
gbm_prediction_data = TCGA_TO_PREDICTION_DATA["GBM"]
hnsc_prediction_data = TCGA_TO_PREDICTION_DATA["HNSC"]
ov_prediction_data = TCGA_TO_PREDICTION_DATA["OV"]

In [5]:
# Left as the cell's last expression so the notebook renders the summary table.
get_summary_table(tcga_to_prediction_data=TCGA_TO_PREDICTION_DATA)

Unnamed: 0,TCGA,Num Total Entries,"Num Entries with Tier 1, Tier 2, Tier 3 annotation",Num Entries containing a mutation CGC Gene
0,BRCA,334,64,101
1,COAD,683,47,94
2,ESCA,131,36,40
3,GBM,269,56,61
4,HNSC,364,92,132
5,OV,244,59,69


In [6]:
# Print the same summary table as LaTeX source (DataFrame.to_latex).
print(
    get_summary_table(tcga_to_prediction_data=TCGA_TO_PREDICTION_DATA)
    .to_latex()
)

\begin{tabular}{llrrr}
\toprule
{} &  TCGA &  Num Total Entries &  Num Entries with Tier 1, Tier 2, Tier 3 annotation &  Num Entries containing a mutation CGC Gene \\
\midrule
0 &  BRCA &                334 &                                                 64 &                                         101 \\
1 &  COAD &                683 &                                                 47 &                                          94 \\
2 &  ESCA &                131 &                                                 36 &                                          40 \\
3 &   GBM &                269 &                                                 56 &                                          61 \\
4 &  HNSC &                364 &                                                 92 &                                         132 \\
5 &    OV &                244 &                                                 59 &                                          69 \\
\bottomrule
\end{tabular}


---