In [1]:
# cell 1

import pandas as pd
import numpy as np
import os
import re
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline

# 1. Load count matrix using TPM
data_path = '/Users/esrataner/Documents/DATA1030/count_matrix_120'
file_list = [f for f in os.listdir(data_path) if f.endswith('.tsv')]

def read_tpm_file(filename):
    df = pd.read_csv(os.path.join(data_path, filename), sep='\t',
                     usecols=['gene_id', 'TPM'])
    df = df.set_index('gene_id')
    sample_id = filename.replace('.tsv', '')
    df.columns = [sample_id]
    return df

dfs = [read_tpm_file(f) for f in file_list]
merged = pd.concat(dfs, axis=1)  # genes × samples
print("Raw merged shape:", merged.shape)

# 2. remove all spike-in genes 
# these are technical control genes - not needed for ML 
spikein_mask = merged.index.str.lower().str.contains(
    "spikein|ercc"
)

print("Spike-in genes detected:", spikein_mask.sum())

merged = merged.loc[~spikein_mask]
print("After removing spike-ins:", merged.shape)

# 3. Remove all-zero genes
zero_across_all = (merged == 0).all(axis=1)
print("Genes zero everywhere:", zero_across_all.sum())

expr = merged[~zero_across_all]
print("After removing all-zero genes:", expr.shape)

# 4. log1p(TPM) transform — biological normalization
expr_log = np.log1p(expr)

# NOTE: try graphing log1p and log 
# depends on if data is close to zero or not 

Raw merged shape: (59526, 120)
Spike-in genes detected: 97
After removing spike-ins: (59429, 120)
Genes zero everywhere: 14289
After removing all-zero genes: (45140, 120)


In [2]:
# cell 2

meta = pd.read_csv(
    '/Users/esrataner/Documents/DATA1030/tsv/experiment_report_2025_120.tsv',
    sep='\t', skiprows=1
)

# extract sex
meta['Sex'] = meta['Biosample summary'].str.extract(r'(?i)\b(female|male)\b')
meta['Sex'] = meta['Sex'].str.lower()

# parse numeric age
def parse_age(a):
    if pd.isna(a):
        return None
    nums = re.findall(r'\d+', str(a))
    return int(nums[0]) if nums else None

meta['Age'] = meta['Biosample age'].apply(parse_age)

# Age bins
def age_bin(x):
    if pd.isna(x): return None
    if x < 65: return "60-64"
    if x < 70: return "65-69"
    if x < 75: return "70-74"
    if x < 80: return "75-79"
    if x < 85: return "80-84"
    if x < 90: return "85-89"
    return "90+"

meta['Age_Ordinal'] = meta['Age'].apply(age_bin)

age_order = ["60-64","65-69","70-74","75-79","80-84","85-89","90+"]
meta['Age_Ordinal'] = pd.Categorical(
    meta['Age_Ordinal'], categories=age_order, ordered=True
)

# FIX!!  Extract *all* ENCFF file IDs (not just first)
# Find ALL ENCFF IDs
meta['File_IDs'] = meta['Files'].str.findall(r'ENCFF\w+')

# Expand metadata so each row contains ONE ENCFF ID
meta_expanded = meta.explode('File_IDs')

# Keep only relevant fields
meta_expanded = meta_expanded[
    ['Accession', 'Sex', 'Age_Ordinal', 'Biosample accession', 'File_IDs']
]

# Drop rows missing file IDs
meta_expanded = meta_expanded.dropna(subset=['File_IDs'])

print("Expanded metadata shape:", meta_expanded.shape)
# Expanded metadata shape: (1220, 5)

# exp 
# in the raw ENCODE metadata, each biological sample (ENCSR) has multiple ENCFF files 
# (technical sequencing files: BAMs, fastqs, quantifications, etc)

# After expanding, u get 1 row per ENCFF file
# so if each ENCSR has ~10 ENCFF files ->  120 biological samples become ~1220 rows.
# so 1220 is NOT 120 samples - it is technical files, NOT samples

# MOST IMPORTANTLY - do NOT need GroupShuffleSplit
# 1. each ENCSR is represented only once
# 2. no ENCFF technical replicates remain#
# 3. df_full has 1 row per biological sample





Expanded metadata shape: (1220, 5)


In [3]:
# cell 3 

# List of ENCFF IDs found in expression matrix
expr_samples = expr_log.columns.tolist()

# Match metadata rows where File_IDs map to expression samples
meta_matched = meta_expanded[meta_expanded['File_IDs'].isin(expr_samples)].copy()
print("Matched metadata rows:", meta_matched.shape)

# Step 1: Relabel expression matrix columns (ENCFF → ENCSR)
rename_dict = dict(zip(meta_matched['File_IDs'], meta_matched['Accession']))
expr_labeled = expr_log.rename(columns=rename_dict)

# Step 2: Identify replicate sample IDs (appear twice)
replicate_accessions = expr_labeled.columns[
    expr_labeled.columns.duplicated()
].unique()

print("Replicate sample accessions:", replicate_accessions.tolist())

# Step 3: DROP the replicate sample completely
expr_no_reps = expr_labeled.drop(columns=list(replicate_accessions))
meta_no_reps = meta_matched[
    ~meta_matched['Accession'].isin(replicate_accessions)
].copy()

print("Expression shape after removing replicates:", expr_no_reps.shape)
print("Metadata shape after removing replicates:", meta_no_reps.shape)

# Step 4: Align expression with metadata
expr_T = expr_no_reps.T
expr_T.index.name = 'Accession'
expr_T = expr_T.reset_index()

# Deduplicate final metadata
meta_unique = meta_no_reps.drop_duplicates(subset=['Accession'])

# Step 5: Merge into final ML-ready dataset
df_full = expr_T.merge(
    meta_unique.drop(columns=['File_IDs']),
    on='Accession',
    how='inner'
).set_index('Accession')

print("Final merged dataset shape:", df_full.shape)
print(df_full.head())


Matched metadata rows: (120, 5)
Replicate sample accessions: []
Expression shape after removing replicates: (45140, 120)
Metadata shape after removing replicates: (120, 5)
Final merged dataset shape: (120, 45143)
             13023     26893  30031  30958     30964  ENSG00000000003.14  \
Accession                                                                  
ENCSR800PJQ    0.0  0.506818    0.0    0.0  0.000000            1.348073   
ENCSR133PLR    0.0  0.000000    0.0    0.0  2.324347            1.340250   
ENCSR418WMG    0.0  0.000000    0.0    0.0  0.000000            1.181727   
ENCSR013HWB    0.0  0.438255    0.0    0.0  0.000000            1.313724   
ENCSR693KOP    0.0  0.000000    0.0    0.0  0.000000            1.081805   

             ENSG00000000005.5  ENSG00000000419.12  ENSG00000000457.13  \
Accession                                                                
ENCSR800PJQ           0.000000            2.157559            1.444563   
ENCSR133PLR           0.122218  

In [4]:
# cell 4 

# TRAIN / VAL / TEST SPLIT
from sklearn.model_selection import train_test_split

# (1) Define features + target
X = df_full.drop(columns=["Sex", "Biosample accession"]) #string carried over 
y = df_full["Sex"].map({"female": 0, "male": 1}).astype(int)

y = y.astype(int)

# (2) First split: Train vs Temp (Val+Test)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)

# (3) Split Temp into Validation + Test
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp
)

# Print shapes
print("Dataset Splits:")
print("Train:", X_train.shape, "Target:", y_train.shape)
print("Val:  ", X_val.shape,   "Target:", y_val.shape)
print("Test: ", X_test.shape,  "Target:", y_test.shape)


Dataset Splits:
Train: (84, 45141) Target: (84,)
Val:   (18, 45141) Target: (18,)
Test:  (18, 45141) Target: (18,)


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from xgboost import XGBClassifier
import numpy as np


#  scale_pos_weight
neg = (y_train == 0).sum()
pos = (y_train == 1).sum()
scale_pos_weight = neg / pos
print(f"scale_pos_weight (initial split) = {scale_pos_weight:.4f}")



gene_cols = [c for c in X_train.columns if c != "Age_Ordinal"]

def make_preprocessor():
    """Fresh ColumnTransformer each time (no leakage)."""
    return ColumnTransformer([
        ("num", StandardScaler(), gene_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), ["Age_Ordinal"])
    ])


# early-stopping model to get best_iter

pre_early = make_preprocessor()
pre_early.fit(X_train)

X_train_trans = pre_early.transform(X_train)
X_val_trans   = pre_early.transform(X_val)

early_model = XGBClassifier(
    learning_rate=0.05,
    max_depth=3,
    subsample=0.8,
    colsample_bytree=0.2,
    eval_metric="logloss",
    tree_method="hist",
    early_stopping_rounds=10,
    random_state=42,
    scale_pos_weight=scale_pos_weight
)

early_model.fit(
    X_train_trans,
    y_train,
    eval_set=[(X_val_trans, y_val)],
    verbose=True
)

best_iter = early_model.best_iteration
print("Best n_estimators from early stopping:", best_iter)


scale_pos_weight (initial split) = 3.0000
[0]	validation_0-logloss:0.66791
[1]	validation_0-logloss:0.64257
[2]	validation_0-logloss:0.63504
[3]	validation_0-logloss:0.62314
[4]	validation_0-logloss:0.59624
[5]	validation_0-logloss:0.59290
[6]	validation_0-logloss:0.58634
[7]	validation_0-logloss:0.56521
[8]	validation_0-logloss:0.55086
[9]	validation_0-logloss:0.53728
[10]	validation_0-logloss:0.53500
[11]	validation_0-logloss:0.51820
[12]	validation_0-logloss:0.49902
[13]	validation_0-logloss:0.48449
[14]	validation_0-logloss:0.49170
[15]	validation_0-logloss:0.48403
[16]	validation_0-logloss:0.47638
[17]	validation_0-logloss:0.47090
[18]	validation_0-logloss:0.46432
[19]	validation_0-logloss:0.45436
[20]	validation_0-logloss:0.44672
[21]	validation_0-logloss:0.43318
[22]	validation_0-logloss:0.42612
[23]	validation_0-logloss:0.43208
[24]	validation_0-logloss:0.42850
[25]	validation_0-logloss:0.42556
[26]	validation_0-logloss:0.41823
[27]	validation_0-logloss:0.41624
[28]	validation_

In [6]:
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier

def make_xgb_pipeline(n_estimators, scale_pos_weight, random_state=42):
    """XGB pipeline with preprocessor inside (no leakage across CV)."""
    return Pipeline([
        ("preprocessor", make_preprocessor()),
        ("model", XGBClassifier(
            objective="binary:logistic",
            eval_metric="logloss",
            tree_method="hist",
            random_state=random_state,
            n_estimators=n_estimators,
            scale_pos_weight=scale_pos_weight
        ))
    ])


In [None]:
import optuna
from sklearn.model_selection import StratifiedKFold, cross_val_score

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

def objective(trial):

    params = {
        "model__max_depth": trial.suggest_int("model__max_depth", 2, 6),
        "model__learning_rate": trial.suggest_float("model__learning_rate", 0.01, 0.1),
        "model__subsample": trial.suggest_float("model__subsample", 0.6, 1.0),
        "model__colsample_bytree": trial.suggest_float("model__colsample_bytree", 0.1, 0.6),
        "model__reg_alpha": trial.suggest_float("model__reg_alpha", 0.0, 3.0),
        "model__reg_lambda": trial.suggest_float("model__reg_lambda", 0.0, 3.0),
    }

    pipe = make_xgb_pipeline(best_iter, scale_pos_weight, random_state=42)
    pipe.set_params(**params)

    pr_auc = cross_val_score(
        pipe,
        X_train,
        y_train,
        scoring="average_precision",
        cv=cv,
        n_jobs=-1
    ).mean()

    return pr_auc

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

print("\n OPTUNA RESULTS ")
print("Best CV PR-AUC (Optuna):", study.best_value)
print("\nBest hyperparameters (Optuna):")
for k, v in study.best_params.items():
    print(f"  {k}: {v}")

best_optuna_params = study.best_params


pd.Series(best_optuna_params).to_json("optuna_best_params.json")
print("\nSaved: optuna_best_params.json")


[I 2025-12-12 03:09:26,578] A new study created in memory with name: no-name-71ab1c6b-0cc1-4f5e-8759-d7d058743f56
[I 2025-12-12 03:09:30,886] Trial 0 finished with value: 0.6961694677871149 and parameters: {'model__max_depth': 4, 'model__learning_rate': 0.08078644467911282, 'model__subsample': 0.6218543331143649, 'model__colsample_bytree': 0.4008170769591276, 'model__reg_alpha': 0.503425676723821, 'model__reg_lambda': 0.10440984448346191}. Best is trial 0 with value: 0.6961694677871149.
[I 2025-12-12 03:09:34,543] Trial 1 finished with value: 0.6848389355742297 and parameters: {'model__max_depth': 3, 'model__learning_rate': 0.05280513816669777, 'model__subsample': 0.776093360161199, 'model__colsample_bytree': 0.3893225383905389, 'model__reg_alpha': 1.28269762470195, 'model__reg_lambda': 1.2047254487839938}. Best is trial 0 with value: 0.6961694677871149.
[I 2025-12-12 03:09:38,220] Trial 2 finished with value: 0.7304761904761905 and parameters: {'model__max_depth': 4, 'model__learning_


===== OPTUNA RESULTS =====
Best CV PR-AUC (Optuna): 0.8254761904761905

Best hyperparameters (Optuna):
  model__max_depth: 3
  model__learning_rate: 0.02594529453035626
  model__subsample: 0.8097992181431611
  model__colsample_bytree: 0.16799580159890665
  model__reg_alpha: 1.4061736047358249
  model__reg_lambda: 1.6099873145029557

Saved: optuna_best_params.json


In [None]:
from sklearn.metrics import average_precision_score, classification_report

# combine Train + Val
X_train_full = pd.concat([X_train, X_val], axis=0)
y_train_full = pd.concat([y_train, y_val], axis=0)

# build final Optuna-tuned pipeline
final_optuna_pipe = make_xgb_pipeline(best_iter, scale_pos_weight, random_state=42)
final_optuna_pipe.set_params(**best_optuna_params)

final_optuna_pipe.fit(X_train_full, y_train_full)

# test
y_test_proba = final_optuna_pipe.predict_proba(X_test)[:, 1]
y_test_pred  = final_optuna_pipe.predict(X_test)

test_ap = average_precision_score(y_test, y_test_proba)

print("\nFINAL TEST RESULTS (Optuna XGBoost)")
print(f"TEST PR-AUC: {test_ap:.4f}")
print("\nTest Classification Report:\n", classification_report(y_test, y_test_pred))


optuna_test_results = pd.DataFrame({
    "y_true": y_test,
    "y_proba_male": y_test_proba,
    "y_pred": y_test_pred
})
optuna_test_results.to_csv("optuna_test_predictions.csv", index=False)
print("Saved: optuna_test_predictions.csv")



FINAL TEST RESULTS (Optuna XGBoost)
TEST PR-AUC: 0.9500

Test Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93        14
           1       0.75      0.75      0.75         4

    accuracy                           0.89        18
   macro avg       0.84      0.84      0.84        18
weighted avg       0.89      0.89      0.89        18

Saved: optuna_test_predictions.csv


In [None]:
import shap
import matplotlib.pyplot as plt

pre = final_optuna_pipe.named_steps["preprocessor"]
model = final_optuna_pipe.named_steps["model"]

X_test_trans = pre.transform(X_test)
feature_names = pre.get_feature_names_out()

explainer = shap.TreeExplainer(model)
shap_vals = explainer.shap_values(X_test_trans)

# binary model returns [class0, class1], use class 1 (male)
if isinstance(shap_vals, list):
    shap_vals = shap_vals[1]

=
# Global ranking by mean | SHAP|

mean_abs_shap = np.abs(shap_vals).mean(axis=0)

shap_ranked = (
    pd.DataFrame({"feature": feature_names, "mean_abs_shap": mean_abs_shap})
    .sort_values("mean_abs_shap", ascending=False)
)

shap_ranked.to_csv("optuna_shap_global_ranking.csv", index=False)

print("\nTop 10 features by |SHAP|:")
display(shap_ranked.head(10))


# directional SHAP: male vs female

male_idx = np.where(y_test == 1)[0]
female_idx = np.where(y_test == 0)[0]

male_shap = shap_vals[male_idx].mean(axis=0)
female_shap = shap_vals[female_idx].mean(axis=0)

shap_direction = pd.DataFrame({
    "feature": feature_names,
    "male_shap": male_shap,
    "female_shap": female_shap,
})
shap_direction["difference_male_minus_female"] = (
    shap_direction["male_shap"] - shap_direction["female_shap"]
)

shap_direction_sorted = shap_direction.sort_values(
    "difference_male_minus_female", ascending=False
)

shap_direction_sorted.to_csv("optuna_shap_directional.csv", index=False)

print("\nTop 10 Masculinizing features (Optuna):")
display(shap_direction_sorted.head(10))

print("\nTop 10 Feminizing features (Optuna):")
display(shap_direction_sorted.tail(10))



Top 10 features by |SHAP|:


Unnamed: 0,feature,mean_abs_shap
4256,num__ENSG00000114374.12,0.248604
2100,num__ENSG00000099715.14,0.098816
22845,num__ENSG00000227494.2,0.092422
29119,num__ENSG00000241859.6,0.083457
26035,num__ENSG00000233864.7,0.080106
18783,num__ENSG00000207445.1,0.076641
6127,num__ENSG00000129824.15,0.07367
13895,num__ENSG00000176728.7,0.046967
325,num__ENSG00000012817.15,0.039765
28139,num__ENSG00000238067.1,0.03539



Top 10 Masculinizing features (Optuna):


Unnamed: 0,feature,male_shap,female_shap,difference_male_minus_female
4256,num__ENSG00000114374.12,0.15077,-0.137022,0.287791
2100,num__ENSG00000099715.14,0.072511,-0.079356,0.151867
22845,num__ENSG00000227494.2,0.075521,-0.068536,0.144057
29119,num__ENSG00000241859.6,0.054987,-0.065748,0.120736
26035,num__ENSG00000233864.7,0.053117,-0.065477,0.118594
6127,num__ENSG00000129824.15,0.06116,-0.042567,0.103727
13895,num__ENSG00000176728.7,0.04103,-0.033796,0.074827
325,num__ENSG00000012817.15,0.031586,-0.030914,0.0625
28139,num__ENSG00000238067.1,0.033372,-0.025553,0.058925
18783,num__ENSG00000207445.1,0.051218,0.00127,0.049948



Top 10 Feminizing features (Optuna):


Unnamed: 0,feature,male_shap,female_shap,difference_male_minus_female
18850,num__ENSG00000211645.2,-0.005175,-0.000751,-0.004424
760,num__ENSG00000055957.10,-0.005949,-0.000831,-0.005118
32533,num__ENSG00000253824.1,-0.008272,-0.002975,-0.005297
15352,num__ENSG00000184357.4,-0.004892,0.000717,-0.005609
38958,num__ENSG00000269416.5,-0.006265,0.000318,-0.006583
40256,num__ENSG00000272554.1,-0.008061,-0.001112,-0.006949
29243,num__ENSG00000242259.8,-0.009508,-0.000281,-0.009227
17106,num__ENSG00000197980.12,-0.010575,-0.000465,-0.01011
40956,num__ENSG00000274001.1,-0.007739,0.002964,-0.010703
14870,num__ENSG00000182256.12,-0.00197,0.013562,-0.015532


In [None]:
from sklearn.model_selection import train_test_split

seed_results = []
shap_records = []
random_states = [0, 1, 2, 3, 4]

for seed in random_states:
    print(f"\n===== OPTUNA XGB — SEED {seed} =====")


    X_train_s, X_temp_s, y_train_s, y_temp_s = train_test_split(
        X, y, test_size=0.30, stratify=y, random_state=seed
    )
    X_val_s, X_test_s, y_val_s, y_test_s = train_test_split(
        X_temp_s, y_temp_s, test_size=0.50, stratify=y_temp_s, random_state=seed
    )


    neg_s = (y_train_s == 0).sum()
    pos_s = (y_train_s == 1).sum()
    scale_pos_weight_s = neg_s / pos_s

    # build Optuna-tuned pipeline for this seed
    optuna_seed_pipe = make_xgb_pipeline(best_iter, scale_pos_weight_s, random_state=seed)
    optuna_seed_pipe.set_params(**best_optuna_params)


    X_train_full_s = pd.concat([X_train_s, X_val_s])
    y_train_full_s = pd.concat([y_train_s, y_val_s])

    optuna_seed_pipe.fit(X_train_full_s, y_train_full_s)


    y_test_proba_s = optuna_seed_pipe.predict_proba(X_test_s)[:, 1]
    test_ap_s = average_precision_score(y_test_s, y_test_proba_s)

    seed_results.append({
        "seed": seed,
        "test_ap": test_ap_s
    })
    print(f"Test PR-AUC (seed {seed}): {test_ap_s:.4f}")


    pre_s = optuna_seed_pipe.named_steps["preprocessor"]
    model_s = optuna_seed_pipe.named_steps["model"]

    X_test_trans_s = pre_s.transform(X_test_s)
    feat_names_s = pre_s.get_feature_names_out()

    expl_s = shap.TreeExplainer(model_s)
    shap_vals_s = expl_s.shap_values(X_test_trans_s)
    if isinstance(shap_vals_s, list):
        shap_vals_s = shap_vals_s[1]

    shap_df_s = pd.DataFrame(shap_vals_s, columns=feat_names_s)
    shap_df_s["seed"] = seed
    shap_records.append(shap_df_s)


seed_df = pd.DataFrame(seed_results)
display(seed_df)

seed_df.to_csv("optuna_seed_metrics.csv", index=False)
print("\nSaved: optuna_seed_metrics.csv")


# SHAP stability across seeds

shap_all = pd.concat(shap_records, ignore_index=True)

# make sure 2 drop seed col and compute mean SHAP and STDEV SHAP across all seed-samples
shap_features_only = shap_all.drop(columns=["seed"])

mean_abs_shap = shap_features_only.abs().mean()
std_abs_shap = shap_features_only.abs().std()

shap_stability = pd.DataFrame({
    "feature": mean_abs_shap.index,
    "mean_abs_shap": mean_abs_shap.values,
    "std_abs_shap": std_abs_shap.values
}).sort_values("mean_abs_shap", ascending=False)

shap_stability.to_csv("optuna_shap_stability_genes.csv", index=False)
print("Saved: optuna_shap_stability_genes.csv")

print("\nTop 20 stable features by mean |SHAP| (Optuna):")
display(shap_stability.head(20))



===== OPTUNA XGB — SEED 0 =====
Test PR-AUC (seed 0): 0.7333

===== OPTUNA XGB — SEED 1 =====
Test PR-AUC (seed 1): 0.6278

===== OPTUNA XGB — SEED 2 =====
Test PR-AUC (seed 2): 0.8750

===== OPTUNA XGB — SEED 3 =====
Test PR-AUC (seed 3): 0.5667

===== OPTUNA XGB — SEED 4 =====
Test PR-AUC (seed 4): 0.9500


Unnamed: 0,seed,test_ap
0,0,0.733333
1,1,0.627778
2,2,0.875
3,3,0.566667
4,4,0.95



Saved: optuna_seed_metrics.csv
Saved: optuna_shap_stability_genes.csv

Top 20 stable features by mean |SHAP| (Optuna):


Unnamed: 0,feature,mean_abs_shap,std_abs_shap
4256,num__ENSG00000114374.12,0.152567,0.057194
15254,num__ENSG00000183878.15,0.152123,0.046781
26035,num__ENSG00000233864.7,0.130063,0.079005
22845,num__ENSG00000227494.2,0.051597,0.041459
325,num__ENSG00000012817.15,0.045051,0.029024
42591,num__ENSG00000278847.1,0.042625,0.027995
29119,num__ENSG00000241859.6,0.041711,0.028936
18553,num__ENSG00000206159.10,0.037985,0.027694
2100,num__ENSG00000099715.14,0.034362,0.022351
17312,num__ENSG00000198692.9,0.033483,0.029792
