# EfficientNet results

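Calculate the enrichment and precision/recall metrics for each normalization of the EfficientNet profiles and compare them to the baseline.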

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pycytominer import aggregate, annotate, normalize, feature_select, cyto_utils
from cytominer_eval import evaluate
from cytominer_eval.transform import metric_melt
from sklearn.decomposition import PCA
import sys
#sys.path.insert(0, "/Users/mbornhol/git/neural-profiling")

In [2]:
# Load the aggregated EfficientNet profiles (median-aggregated, going by the file name).
# NOTE(review): absolute, user-specific path -- this cell only runs on the original machine.
profiles_csv = '/Users/mbornhol/git/neural-profiling/pre-trained/efficient_net/aggregated/aggregated_efficientnet_median.csv'
df = pd.read_csv(profiles_csv, low_memory=False)

In [3]:
# Compound -> mechanism-of-action (MOA) lookup table. Only the two columns used
# below are kept, renamed to the Metadata_* names that serve as merge key
# ('Metadata_broad_sample') and replicate group ('Metadata_moa') further down.
# NOTE(review): absolute, user-specific path.
moa_map_tsv = '/Users/mbornhol/git/neural-profiling/pre-trained/data-prep/02_index_preperation/repurposing_info_external_moa_map_resolved.tsv'
df_meta = (
    pd.read_csv(moa_map_tsv, sep='\t')
    .loc[:, ['broad_sample', 'moa']]
    .rename(columns={'broad_sample': 'Metadata_broad_sample', 'moa': 'Metadata_moa'})
)

In [4]:
# Split the profile columns: embedding features are the ones prefixed 'eff',
# every other column is treated as metadata.
features = [col for col in df.columns if col.startswith('eff')]
feature_names = set(features)
meta_features = [col for col in df.columns if col not in feature_names]

# MOAs ordered by how often they occur in the profiles; the slice keeps
# positions 1..19, i.e. it skips the most frequent entry and keeps the next 19.
# NOTE(review): the plots below are titled "top 20 MOAs" -- confirm that
# skipping position 0 (and ending up with 19 MOAs) is intended.
moa_counts = df.Metadata_moa.value_counts()
top_moa = moa_counts.iloc[1:20].index.tolist()

In [5]:
# Neighbourhood sizes for precision/recall@k: 5, 10, 15, 20, 25.
k_range = list(range(5, 30, 5))
# Similarity percentiles for the enrichment analysis, descending from 0.995 in
# steps of 0.005. NOTE(review): with a float step the nominally exclusive stop
# (0.96) may or may not end up in the array because of rounding -- check
# len(p_range) if the exact set of percentiles matters.
p_range = np.arange(0.995, 0.96, -0.005)

## MAD robustize (normalized against DMSO)

In [6]:
# Normalize all features with pycytominer's 'mad_robustize' method; the
# `samples` query selects the rows whose Metadata_broad_sample is 'DMSO'.
mad_dmso = normalize(
    profiles=df,
    features=features,
    samples="Metadata_broad_sample == 'DMSO'",
    method='mad_robustize',
    output_file='none',
)

# One consensus profile per compound (aggregate is called without an explicit
# operation, so the library default applies), then attach the MOA label.
# The left join keeps compounds that have no entry in df_meta.
con = aggregate(
    mad_dmso,
    strata=["Metadata_broad_sample"],
    features=features,
).merge(df_meta, how='left', on=['Metadata_broad_sample'])

# Enrichment evaluation with MOA as the replicate group, Pearson similarity,
# at each percentile in p_range.
res_enr_mad_dmso = evaluate(
    profiles=con,
    features=features,
    meta_features=['Metadata_broad_sample', "Metadata_moa"],
    replicate_groups=["Metadata_moa"],
    operation="enrichment",
    similarity_metric="pearson",
    enrichment_percentile=p_range,
)

In [7]:
# Precision/recall@k for the MAD-robustized consensus profiles, averaged over
# the MOAs listed in `top_moa`.
#
# NOTE(review): the saved output of this cell is `KeyError: 'Metadata_moa'`.
# The notebook does not show which statement raised it, so re-run on a fresh
# kernel to confirm it is gone.
# NOTE(review): `con` is reused for every normalization in this notebook; this
# cell is only meaningful when run directly after the MAD-robustize cell.

# Drop the DMSO control before scoring, as the spherized precision/recall cell
# does, so both curves are computed on the same set of profiles. A separate
# name is used so `con` itself is left untouched.
con_mad_no_dmso = con[con["Metadata_broad_sample"] != 'DMSO']

prc = evaluate(
    profiles=con_mad_no_dmso,
    features=features,
    meta_features=['Metadata_broad_sample', "Metadata_moa"],
    replicate_groups=["Metadata_moa"],
    operation="precision_recall",
    similarity_metric="pearson",
    precision_recall_k=k_range
)

# Average precision and recall per k over the selected MOAs. The numeric
# columns are named explicitly: calling .mean() on the whole frame also hits
# the string column 'Metadata_moa', which raises on pandas >= 2.0.
top_prc = prc[prc['Metadata_moa'].isin(top_moa)].reset_index(drop=True)
res_prec_mad_dmso = (
    top_prc
    .groupby("k", sort=False)[["precision", "recall"]]  # sort=False keeps first-seen k order
    .mean()
    .reset_index()
)


KeyError: 'Metadata_moa'

In [None]:
# Spherize the profiles: same normalize() call as for MAD-robustize, but with
# method='spherize'; the `samples` query again selects the 'DMSO' rows.
norm_sphere = normalize(
    profiles=df,
    features=features,
    samples="Metadata_broad_sample == 'DMSO'",
    method='spherize',
    output_file='none',
)

# One consensus profile per compound (library-default aggregation operation),
# then attach the MOA label; the left join keeps compounds without an MOA entry.
# NOTE(review): this overwrites the `con` built from the MAD-robustized data.
con = aggregate(
    norm_sphere,
    strata=["Metadata_broad_sample"],
    features=features,
).merge(df_meta, how='left', on=['Metadata_broad_sample'])

In [None]:
# Persist the current consensus profiles next to the notebook.
# NOTE(review): the file name says "mean", but aggregate() above is called
# without an explicit operation -- confirm which aggregation was actually used.
consensus_csv = 'consensus_spherized_dmso_eff_mean.csv'
con.to_csv(consensus_csv, index=False)

In [None]:
# Enrichment evaluation on the current consensus profiles (`con`), using the
# same settings as the MAD-robustized run: MOA as replicate group, Pearson
# similarity, one result per percentile in p_range.
sphere_enrichment_args = dict(
    features=features,
    meta_features=['Metadata_broad_sample', "Metadata_moa"],
    replicate_groups=["Metadata_moa"],
    operation="enrichment",
    similarity_metric="pearson",
    enrichment_percentile=p_range,
)
res_enr_sphere = evaluate(profiles=con, **sphere_enrichment_args)

In [None]:
# Precision/recall@k for the spherized consensus profiles, averaged over the
# MOAs listed in `top_moa`.

# NOTE(review): this redefines the notebook-wide `k_range` (originally 5..25),
# so re-running an earlier precision/recall cell afterwards uses these values.
k_range = [5, 10, 15, 20, 25, 30, 40, 50]

# Remove the DMSO control before scoring.
# NOTE(review): this overwrites `con`; the enrichment cell and the CSV export
# above were computed with DMSO still present.
con = con[con["Metadata_broad_sample"] != 'DMSO']

prc = evaluate(
    profiles=con,
    features=features,
    meta_features=['Metadata_broad_sample', "Metadata_moa"],
    replicate_groups=["Metadata_moa"],
    operation="precision_recall",
    similarity_metric="pearson",
    precision_recall_k=k_range
)

# Average precision and recall per k over the selected MOAs. The numeric
# columns are named explicitly: calling .mean() on the whole frame also hits
# the string column 'Metadata_moa', which raises on pandas >= 2.0.
top_prc = prc[prc['Metadata_moa'].isin(top_moa)].reset_index(drop=True)
res_prec_spherized = (
    top_prc
    .groupby("k", sort=False)[["precision", "recall"]]  # sort=False keeps first-seen k order
    .mean()
    .reset_index()
)

In [None]:
# Display the per-k averaged precision/recall table for the spherized profiles.
res_prec_spherized

In [None]:
# Previously saved baseline results used as the reference curves in the plots:
# precision/recall per k and enrichment per percentile.
# NOTE(review): absolute, user-specific paths.
baseline_precision_csv = '/Users/mbornhol/git/neural-profiling/baseline/results/precision_baseline_top20.csv'
baseline_enrichment_csv = '/Users/mbornhol/git/neural-profiling/baseline/results/res_enrichment_baseline.csv'

res_pre_baseline = pd.read_csv(baseline_precision_csv)
res_enr_baseline = pd.read_csv(baseline_enrichment_csv)

In [None]:
%matplotlib qt
plt.style.use({'figure.facecolor':'white'})
plt.rcParams.update({'font.size': 16})
fig, ax = plt.subplots(figsize=(12,8))
plt.title("Folds of Enrichment in Top Connections")
plt.plot(100*res_enr_baseline["enrichment_percentile"], res_enr_baseline["ods_ratio"], marker="o", color="green", label = 'cytominer baseline')
plt.plot(100*res_enr_sphere["enrichment_percentile"], res_enr_sphere["ods_ratio"], marker="o", color="blue", label = 'efficientnet spherized')
plt.plot(100*res_enr_mad_dmso["enrichment_percentile"], res_enr_mad_dmso["ods_ratio"], marker="o", color="yellow", label = 'efficientnet mad robustized')
plt.ylabel('ods ratio')
plt.xlabel('percentile')
ax.invert_xaxis()
plt.legend(loc = 'upper right', shadow=True)
plt.show()

In [None]:
# Write the spherized results (enrichment and averaged precision/recall) to the
# results directory one level above the working directory, without the index.
spherized_outputs = {
    '../results/res_enrichment_spherized.csv': res_enr_sphere,
    '../results/res_prec_spherized.csv': res_prec_spherized,
}
for out_path, result in spherized_outputs.items():
    result.to_csv(out_path, index=False)

In [None]:
#Precision
%matplotlib qt
plt.style.use({'figure.facecolor':'white'})
plt.rcParams.update({'font.size': 16})
fig, ax = plt.subplots(figsize=(12,8))
plt.title("Average precision @k, top 20 MOAs")
plt.plot(res_pre_baseline["k"], res_pre_baseline["precision"], marker="o", color="green", label = 'cytominer baseline')
plt.plot(res_prec_spherized['k'], res_prec_spherized["precision"], marker="o", color="blue", label = 'efficientnet spherized')
plt.plot(res_prec_mad_dmso['k'], res_prec_mad_dmso["precision"], marker="o", color="yellow", label = 'efficientnet mad robustized')
plt.ylabel('precision')
plt.xlabel('k')
plt.legend(loc = 'upper right', shadow=True)
plt.show()

In [None]:
%matplotlib qt
plt.style.use({'figure.facecolor':'white'})
plt.rcParams.update({'font.size': 16})
fig, ax = plt.subplots(figsize=(12,8))
plt.title("Average recall @k, top 20 MOAs")
plt.plot(res_pre_baseline["k"], res_pre_baseline["recall"], marker="o", color="green", label = 'cytominer baseline')
plt.plot(res_prec_spherized['k'], res_prec_spherized["recall"], marker="o", color="blue", label = 'efficientnet spherized')
plt.plot(res_prec_mad_dmso['k'], res_prec_mad_dmso["recall"], marker="o", color="yellow", label = 'efficientnet mad robustized')
plt.ylabel('recall')
plt.xlabel('k')
plt.legend(loc = 'upper left', shadow=True)
plt.show()