In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pycytominer import aggregate, annotate, normalize, feature_select, cyto_utils
from cytominer_eval import evaluate
from cytominer_eval.transform import metric_melt
from pycytominer.cyto_utils import infer_cp_features
import seaborn as sns
from sklearn.decomposition import PCA
import sys
sys.path.insert(0, "/Users/mbornhol/git/neural-profiling")
from utils import eval

In [56]:
index = pd.read_csv("../00_index.csv")
batch = pd.read_csv('/Users/mbornhol/git/neural-profiling/baseline/01_data/level_3_data/sub_level3.csv')
batch = batch[['Metadata_Batch_Number', 'Metadata_Plate', 'Metadata_Well']]
p = np.arange(0.995, 0.98, -0.005)


  interactivity=interactivity, compiler=compiler, result=result)


In [57]:
def batch_eff(df, frac = 0.2):
    DMSO = df[df["Metadata_broad_sample"] == "DMSO"]
    DMSO = pd.merge(DMSO, batch, how="left", on=['Metadata_Plate', 'Metadata_Well'])
    features = [f for f in DMSO.columns if f.startswith("eff")]
    meta_features=[f for f in DMSO.columns if not f.startswith("eff")]
    DMSO = DMSO.sample(frac=frac)
    print(DMSO.shape)
    plate_corr = evaluate(
        profiles=DMSO,
        features=features,
        meta_features=meta_features,
        replicate_groups=['Metadata_Plate'],
        operation="enrichment",
        similarity_metric="pearson",
        enrichment_percentile=p,
    )
    batch_corr = evaluate(
        profiles=DMSO,
        features=features,
        meta_features=meta_features,
        replicate_groups=['Metadata_Batch_Number'],
        operation="enrichment",
        similarity_metric="pearson",
        enrichment_percentile=p,
    )
    return pd.concat([plate_corr, batch_corr], axis=1)

In [59]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1102/1102_aggregated_median.csv', low_memory=False)
pre = batch_eff(df)
pre

(650, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.730764,229.576805,0.0,0.995,0.730764,inf,0.0
1,0.99,0.634724,151.486111,0.0,0.99,0.634724,1764.689051,0.0
2,0.985,0.572483,117.175643,0.0,0.985,0.572483,327.670016,0.0
3,0.98,0.528446,104.395126,0.0,0.98,0.528446,145.565224,0.0


In [None]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/00_thesis/pre_trained/aggregated_efficientnet_median.csv', low_memory=False)
pre = batch_eff(df)
pre

In [34]:
# simple trained model
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1008/1008_aggregated_median.csv', low_memory=False)
simple = batch_eff(df, frac=0.8)
simple

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.804328,175.584088,0.0,0.995,0.804328,23097.460848,0.0
1,0.99,0.721341,108.494639,0.0,0.99,0.721341,3022.323589,0.0
2,0.985,0.656617,87.775223,0.0,0.985,0.656617,1048.860161,0.0
3,0.98,0.600813,78.317812,0.0,0.98,0.600813,445.886209,0.0


In [35]:
# simple trained model. Aug on
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1010/1010_aggregated_median.csv', low_memory=False)
simple_aug = batch_eff(df, frac=0.8)
simple_aug

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.817762,126.394886,0.0,0.995,0.817762,3277.065203,0.0
1,0.99,0.745647,83.535071,0.0,0.99,0.745647,635.479434,0.0
2,0.985,0.691788,67.578931,0.0,0.985,0.691788,292.065106,0.0
3,0.98,0.646383,58.800579,0.0,0.98,0.646383,167.235243,0.0


In [44]:
# simple trained model. Aug off. label on
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1012/1012_aggregated_median.csv', low_memory=False)
simple_aug = batch_eff(df, frac=0.8)
simple_aug

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.786295,121.453391,0.0,0.995,0.786295,2030.21445,0.0
1,0.99,0.701529,78.81152,0.0,0.99,0.701529,452.052936,0.0
2,0.985,0.637497,62.79966,0.0,0.985,0.637497,208.849538,0.0
3,0.98,0.586275,54.307242,0.0,0.98,0.586275,120.700788,0.0


In [36]:
# stronger trained model. Aug off
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1101/1101_aggregated_median.csv', low_memory=False)
good_off = batch_eff(df, frac=0.8)
good_off

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.779838,219.635693,0.0,0.995,0.779838,11493.216113,0.0
1,0.99,0.689968,131.641079,0.0,0.99,0.689968,927.042423,0.0
2,0.985,0.624161,106.333779,0.0,0.985,0.624161,327.737967,0.0
3,0.98,0.570379,93.619948,0.0,0.98,0.570379,170.990431,0.0


In [37]:
# stronger trained model. Aug on
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1028/1028_aggregated_median.csv', low_memory=False)
good_aug = batch_eff(df, frac=0.8)
good_aug

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.670208,182.316746,0.0,0.995,0.670208,4588.357721,0.0
1,0.99,0.546662,113.081949,0.0,0.99,0.546662,1549.247064,0.0
2,0.985,0.466179,89.769236,0.0,0.985,0.466179,476.026613,0.0
3,0.98,0.40617,79.050021,0.0,0.98,0.40617,228.119729,0.0


In [38]:
# compare 1021 to 1023
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1021/1021_aggregated_median.csv', low_memory=False)
res = batch_eff(df, frac=0.8)
res

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.765222,217.408821,0.0,0.995,0.765222,13847.85402,0.0
1,0.99,0.673064,130.380671,0.0,0.99,0.673064,647.66222,0.0
2,0.985,0.606563,103.7558,0.0,0.985,0.606563,236.718946,0.0
3,0.98,0.553369,90.803949,0.0,0.98,0.553369,137.617034,0.0


In [39]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1023/1023_aggregated_median.csv', low_memory=False)
res = batch_eff(df, frac=0.8)
res

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.757802,129.063519,0.0,0.995,0.757802,3143.52287,0.0
1,0.99,0.668142,83.988141,0.0,0.99,0.668142,485.681582,0.0
2,0.985,0.600097,67.682602,0.0,0.985,0.600097,222.173732,0.0
3,0.98,0.54479,59.378128,0.0,0.98,0.54479,127.460284,0.0


In [40]:
# compare 1003 to 1028
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1028/1028_aggregated_median.csv', low_memory=False)
res = batch_eff(df, frac=0.8)
res

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.674535,182.442955,0.0,0.995,0.674535,6283.301226,0.0
1,0.99,0.550454,113.44177,0.0,0.99,0.550454,1505.758041,0.0
2,0.985,0.468641,89.159135,0.0,0.985,0.468641,487.788429,0.0
3,0.98,0.408118,78.849803,0.0,0.98,0.408118,233.370598,0.0


In [41]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1103/1103_aggregated_median.csv', low_memory=False)
res = batch_eff(df, frac=0.8)
res

(2601, 1299)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.733304,274.688535,0.0,0.995,0.733304,34604.25548,0.0
1,0.99,0.624116,168.134246,0.0,0.99,0.624116,2054.585179,0.0
2,0.985,0.547438,136.392114,0.0,0.985,0.547438,481.614828,0.0
3,0.98,0.490832,123.163584,0.0,0.98,0.490832,189.97656,0.0


In [42]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/00_thesis/pre_trained/aggregated_efficientnet_median.csv', low_memory=False)
features = [f for f in df.columns if f.startswith("eff")]
sphere = normalize(
    profiles=df,
    features=features,
    samples="Metadata_broad_sample == 'DMSO'",
    method='spherize',
    output_file='none'
)
res = batch_eff(sphere, frac=0.8)
res


(2591, 6418)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.000697,0.740345,0.999987,0.995,0.000697,0.973103,0.976678
1,0.99,0.000501,0.730888,1.0,0.99,0.000501,0.94715,1.0
2,0.985,0.000397,0.707153,1.0,0.985,0.000397,0.94479,1.0
3,0.98,0.000325,0.692581,1.0,0.98,0.000325,0.941523,1.0


In [43]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1008/1008_aggregated_median.csv', low_memory=False)
features = [f for f in df.columns if f.startswith("eff")]
sphere = normalize(
    profiles=df,
    features=features,
    samples="Metadata_broad_sample == 'DMSO'",
    method='spherize',
    output_file='none'
)
res = batch_eff(sphere, frac=0.8)
res

(2601, 1298)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.070403,143.047127,0.0,0.995,0.070403,72.806668,0.0
1,0.99,0.051515,67.842443,0.0,0.99,0.051515,13.658548,0.0
2,0.985,0.045018,44.993786,0.0,0.985,0.045018,7.656754,0.0
3,0.98,0.041157,34.054677,0.0,0.98,0.041157,5.567272,0.0


In [51]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1010/1010_aggregated_median.csv', low_memory=False)
features = [f for f in df.columns if f.startswith("eff")]
sphere = normalize(
    profiles=df,
    features=features,
    samples="Metadata_broad_sample == 'DMSO'",
    method='spherize',
    output_file='none'
)
res = batch_eff(sphere, frac=0.8)
res

(2601, 1298)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.075622,118.782337,0.0,0.995,0.075622,63.468175,0.0
1,0.99,0.054245,60.412948,0.0,0.99,0.054245,15.131726,0.0
2,0.985,0.046992,41.121365,0.0,0.985,0.046992,8.437399,0.0
3,0.98,0.042707,31.511022,0.0,0.98,0.042707,6.114858,0.0


In [53]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1101/1101_aggregated_median.csv', low_memory=False)
features = [f for f in df.columns if f.startswith("eff")]
sphere = normalize(
    profiles=df,
    features=features,
    samples="Metadata_broad_sample == 'DMSO'",
    method='spherize',
    output_file='none'
)
res = batch_eff(sphere, frac=0.8)
res

(2601, 1298)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.070458,158.808846,0.0,0.995,0.070458,61.049904,0.0
1,0.99,0.052162,75.689039,0.0,0.99,0.052162,12.259392,0.0
2,0.985,0.045815,50.137007,0.0,0.985,0.045815,6.94185,0.0
3,0.98,0.04195,37.867013,0.0,0.98,0.04195,5.093136,0.0


In [54]:
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1028/1028_aggregated_median.csv', low_memory=False)
features = [f for f in df.columns if f.startswith("eff")]
sphere = normalize(
    profiles=df,
    features=features,
    samples="Metadata_broad_sample == 'DMSO'",
    method='spherize',
    output_file='none'
)
res = batch_eff(sphere, frac=0.8)
res

(2601, 1298)


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value,enrichment_percentile.1,threshold.1,ods_ratio.1,p-value.1
0,0.995,0.069744,134.356108,0.0,0.995,0.069744,55.295656,0.0
1,0.99,0.051283,65.21192,0.0,0.99,0.051283,11.519203,0.0
2,0.985,0.044885,43.954279,0.0,0.985,0.044885,6.653269,0.0
3,0.98,0.041094,33.386348,0.0,0.98,0.041094,4.91387,0.0


1280 features were identified


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value
0,0.995,0.723653,3.643561,1.440658e-44
1,0.99,0.631884,3.182542,3.344909e-63


In [None]:
# simple trained model
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1008/1008_aggregated_median.csv', low_memory=False)
e = eval.evaluation(df)
df = e.norm_agg(method=None)
res = e.eval(df, operation = 'enrichment')
res[:2]

In [48]:
# simple trained model. Aug on
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1010/1010_aggregated_median.csv', low_memory=False)
e = eval.evaluation(df)
df = e.norm_agg(method=None)
res = e.eval(df, operation = 'enrichment')
res[:2]

1280 features were identified


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value
0,0.995,0.740983,3.600285,1.908701e-43
1,0.99,0.657107,3.226498,3.1334379999999998e-65


In [49]:
# stronger trained model. Aug off
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1101/1101_aggregated_median.csv', low_memory=False)
e = eval.evaluation(df)
df = e.norm_agg(method=None)
res = e.eval(df, operation = 'enrichment')
res[:2]

1280 features were identified


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value
0,0.995,0.731456,2.617108,9.391377e-21
1,0.99,0.655039,2.746443,2.7845849999999997e-44


In [50]:
# stronger trained model. Aug on
df = pd.read_csv('/Users/mbornhol/git/neural-profiling/training/runs/1028/1028_aggregated_median.csv', low_memory=False)
e = eval.evaluation(df)
df = e.norm_agg(method=None)
res = e.eval(df, operation = 'enrichment')
res[:2]

1280 features were identified


Unnamed: 0,enrichment_percentile,threshold,ods_ratio,p-value
0,0.995,0.545566,4.035058,4.078919e-55
1,0.99,0.423171,4.644112,6.005944e-140
