In [1]:
import pandas as pd
import numpy as np

GrowthCurve_allData.csv is the very first screen we did, with a ~700 cpd antimicrobial library from MedChem Express, and 2000 cpds from the IRIC library. Each was done in 1 replicate, but at 4 concentrations.

Enamine_endOD and Enamine_t6_t12 correspond to the second screen (of the Enamine antimicrobial library, 32,000 cpds). They both have the same information, but _t6_t12 includes the mid point measurement as well. This one was done in 2 replicates, but only at 1 concentration (50 μM).

Now, because we could only measure 1 concentration in the Enamine screen, we selected the most active compounds (254) out of it, and did another experiment where we took several time points and several concentrations, in 2 replicates. So Enamine_DR_growthcurves.csv is another experiment, but with a subset of the Enamine compounds. This experiment is part of the hit-validation pipeline required in drug development programs, it's a Dose-Response assay that helps you select candidate compounds by their potency. The MIC is one way of representing that potency, another commonly used is the IC50.

# df_GrowthCurve_allData

In [2]:
df_GrowthCurve_allData = pd.read_csv('/Users/ethankreuzer/Desktop/UdeM/MILA/GrowthCurve_allData.csv')
df_GrowthCurve_allData['MIC'].unique()

print(df_GrowthCurve_allData['Concentration'].unique())
print(df_GrowthCurve_allData['MIC'].unique())

[ 0.2  1.2 50.   7.9]
['DMSO' 'Inactive' '7.9' '50' '1.2' '0.2' 'Cipro' 'Fosfo']


Cipro and Fosfo are known anti biotics (Postive Control)

DMSO is a negative control

In [3]:
df_GrowthCurve_allData = df_GrowthCurve_allData[['Well','Plate','ProductName','Concentration','MIC','Smiles',
                                                 't_0','t_2.08', 't_4.16', 't_6.24', 't_8.32', 't_10.4', 't_12.48']] #keep these columns

df_GrowthCurve_allData=df_GrowthCurve_allData.rename(columns={"Plate": "Plate_ID","MIC":"Control_Label","ProductName":"Compound"}) #rename to be consistent with other compounds

df_GrowthCurve_allData["Control_Label"] = df_GrowthCurve_allData["Control_Label"].apply(
    lambda x: 1 if x in ['Cipro', 'Fosfo'] else (-1 if x == 'DMSO' else 0)
)

df_GrowthCurve_allData

Unnamed: 0,Well,Plate_ID,Compound,Concentration,Control_Label,Smiles,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48
0,A01,546,DMSO,0.2,-1,,0.0,0.281996,0.570499,0.763557,0.872017,0.965293,1.041215
1,A02,546,DMSO,0.2,-1,,0.0,0.288503,0.613883,0.778742,0.889371,0.978308,1.062907
2,A03,546,Polyoxyethylene stearate,0.2,0,O=C(OCCO)CCCCCCCCCCCCCCCCC.[n].[n].[=].[10],0.0,0.286334,0.587852,0.765727,0.889371,0.982646,1.043384
3,A04,546,Lefamulin (acetate),0.2,0,CC(O)=O.C[C@@H]1C23[C@](C(CC3)=O)([H])C([C@H](...,0.0,0.262473,0.559653,0.737527,0.863341,0.950108,1.008677
4,A05,546,Cefodizime (sodium),0.2,0,O=C(C(N12)=C(CSC3=NC(C)=C(CC(O[Na])=O)S3)CS[C@...,0.0,0.275488,0.924078,0.676790,0.685466,0.668113,0.672451
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18427,P20,S10,DMSO,7.9,-1,,0.0,0.187970,0.511278,0.736842,0.857143,0.924812,0.989975
18428,P21,S10,DMSO,7.9,-1,,0.0,0.185464,0.491228,0.736842,0.867168,0.937343,1.002506
18429,P22,S10,DMSO,7.9,-1,,0.0,0.187970,0.506266,0.741855,0.874687,0.942356,1.012531
18430,P23,S10,DMSO,7.9,-1,,0.0,0.197995,0.523810,0.761905,0.877193,0.932331,0.997494


In [4]:
# Group by Compound and Concentration, then compute number of unique Wells and Plate_IDs
well_and_plate_consistency = (
    df_GrowthCurve_allData
    .groupby(['Compound', 'Concentration'])
    .agg(
        n_unique_wells=('Well', 'nunique'),
        n_unique_plates=('Plate_ID', 'nunique')
    )
)

# Filter to rows where there's more than one unique well
inconsistent_rows = well_and_plate_consistency[well_and_plate_consistency['n_unique_wells'] > 1]

# Display result
if inconsistent_rows.empty:
    print("All (Compound, Concentration) pairs are associated with a single Well and Plate_ID.")
else:
    print("The following (Compound, Concentration) pairs have inconsistencies:")
    print(inconsistent_rows)

The following (Compound, Concentration) pairs have inconsistencies:
                             n_unique_wells  n_unique_plates
Compound      Concentration                                 
Ciprofloxacin 0.2                        12                8
              1.2                        12                8
              7.9                        12                8
              50.0                       12                8
DMSO          0.2                       384               12
              1.2                       384               12
              7.9                       384               12
              50.0                      384               12
Fosfomycin    0.2                        12                7
              1.2                        12                7
              7.9                        12                7
              50.0                       12                7


All the positive controls Fosfomycin get labeled as positive controls but for some reason not all Ciprofloxacin get labeled as positive contorls

# Control Growth Curves

In [4]:
df_control_growth_curves = pd.read_csv('Control_growthcurves.csv')
df_control_growth_curves

Unnamed: 0,Well,Concentration,Compound,Replicate,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48,Smiles
0,C3,50.00,Rifampicin,4,0.003157,0.001579,0.001579,0.001579,0.001579,0.001579,0.003157,CN1CCN(CC1)/N=C/c2c(O)c3c5C(=O)[C@@]4(C)O/C=C/...
1,C4,0.20,Fosfomycin,1,0.000000,0.137337,0.503571,0.688266,0.926633,1.109750,1.212358,C[C@H]1[C@H](O1)P(=O)(O)O
2,C6,7.90,Ciprofloxacin,4,0.000000,0.012629,0.014207,0.012629,0.011050,0.011050,0.009472,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
3,C8,3.13,Ciprofloxacin,5,0.000000,0.029993,0.042622,0.039465,0.036308,0.033150,0.031572,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
4,C9,50.00,Trimethoprim,5,0.000000,0.033150,0.137337,0.157859,0.172066,0.154702,0.170488,Nc1nc(N)ncc1Cc(cc2OC)cc(OC)c2OC
...,...,...,...,...,...,...,...,...,...,...,...,...
195,N16,7.90,Ciprofloxacin,2,0.000000,0.009472,0.011050,0.011050,0.009472,0.009472,0.007893,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
196,N17,7.90,DMSO,5,0.000000,0.127866,0.451477,0.607758,0.771931,0.838232,1.101857,
197,N20,3.13,Rifampicin,6,0.000000,0.066301,0.102608,0.101030,0.086823,0.083665,0.061565,CN1CCN(CC1)/N=C/c2c(O)c3c5C(=O)[C@@]4(C)O/C=C/...
198,N21,50.00,DMSO,2,0.000000,0.126287,0.416748,0.514621,0.606179,0.674058,0.735624,


In [5]:
well_and_plate_consistency = (
    df_control_growth_curves
    .groupby(['Compound', 'Concentration'])
    .agg(
        n_unique_wells=('Well', 'nunique'),
        
    )
)

# Filter to rows where there's more than one unique well
inconsistent_rows = well_and_plate_consistency[well_and_plate_consistency['n_unique_wells'] > 1]

# Display result
if inconsistent_rows.empty:
    print("All (Compound, Concentration) pairs are associated with a single Well and Plate_ID.")
else:
    print("The following (Compound, Concentration) pairs have inconsistencies:")
    print(inconsistent_rows)

The following (Compound, Concentration) pairs have inconsistencies:
                             n_unique_wells
Compound      Concentration                
Ciprofloxacin 0.200                       6
              0.781                       6
              1.200                       6
              3.130                       6
              7.900                       6
              12.500                      6
              50.000                      5
DMSO          0.200                       6
              0.781                       5
              1.200                       5
              3.130                       5
              7.900                       6
              12.500                      6
              50.000                      4
Fosfomycin    0.200                       6
              0.781                       6
              1.200                       3
              3.130                       6
              7.900                       6
        

In [152]:
df_control_growth_curves = (
    df_control_growth_curves
    .groupby(['Compound','Concentration'], as_index=False)
    .agg({
        'Well': 'first',
        't_0': 'mean',
        't_2.08': 'mean',
        't_4.16': 'mean',
        't_6.24': 'mean',
        't_8.32': 'mean',
        't_10.4': 'mean',
        't_12.48': 'mean',
        'Smiles': 'first',
        
    })
)
df_control_growth_curves

Unnamed: 0,Compound,Concentration,Well,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48,Smiles
0,Ciprofloxacin,0.2,J6,0.0,0.083928,0.168646,0.177592,0.158912,0.143915,0.127077,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
1,Ciprofloxacin,0.781,D21,0.0,0.05183,0.097873,0.106555,0.104976,0.100241,0.093137,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
2,Ciprofloxacin,1.2,D6,0.001052,0.042359,0.073668,0.074194,0.07051,0.066564,0.063144,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
3,Ciprofloxacin,3.13,C8,0.0,0.028152,0.038412,0.035255,0.032361,0.030256,0.027888,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
4,Ciprofloxacin,7.9,C6,0.0,0.009998,0.011839,0.011313,0.009998,0.009208,0.007893,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
5,Ciprofloxacin,12.5,E19,0.000526,0.004473,0.004473,0.003683,0.00342,0.003157,0.002105,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
6,Ciprofloxacin,50.0,D4,0.001894,0.001894,0.001894,0.001894,0.000947,0.000947,0.000316,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O
7,DMSO,0.2,F16,0.0,0.123919,0.436217,0.57145,0.701158,0.841126,0.959257,
8,DMSO,0.781,H18,0.0,0.126919,0.441058,0.548087,0.665534,0.788348,0.880854,
9,DMSO,1.2,C19,0.0,0.117763,0.409171,0.550928,0.662693,0.789611,0.916214,


In [153]:
df_control_growth_curves["Control_Label"] = df_control_growth_curves["Compound"].apply(
    lambda x: -1 if x =='DMSO' else 1
)

df_control_growth_curves["Plate_ID"] = np.NaN


In [154]:
df_control_growth_curves["Well"] = df_control_growth_curves["Well"].str[0] + df_control_growth_curves["Well"].str[1:].str.zfill(2)


In [155]:
df_control_growth_curves

Unnamed: 0,Compound,Concentration,Well,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48,Smiles,Control_Label,Plate_ID
0,Ciprofloxacin,0.2,J06,0.0,0.083928,0.168646,0.177592,0.158912,0.143915,0.127077,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1,
1,Ciprofloxacin,0.781,D21,0.0,0.05183,0.097873,0.106555,0.104976,0.100241,0.093137,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1,
2,Ciprofloxacin,1.2,D06,0.001052,0.042359,0.073668,0.074194,0.07051,0.066564,0.063144,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1,
3,Ciprofloxacin,3.13,C08,0.0,0.028152,0.038412,0.035255,0.032361,0.030256,0.027888,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1,
4,Ciprofloxacin,7.9,C06,0.0,0.009998,0.011839,0.011313,0.009998,0.009208,0.007893,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1,
5,Ciprofloxacin,12.5,E19,0.000526,0.004473,0.004473,0.003683,0.00342,0.003157,0.002105,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1,
6,Ciprofloxacin,50.0,D04,0.001894,0.001894,0.001894,0.001894,0.000947,0.000947,0.000316,C1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1,
7,DMSO,0.2,F16,0.0,0.123919,0.436217,0.57145,0.701158,0.841126,0.959257,,-1,
8,DMSO,0.781,H18,0.0,0.126919,0.441058,0.548087,0.665534,0.788348,0.880854,,-1,
9,DMSO,1.2,C19,0.0,0.117763,0.409171,0.550928,0.662693,0.789611,0.916214,,-1,


# df_Enamine_DR_growthCurves

Enamine_DR_growthCurves has a subset of compounds in df_Enamine_t6_t12. Need to remove those rows from df_Enamine_t6_t12

In [156]:
df_Enamine_DR_growthCurves=pd.read_csv('/Users/ethankreuzer/Desktop/UdeM/MILA/Enamine_DR_growthcurves.csv')

#df_Enamine_DR_growthCurves['Concentration'].unique()
df_Enamine_DR_growthCurves['Concentration'].unique()

array([50.   , 12.5  ,  3.13 ,  0.781,  0.2  ])

In [157]:
df_Enamine_DR_growthCurves

Unnamed: 0,Well,Plate,Compound,Concentration,Replicate,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48,MIC,Smiles
0,A03,111-DR1,Z27542156,50.000,D,0.0,0.130220,0.434068,0.625569,0.732809,0.840049,0.947289,Inactive,[O-][N+](=O)C1=CC=C(S1)C(=O)NCC=2C=CC=3OCCOC3C2
1,A04,111-DR1,Z27542156,12.500,D,0.0,0.171074,0.518328,0.651102,0.778769,0.901329,1.018783,Inactive,[O-][N+](=O)C1=CC=C(S1)C(=O)NCC=2C=CC=3OCCOC3C2
2,A05,111-DR1,Z27542156,3.130,D,0.0,0.181287,0.543862,0.668975,0.801749,0.926863,1.039210,Inactive,[O-][N+](=O)C1=CC=C(S1)C(=O)NCC=2C=CC=3OCCOC3C2
3,A06,111-DR1,Z27542156,0.781,D,0.0,0.194054,0.556628,0.679189,0.804302,0.931969,1.039210,Inactive,[O-][N+](=O)C1=CC=C(S1)C(=O)NCC=2C=CC=3OCCOC3C2
4,A07,111-DR1,Z27542156,0.200,D,0.0,0.194054,0.571948,0.686849,0.814516,0.939629,1.049423,Inactive,[O-][N+](=O)C1=CC=C(S1)C(=O)NCC=2C=CC=3OCCOC3C2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2635,P03,111-DR9,Z6204188633,50.000,E,0.0,0.225165,0.501503,0.624320,0.716433,0.790635,0.859720,Inactive,NCC=1C=CC(OC=2C=CC(Cl)=CC2Cl)=CC1
2636,P04,111-DR9,Z6204188633,12.500,E,0.0,0.291691,0.614086,0.734344,0.857161,0.959509,1.056739,Inactive,NCC=1C=CC(OC=2C=CC(Cl)=CC2Cl)=CC1
2637,P05,111-DR9,Z6204188633,3.130,E,0.0,0.281456,0.624320,0.762490,0.890424,1.000448,1.107913,Inactive,NCC=1C=CC(OC=2C=CC(Cl)=CC2Cl)=CC1
2638,P06,111-DR9,Z6204188633,0.781,E,0.0,0.278897,0.647349,0.752255,0.895541,1.013241,1.123265,Inactive,NCC=1C=CC(OC=2C=CC(Cl)=CC2Cl)=CC1


In [112]:
df_Enamine_DR_growthCurves[df_Enamine_DR_growthCurves['Compound'] == "Z1084007582"]

Unnamed: 0,Well,Plate,Compound,Concentration,Replicate,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48,MIC,Smiles
215,K18,111-DR1,Z1084007582,50.0,D,0.0,0.183841,0.492795,0.648549,0.781322,0.898776,1.011123,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
216,K19,111-DR1,Z1084007582,12.5,D,0.0,0.181287,0.505561,0.653655,0.771109,0.888563,0.99325,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
217,K20,111-DR1,Z1084007582,3.13,D,0.0,0.176181,0.518328,0.651102,0.760896,0.865583,0.97027,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
218,K21,111-DR1,Z1084007582,0.781,D,0.0,0.176181,0.515775,0.653655,0.760896,0.870689,0.975376,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
219,K22,111-DR1,Z1084007582,0.2,D,0.0,0.178734,0.543862,0.694509,0.817069,0.929416,1.03155,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
235,L18,111-DR1,Z1084007582,50.0,E,0.0,0.178734,0.533648,0.674082,0.799196,0.914096,1.021336,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
236,L19,111-DR1,Z1084007582,12.5,E,0.0,0.183841,0.518328,0.653655,0.773662,0.888563,0.995803,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
237,L20,111-DR1,Z1084007582,3.13,E,0.0,0.165967,0.510668,0.648549,0.755789,0.865583,0.965163,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
238,L21,111-DR1,Z1084007582,0.781,E,0.0,0.188947,0.525988,0.671529,0.791536,0.903883,1.00857,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2
239,L22,111-DR1,Z1084007582,0.2,E,0.0,0.186394,0.538755,0.691955,0.809409,0.924309,1.028996,Inactive,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2


We can see here that compounds at given concnetrations are at different wells for different Replicates

In [158]:
# Group by Compound and Concentration, then compute number of unique Wells and Plate_IDs
well_and_plate_consistency = (
    df_Enamine_DR_growthCurves
    .groupby(['Compound', 'Concentration'])
    .agg(
        n_unique_wells=('Well', 'nunique'),
        n_unique_plates=('Plate', 'nunique')
    )
)

# Filter to rows where there's more than one unique well
inconsistent_rows = well_and_plate_consistency[well_and_plate_consistency['n_unique_wells'] > 1]

# Display result
if inconsistent_rows.empty:
    print("All (Compound, Concentration) pairs are associated with a single Well and Plate_ID.")
else:
    print("The following (Compound, Concentration) pairs have inconsistencies:")
    print(inconsistent_rows)



The following (Compound, Concentration) pairs have inconsistencies:
                           n_unique_wells  n_unique_plates
Compound    Concentration                                 
Z1084007582 0.200                       2                1
            0.781                       2                1
            3.130                       2                1
            12.500                      2                1
            50.000                      2                1
...                                   ...              ...
Z999961704  0.200                       2                1
            0.781                       2                1
            3.130                       2                1
            12.500                      2                1
            50.000                      2                1

[1320 rows x 2 columns]


In [160]:
inconsistent_rows

Unnamed: 0_level_0,Unnamed: 1_level_0,n_unique_wells,n_unique_plates
Compound,Concentration,Unnamed: 2_level_1,Unnamed: 3_level_1
Z1084007582,0.200,2,1
Z1084007582,0.781,2,1
Z1084007582,3.130,2,1
Z1084007582,12.500,2,1
Z1084007582,50.000,2,1
...,...,...,...
Z999961704,0.200,2,1
Z999961704,0.781,2,1
Z999961704,3.130,2,1
Z999961704,12.500,2,1


In [161]:
df_Enamine_DR_growthCurves = (
    df_Enamine_DR_growthCurves
    .groupby(['Compound','Concentration'], as_index=False)
    .agg({
        'Well': 'first', #this is a shortcut becuase the replicates are in different wells
        't_0': 'mean',
        't_2.08': 'mean',
        't_4.16': 'mean',
        't_6.24': 'mean',
        't_8.32': 'mean',
        't_10.4': 'mean',
        't_12.48': 'mean',
        'Smiles': 'first',
        'Plate': 'first', #This is valid since all compound concentrations pairs are on the same plate
        'MIC': 'first' #this will get chnaged to control label
    })
)

In [162]:
df_Enamine_DR_growthCurves=df_Enamine_DR_growthCurves.rename(columns={"Plate": "Plate_ID","MIC":"Control_Label"}) #rename to be consistent with other compounds

df_Enamine_DR_growthCurves["Control_Label"] = 0 

df_Enamine_DR_compounds = df_Enamine_DR_growthCurves['Compound'].unique()

df_Enamine_DR_growthCurves

Unnamed: 0,Compound,Concentration,Well,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48,Smiles,Plate_ID,Control_Label
0,Z1084007582,0.200,K22,0.0,0.182564,0.541308,0.693232,0.813239,0.926863,1.030273,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
1,Z1084007582,0.781,K21,0.0,0.182564,0.520882,0.662592,0.776216,0.887286,0.991973,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
2,Z1084007582,3.130,K20,0.0,0.171074,0.514498,0.649825,0.758342,0.865583,0.967716,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
3,Z1084007582,12.500,K19,0.0,0.182564,0.511945,0.653655,0.772386,0.888563,0.994526,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
4,Z1084007582,50.000,K18,0.0,0.181287,0.513222,0.661315,0.790259,0.906436,1.016230,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1315,Z999961704,0.200,A17,0.0,0.188947,0.536202,0.671529,0.799196,0.917926,1.026443,CC1=CC=C(C)N1C2CCN(CC2)C(=O)C3=C(C)OC(C)=C3S(=...,111-DR1,0
1316,Z999961704,0.781,A16,0.0,0.191501,0.529818,0.657485,0.783876,0.900053,1.007293,CC1=CC=C(C)N1C2CCN(CC2)C(=O)C3=C(C)OC(C)=C3S(=...,111-DR1,0
1317,Z999961704,3.130,A15,0.0,0.191501,0.536202,0.672805,0.797919,0.914096,1.018783,CC1=CC=C(C)N1C2CCN(CC2)C(=O)C3=C(C)OC(C)=C3S(=...,111-DR1,0
1318,Z999961704,12.500,A14,0.0,0.191501,0.541308,0.666422,0.794089,0.914096,1.025166,CC1=CC=C(C)N1C2CCN(CC2)C(=O)C3=C(C)OC(C)=C3S(=...,111-DR1,0


In [163]:
df_Enamine_DR_growthCurves['Well'].unique()

array(['K22', 'K21', 'K20', 'K19', 'K18', 'I12', 'I11', 'I10', 'I09',
       'I08', 'A12', 'A11', 'A10', 'A09', 'A08', 'I22', 'I21', 'I20',
       'I19', 'I18', 'C22', 'C21', 'C20', 'C19', 'C18', 'O22', 'O21',
       'O20', 'O19', 'O18', 'M07', 'M06', 'M05', 'M04', 'M03', 'I07',
       'I06', 'I05', 'I04', 'I03', 'E22', 'E21', 'E20', 'E19', 'E18',
       'A07', 'A06', 'A05', 'A04', 'A03', 'C07', 'C06', 'C05', 'C04',
       'C03', 'C12', 'C11', 'C10', 'C09', 'C08', 'C17', 'C16', 'C15',
       'C14', 'C13', 'O12', 'O11', 'O10', 'O09', 'O08', 'E12', 'E11',
       'E10', 'E09', 'E08', 'G22', 'G21', 'G20', 'G19', 'G18', 'M22',
       'M21', 'M20', 'M19', 'M18', 'O17', 'O16', 'O15', 'O14', 'O13',
       'A17', 'A16', 'A15', 'A14', 'A13', 'M12', 'M11', 'M10', 'M09',
       'M08', 'G07', 'G06', 'G05', 'G04', 'G03', 'A22', 'A21', 'A20',
       'A19', 'A18', 'O07', 'O06', 'O05', 'O04', 'O03', 'G17', 'G16',
       'G15', 'G14', 'G13', 'G12', 'G11', 'G10', 'G09', 'G08', 'K12',
       'K11', 'K10',

Note down all the compounds in this dataframe and delete them in df_Enamine_t6_t12

# df_Enamine_t6_t12

In [190]:
df_Enamine_t6_t12_controls=pd.read_csv('/Users/ethankreuzer/Desktop/UdeM/MILA/Enamine_t6_t12_ctrls.csv')
df_Enamine_t6_t12_wells=pd.read_csv('/Users/ethankreuzer/Desktop/UdeM/MILA/Enamine_t6_t12_wells.csv')

In [None]:
well_and_plate_consistency = (
    df_Enamine_t6_t12_wells
    .groupby(['Compound'])
    .agg(
        n_unique_wells=('Well', 'nunique'),
        n_unique_plates=('Plate_ID', 'nunique')
    )
)

# Filter to rows where there's more than one unique well
inconsistent_rows = well_and_plate_consistency[well_and_plate_consistency['n_unique_wells'] > 1]

# Display result
if inconsistent_rows.empty:
    print("All (Compound, Concentration) pairs are associated with a single Well and Plate_ID.")
else:
    print("The following (Compound, Concentration) pairs have inconsistencies:")
    print(inconsistent_rows)

All compounds are tested on the same plate, but wells are inconsistent across replicates

In [None]:
well_and_plate_consistency = (
    df_Enamine_t6_t12_controls
    .groupby(['Compound'])
    .agg(
        n_unique_wells=('Well', 'nunique'),
        n_unique_plates=('Plate_ID', 'nunique')
    )
)

# Filter to rows where there's more than one unique well
inconsistent_rows = well_and_plate_consistency[well_and_plate_consistency['n_unique_wells'] > 1]

# Display result
if inconsistent_rows.empty:
    print("All (Compound, Concentration) pairs are associated with a single Well and Plate_ID.")
else:
    print("The following (Compound, Concentration) pairs have inconsistencies:")
    print(inconsistent_rows)

In [193]:
df_Enamine_t6_t12_wells

Unnamed: 0,Compound,Replicate,Well,t_0,t_6,t_12,Smiles,Plate_ID,Activity
0,Z839134902,OD_A,A03,0.0,0.564947,0.962840,FC(F)(F)COC(=O)NC=1C=CC(=NC1)N2CCNC(=O)C2,1886318-Y12-A001,Inactive
1,Z965658782,OD_A,B03,0.0,0.692516,1.002325,CC1=CC=C(S1)C=2C=CC(=O)N(CC=3C=CC=C([N+](=O)[O...,1886318-Y12-A001,Inactive
2,Z16698707,OD_A,C03,0.0,0.534574,0.950691,CCN1C(SCC(=O)OCC=2C=CC(=CC2)[N+](=O)[O-])=NN=C...,1886318-Y12-A001,Inactive
3,Z952434162,OD_A,D03,0.0,0.586209,0.996251,[O-][N+](=O)C=1C=CC(=CC1)C(=O)NC=2C=CC=C(C2)C=...,1886318-Y12-A001,Inactive
4,Z558591178,OD_A,E03,0.0,0.574059,0.971952,CS(=O)(=O)NC1CCCN(C1)C(=O)NC2CCN3CCCCC23,1886318-Y12-A001,Inactive
...,...,...,...,...,...,...,...,...,...
63995,Z3342561431,OD_C,M05,0.0,0.523054,0.920575,CC1(C)CN(CC1CN)C=2N=C(N=C3CCCC32)C=4C=CN=CC4,1886318-Y12-A100,Inactive
63996,Z1459426305,OD_C,H15,0.0,0.538745,1.019955,CC(C)CCC1CCCCN1C(=O)C2=CSC(CN)=N2,1886318-Y12-A100,Inactive
63997,Z6191966153,OD_C,A05,0.0,0.564898,1.035647,CN1CCOC=2C=CC(=CC12)C(=O)NCC=3C=CC=NC3CN,1886318-Y12-A100,Inactive
63998,Z2396635673,OD_C,N17,0.0,0.533515,0.993802,CC1CC(CN1C(=O)NC=2C=C(C)C=C(CN)C2)C=3C=CC(C)=CC3,1886318-Y12-A100,Inactive


In [194]:
df_Enamine_t6_t12_controls

Unnamed: 0.1,Unnamed: 0,Well,Replicate,Compound,t_0,t_6,t_12,Smiles,Plate_ID,Concentration
0,1,A01,OD_A,DMSO,0.000000,0.584807,1.150336,,1886318-Y12-A081,50
1,2,A02,OD_A,Ciprofloxacin,0.003213,0.003213,0.000000,1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1886318-Y12-A081,50
2,3,A23,OD_A,DMSO,0.000000,0.552675,1.098924,,1886318-Y12-A081,50
3,4,A24,OD_A,DMSO,0.000000,0.552675,1.111777,,1886318-Y12-A081,50
4,5,B01,OD_A,DMSO,0.000000,0.607300,1.137483,,1886318-Y12-A081,50
...,...,...,...,...,...,...,...,...,...,...
12795,12796,O24,OD_C,DMSO,0.000000,0.604127,1.090567,,1886318-Y12-A100,50
12796,12797,P01,OD_C,DMSO,0.000000,0.708738,1.205639,,1886318-Y12-A100,50
12797,12798,P02,OD_C,DMSO,0.000000,0.632895,1.085337,,1886318-Y12-A100,50
12798,12799,P23,OD_C,Ciprofloxacin,0.000000,0.005231,0.002615,1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,1886318-Y12-A100,50


In [195]:
df_Enamine_t6_t12_wells = (
    df_Enamine_t6_t12_wells
    .groupby('Compound', as_index=False)
    .agg({
        'Well': 'first',
        'Plate_ID': 'first',
        't_0': 'mean',
        't_6': 'mean',
        't_12': 'mean',
        'Smiles': 'first',
        'Activity': 'first'
        
    })
)
df_Enamine_t6_t12_wells['Concentration'] = 50.0

df_Enamine_t6_t12_wells = df_Enamine_t6_t12_wells.rename(columns={"Activity":"Control_Label","t_6":"t_6.24", "t_12":"t_12.48"})

df_Enamine_t6_t12_wells['Control_Label'] = 0 

In [196]:
df_Enamine_t6_t12_wells = df_Enamine_t6_t12_wells[~df_Enamine_t6_t12_wells["Compound"].isin(df_Enamine_DR_compounds)] #remove the compounds that are in DR, as 
#those are the only compounds we want

In [199]:
df_Enamine_t6_t12_wells

Unnamed: 0,Compound,Well,Plate_ID,t_0,t_6.24,t_12.48,Smiles,Control_Label,Concentration
0,Z1000431776,L03,1886318-Y12-A082,0.0,0.464411,0.975276,CC(C)COC(=O)N1CCCN(CC1)C(=O)C=2SC=CC2C3CC3,0,50.0
1,Z1000729530,K14,1886318-Y12-A032,0.0,0.770719,0.923138,CC=1C=CC(OCCCNC(=O)C2=CC=C(O2)S(=O)(=O)N)=CC1,0,50.0
2,Z1001263652,F12,1886318-Y12-A014,0.0,0.861406,1.074267,CC1=NOC=2N=CC(NC(=O)C=3C=CC=C(NS(=O)(=O)C)C3)=...,0,50.0
3,Z1001363792,L12,1886318-Y12-A059,0.0,0.645117,0.984157,CCOC(=O)NCCC(=O)NCC(N1CCCC1)C2=CC=C(C)O2,0,50.0
4,Z1001366686,D07,1886318-Y12-A051,0.0,0.603515,1.027211,CC1=CC=C(O1)C(CNC(=O)C=2C=CC=CC2[N+](=O)[O-])N...,0,50.0
...,...,...,...,...,...,...,...,...,...
31994,Z999074808,A11,1886318-Y12-A034,0.0,0.649522,1.012335,CCOC(=O)NCCC(=O)N(CC1=CC=CO1)C2CCCC2,0,50.0
31995,Z999706442,G07,1886318-Y12-A085,0.0,0.485150,0.985027,CCOC(=O)NCCC(=O)NC(C)(C)C=1C=CC=2OCCOC2C1,0,50.0
31996,Z999855470,M21,1886318-Y12-A062,0.0,0.651710,1.058849,NC(=O)CC1CCCCN1C(=O)C=2C=CC(=CC2F)[N+](=O)[O-],0,50.0
31997,Z999936250,M18,1886318-Y12-A032,0.0,0.694482,0.987004,CC(CC=1C=CC(=CC1)N(C)C)NC(=O)CCNS(=O)(=O)C,0,50.0


In [200]:
df_Enamine_t6_t12_controls = (
    df_Enamine_t6_t12_controls
    .groupby(['Compound', 'Plate_ID'], as_index=False)
    .agg({
        #'Well': 'first', put NA for controls 
        #'Plate_ID': 'first', put NA for controls
        't_0': 'mean',
        't_6': 'mean',
        't_12': 'mean',
        'Smiles': 'first',
        'Concentration' : 'first'
    })
)

df_Enamine_t6_t12_controls = df_Enamine_t6_t12_controls.rename(columns={"t_6":"t_6.24", "t_12":"t_12.48"})


df_Enamine_t6_t12_controls ["Control_Label"] = df_Enamine_t6_t12_controls ["Compound"].apply(
    lambda x: 1 if x in ['Ciprofloxacin'] else (-1 if x == 'DMSO' else 0)
)
###No Activity Label here



In [201]:
df_Enamine_t6_t12_controls

Unnamed: 0,Compound,Plate_ID,t_0,t_6.24,t_12.48,Smiles,Concentration,Control_Label
0,Ciprofloxacin,1886318-Y12-A001,0.010702,0.012627,0.001626,1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,50,1
1,Ciprofloxacin,1886318-Y12-A002,0.006508,0.004574,0.000689,1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,50,1
2,Ciprofloxacin,1886318-Y12-A003,0.006387,0.005974,0.001443,1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,50,1
3,Ciprofloxacin,1886318-Y12-A004,0.008978,0.004569,0.001390,1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,50,1
4,Ciprofloxacin,1886318-Y12-A005,0.009662,0.004427,0.000395,1CNCCN1c(c2)c(F)cc3c2N(C4CC4)C=C(C3=O)C(=O)O,50,1
...,...,...,...,...,...,...,...,...
195,DMSO,1886318-Y12-A096,0.000000,0.590851,1.096532,,50,-1
196,DMSO,1886318-Y12-A097,0.000000,0.597467,1.089052,,50,-1
197,DMSO,1886318-Y12-A098,0.000000,0.606190,1.097033,,50,-1
198,DMSO,1886318-Y12-A099,0.000000,0.614978,1.098136,,50,-1


# Combine into one df

In [None]:
#og = pd.read_pickle("/Users/ethankreuzer/Desktop/UdeM/MILA/GrowthCurve.pkl")
#og['Concentration'].unique()

array([50.  , 12.5 ,  3.13,  7.9 ,  0.2 ,  1.2 ])

In [None]:
df_control_growth_curves

In [None]:
df_Enamine_t6_t12_controls

In [202]:
comb_df = pd.concat([df_Enamine_DR_growthCurves, df_GrowthCurve_allData], ignore_index=True)

comb_df=pd.concat([comb_df, df_Enamine_t6_t12_wells], ignore_index=True)

comb_df=pd.concat([comb_df, df_control_growth_curves], ignore_index=True)

comb_df=pd.concat([comb_df, df_Enamine_t6_t12_controls], ignore_index=True)

#add df_Enamine_t6_t12_controls and add df_control_growth_curves

In [205]:
comb_df.columns

Index(['Compound', 'Concentration', 'Well', 't_0', 't_2.08', 't_4.16',
       't_6.24', 't_8.32', 't_10.4', 't_12.48', 'Smiles', 'Plate_ID',
       'Control_Label'],
      dtype='object')

In [206]:
comb_df.to_pickle("/Users/ethankreuzer/Desktop/UdeM/MILA/GrowthCurve.pkl")

In [207]:
pd.read_pickle("/Users/ethankreuzer/Desktop/UdeM/MILA/GrowthCurve.pkl")

Unnamed: 0,Compound,Concentration,Well,t_0,t_2.08,t_4.16,t_6.24,t_8.32,t_10.4,t_12.48,Smiles,Plate_ID,Control_Label
0,Z1084007582,0.200,K22,0.0,0.182564,0.541308,0.693232,0.813239,0.926863,1.030273,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
1,Z1084007582,0.781,K21,0.0,0.182564,0.520882,0.662592,0.776216,0.887286,0.991973,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
2,Z1084007582,3.130,K20,0.0,0.171074,0.514498,0.649825,0.758342,0.865583,0.967716,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
3,Z1084007582,12.500,K19,0.0,0.182564,0.511945,0.653655,0.772386,0.888563,0.994526,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
4,Z1084007582,50.000,K18,0.0,0.181287,0.513222,0.661315,0.790259,0.906436,1.016230,[O-][N+](=O)C=1C=CC=NC1SC2=NN=C(CC=3C=CC=CC3F)O2,111-DR1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51718,DMSO,50.000,,0.0,,,0.590851,,,1.096532,,1886318-Y12-A096,-1
51719,DMSO,50.000,,0.0,,,0.597467,,,1.089052,,1886318-Y12-A097,-1
51720,DMSO,50.000,,0.0,,,0.606190,,,1.097033,,1886318-Y12-A098,-1
51721,DMSO,50.000,,0.0,,,0.614978,,,1.098136,,1886318-Y12-A099,-1
