In [5]:
# "eos1pu1" cardiotoxicity classifier: fetch and serve the Ersilia model, score a
# list of SMILES strings, and write the predictions to a CSV file.
from ersilia.api import ErsiliaHub, ErsiliaModel

mdl_toxicity = ErsiliaModel("eos1pu1")
mdl_toxicity.fetch()
mdl_toxicity.serve()

# SMILES strings to score. The list is named `smiles_list` (not `input`) so the
# Python builtin `input()` is not shadowed for the rest of the kernel session.
# NOTE(review): the drug names in the trailing comments have not been verified
# against the structures -- for example "CN1C=NC2=C1N=CN2C" appears twice under
# two different names. Confirm each SMILES before reading results per drug.
smiles_list = [
    "CC1=C(C(=O)NC(=O)N1C)C",  # Trimethoprim
    "CC1=C(C(=O)N(C(=O)N1C)C)C",  # Sulfamethoxazole
    "COC1=CC=CC=C1N(C)S(=O)(=O)N",  # Sulfadiazine
    "CC1=CC(=O)C(=C(C1=O)O)O",  # Tetracycline
    "CC(=O)NC1=CC=C(C=C1)S(=O)(=O)N",  # Sulfapyridine
    "C[C@@H]1CN(C(=O)NC1=O)C2=CC=CC=C2",  # Ampicillin
    "CC(=O)O[C@H]1[C@H](O)[C@H](O)[C@@H](CO)O1",  # Amoxicillin
    "COC1=C(C=CC(=C1)C(=O)NC2CCCCC2)O",  # Ciprofloxacin
    "CCN1C2=C(C=CC(=C2)Cl)C(=O)N(C1=O)C",  # Chloramphenicol
    "CN(C)CCOC(=O)C1=CNC2=CC=CC=C12",  # Metronidazole
    "CN1C=NC2=C1N=CN2C",  # Acyclovir (antiviral, included often)
    "CCOC(=O)C1=CN=C(N=C1N)N",  # Trimethoprim alternative form
    "COC1=CC=C(C=C1)C2=NC=NC3=C2N=CN3",  # Azithromycin
    "CNC(=O)[C@@H]1CN(C2=CC=CC=C12)C",  # Clarithromycin
    "CC1=C(C(=O)N(C(=O)N1C)C2=CC=CC=C2)C",  # Erythromycin
    "CC1=CC(=O)NC(=O)N1C2=CC=CC=C2",  # Cephalexin
    "CC1=C(C=CC=C1)NC(=O)C2=CC=CC=C2",  # Clindamycin
    "CC1=C(C=C(C=C1)C(=O)NCC2=CN=CN2)O",  # Levofloxacin
    "CN(C)CCCN1C=NC2=C1N=CN2C",  # Ofloxacin
    "CC(=O)NC1=C(C=CC(=C1)O)O",  # Norfloxacin
    "COC1=C(C=C(C=C1)N)S(=O)(=O)N",  # Sulfisoxazole
    "C1=CC=C2C(=C1)C=CN2",  # Rifampicin (simplified)
    "CC1=C(C=C(C=C1)Cl)O",  # Doxycycline
    "COC1=C(C=C(C=C1)N)N",  # Nitrofurantoin
    "CN1C=NC2=C1N=CN2C",  # Gramicidin S (simplified core)
    "C[C@@H](C(=O)O)N",  # Penicillin G (simplified)
    "CCOC(=O)C1=CN=CN1",  # Tylosin
    "CC1=C(C(=O)NC(=O)N1C)C2=CC=CC=C2",  # Ceftriaxone
    "CC1=C(C=CC(=C1)C(=O)O)O",  # Fosfomycin (simplified)
    "C1=CC=C(C=C1)C2=NC=CN=C2",  # Mupirocin (simplified)
]

try:
    # `df` holds the model predictions and is reused by later cells.
    df = mdl_toxicity.run(smiles_list, batch_size=100)
    # NOTE(review): "toxiciity" looks like a typo; the file name is kept as-is so
    # anything that already reads this path keeps working.
    df.to_csv("toxiciity_results_scale.csv")
finally:
    # Always stop the served model, even if scoring or saving fails.
    mdl_toxicity.close()

# Remove the model after a successful run (same as the original cell). The fetch
# log suggests a deleted model must be fetched again on the next run.
mdl_toxicity.delete()

[34m⬇️  Fetching model eos1pu1: cardiotox-dictrank[0m
[34mGetting model source[0m
[31mModel already exists on your system. If you want to fetch it again, please delete it first.[0m
[32m👍 Model eos1pu1 is already fetched successfully![0m
[34mServing model. This process may take some time...[0m
Starting runner[0m
Running batch 1[0m
Batch 1 response fetched within: 0.0736 seconds[0m
Output is being generated within: 0.12830 seconds[0m
[32m[1m✅ The output successfully generated[0m
[32m⛔ Model eos1pu1 closed[0m
Deleting model eos1pu1[0m
[32m💥 Model eos1pu1 deleted successfully![0m


In [15]:
!pip install scikit-learn
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

#Impute missing values with median
imputer = SimpleImputer(strategy="median")
imputer.fit(df) 

numeric_cols = df.select_dtypes(include=['number']).columns

s = RobustScaler()
df_scaled = df.copy() #need to define df_scaled variable first 
df_scaled[numeric_cols] = s.fit_transform(df[numeric_cols])
print(df_scaled)

                                 key  \
0   167461c446df44b4c20d82ebb361e15f   
1   ed127f71e288afbfed1f3f23dff7aa7a   
2   350f93df2a18ec6ce17f166d84c5bdeb   
3   540352c51192dfaa4c7c99d6df51b062   
4   66894e93d51a5782e0631eca4f2111ca   
5   0e7473db58cf1988204fa7e32cbe6b67   
6   09b2c907e2827f7a50ab813d1fd7c7d8   
7   fd7e95457ac60bf4160a1273e640ec5c   
8   d18b10e59cc4a7efc4197fdb11f4453d   
9   c08770cf58daa0fbfb3b89bd98ab8c3b   
10  67f187d0bf7690730663f74395f47e8a   
11  b388f47412d55b3a9efaf6f4f9a0550a   
12  5338ea05453486b34a78e1f6f9f1bd8e   
13  24c42a87c512ec29ea6502c7ec37d305   
14  bd294461ce8b5a0ce393960895ef23df   
15  b497942db2b11b4684f26d9e032f4e34   
16  6249e4d97aff52fecd28fac8ba5ed6ee   
17  04109c7777b1bb6f326ea0e74fba2458   
18  5185922f3c35a81f411e055e9ad38679   
19  7054e7685eb951f55984be7ab1efee78   
20  1f2560cb3300f4b8d0de81c9704e6b8e   
21  89af5b42dd8bc84cbd8306377ab92ac5   
22  12ee20216b869400e56e4154d58b2558   
23  dc66c5877b8073ff3573d66039805c96   


In [17]:
# Discretize the scaled numeric columns of `df_scaled` into equal-width bins.
# Every sklearn name used in this cell is imported here, so the cell does not
# depend on imports made in other cells (StandardScaler was otherwise only
# imported further down the notebook, giving a NameError on a fresh kernel).
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import KBinsDiscretizer, StandardScaler
import numpy as np


# NOTE(review): `pipeline` is defined here but not applied anywhere in this
# cell; it is kept in case other cells rely on it.
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),  # Step 1
    ('scaler', StandardScaler())                    # Step 2
])

# Filter out non-numeric columns; the discretizer needs a numeric array.
numeric_df = df_scaled.select_dtypes(include=['number'])

X = numeric_df.to_numpy()
n_bins = 250

# Choose 'uniform' (equal-width bins) or 'quantile' strategy.
# encode='ordinal' returns the bin index of each value instead of one-hot columns.
kbd = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
X_binned = kbd.fit_transform(X)

# Bin indices come back as floats; cast to int for display only.
print(X_binned.astype(int))

[[ 48]
 [ 91]
 [166]
 [100]
 [216]
 [ 75]
 [ 94]
 [196]
 [162]
 [249]
 [ 58]
 [ 24]
 [147]
 [208]
 [155]
 [ 81]
 [187]
 [160]
 [195]
 [ 38]
 [123]
 [157]
 [ 84]
 [154]
 [ 58]
 [ 43]
 [  0]
 [ 99]
 [ 96]
 [ 96]]


In [13]:
!pip install scikit-learn

from sklearn.preprocessing import RobustScaler
import numpy as np

X = np.array([
    0.3018225721399196,
    0.4241081363975676,
    0.6380064222250595,
    0.4494531304384597,
    0.7803105992763514,
    0.3800993712588283,
    0.4329686367534138,
    0.7242706309569341,
    0.6267021749192344,
    0.8772698730210193,
    0.3305035120282103,
    0.2339158148648085,
    0.5849003536037762,
    0.7587308071282736,
    0.6063720013394521,
    0.3972592192720902,
    0.6982505618660925,
    0.6205655958871416,
    0.7214301678001082,
    0.2738823937136945,
    0.5153336907189141,
    0.6137923814236498,
    0.4050836500008021,
    0.6051195655857674,
    0.3305035120282103,
    0.2891448695474218,
    0.1636532700536332,
    0.4462634243496572,
    0.4381556361535564,
    0.4400922830153206
])

# Reshape to 2D array for sklearn
X = X.reshape(-1, 1)

# Apply RobustScaler
s = RobustScaler()
X_scaled = s.fit_transform(X)

print("Scaled data:\n", X_scaled)

Scaled data:
 [[-0.60651423]
 [-0.09863888]
 [ 0.78972163]
 [ 0.00662373]
 [ 1.38073811]
 [-0.2814157 ]
 [-0.06183953]
 [ 1.14799339]
 [ 0.74277293]
 [ 1.78342852]
 [-0.4873968 ]
 [-0.88854398]
 [ 0.56916196]
 [ 1.2911131 ]
 [ 0.65833783]
 [-0.21014757]
 [ 1.03992707]
 [ 0.71728654]
 [ 1.13619641]
 [-0.72255513]
 [ 0.2802383 ]
 [ 0.68915608]
 [-0.17765121]
 [ 0.65313622]
 [-0.4873968 ]
 [-0.65916715]
 [-1.18035778]
 [-0.00662373]
 [-0.04029693]
 [-0.03225366]]


In [None]:
# Scaling a Pandas DataFrame

import pandas as pd
from sklearn.preprocessing import StandardScaler

# Toy two-column frame whose features sit on very different scales.
# Note: this rebinds the notebook-level names `df` and `df_scaled`.
df = pd.DataFrame(
    {
        'feature1': [1, 2, 3, 4],
        'feature2': [100, 200, 300, 400],
    }
)

# Fit the scaler on the frame, then transform it; the result is a plain array,
# so it is wrapped back into a DataFrame with the original column labels.
scaler = StandardScaler()
df_scaled = pd.DataFrame(
    scaler.fit(df).transform(df),
    columns=df.columns,
)