In [15]:
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import json
with open('/MSCSol-main/dataset/benchmark.json', 'r') as f:
    data = json.load(f) 
df = pd.read_json('/MSCSol-main/dataset/benchmark.json')
X = df['features'].tolist()
y = [0 if drug['LogS'] >= -1 else 1 if drug['LogS'] >= -3 else 2 for drug in data]

dt_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_split=10, random_state=42)

# 初始化 KFold 交叉验证
kf = KFold(n_splits=5, shuffle=True)

# 初始化性能指标列表
weighted_accuracy_scores = []
weighted_precision_scores = []
weighted_recall_scores = []
weighted_l1_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_val = np.array(X)[train_index], np.array(X)[test_index]
    y_train, y_val = np.array(y)[train_index], np.array(y)[test_index]
    
    dt_classifier.fit(X_train, y_train)
    
    y_pred = dt_classifier.predict(X_val)
    t = classification_report(y_val,y_pred, target_names=['0', '1', '2'],digits=4)
    print(t)
    
    accuracy = accuracy_score(y_val, y_pred)
    weighted_accuracy_scores.append(accuracy)

    precision, recall, fscore, _ = precision_recall_fscore_support(y_val, y_pred, average='weighted')
    weighted_precision_scores.append(precision)
    weighted_recall_scores.append(recall)
    weighted_l1_scores.append(2 * (precision * recall) / (precision + recall)) 

mean_weighted_accuracy = sum(weighted_accuracy_scores) / len(weighted_accuracy_scores)
mean_weighted_precision = sum(weighted_precision_scores) / len(weighted_precision_scores)
mean_weighted_recall = sum(weighted_recall_scores) / len(weighted_recall_scores)
mean_weighted_l1 = sum(weighted_l1_scores) / len(weighted_l1_scores)

print("Mean weighted accuracy:", mean_weighted_accuracy)
print("Mean weighted precision:", mean_weighted_precision)
print("Mean weighted recall:", mean_weighted_recall)
print("Mean weighted L1 score:", mean_weighted_l1)


              precision    recall  f1-score   support

           0     0.7060    0.7126    0.7093       428
           1     0.6571    0.6735    0.6652       683
           2     0.8256    0.8044    0.8149       818

    accuracy                         0.7377      1929
   macro avg     0.7296    0.7302    0.7298      1929
weighted avg     0.7394    0.7377    0.7385      1929

              precision    recall  f1-score   support

           0     0.6522    0.6944    0.6726       432
           1     0.6087    0.5971    0.6028       680
           2     0.7815    0.7672    0.7743       816

    accuracy                         0.6909      1928
   macro avg     0.6808    0.6862    0.6832      1928
weighted avg     0.6916    0.6909    0.6910      1928

              precision    recall  f1-score   support

           0     0.7461    0.5877    0.6575       405
           1     0.6342    0.7066    0.6685       692
           2     0.7947    0.8014    0.7981       831

    accuracy        

In [16]:
from sklearn.naive_bayes import BernoulliNB
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import json
with open('/MSCSol-main/dataset/benchmark.json', 'r') as f:
    data = json.load(f) 
df = pd.read_json('/MSCSol-main/dataset/benchmark.json')
X = df['features'].tolist()
X_binary = (X > np.mean(X, axis=0)).astype(int)
y = [0 if drug['LogS'] >= -1 else 1 if drug['LogS'] >= -3 else 2 for drug in data]

bnb_classifier = BernoulliNB(alpha=0)

# 初始化 KFold 交叉验证
kf = KFold(n_splits=5, shuffle=True)

# 初始化性能指标列表
weighted_accuracy_scores = []
weighted_precision_scores = []
weighted_recall_scores = []
weighted_l1_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_val = np.array(X_binary)[train_index], np.array(X_binary)[test_index]
    y_train, y_val = np.array(y)[train_index], np.array(y)[test_index]
    
    bnb_classifier.fit(X_train, y_train)
    
    y_pred = bnb_classifier.predict(X_val)
    t = classification_report(y_val,y_pred, target_names=['0', '1', '2'],digits=4)
    print(t)
    
    accuracy = accuracy_score(y_val, y_pred)
    weighted_accuracy_scores.append(accuracy)

    precision, recall, fscore, _ = precision_recall_fscore_support(y_val, y_pred, average='weighted')
    weighted_precision_scores.append(precision)
    weighted_recall_scores.append(recall)
    weighted_l1_scores.append(2 * (precision * recall) / (precision + recall)) 

mean_weighted_accuracy = sum(weighted_accuracy_scores) / len(weighted_accuracy_scores)
mean_weighted_precision = sum(weighted_precision_scores) / len(weighted_precision_scores)
mean_weighted_recall = sum(weighted_recall_scores) / len(weighted_recall_scores)
mean_weighted_l1 = sum(weighted_l1_scores) / len(weighted_l1_scores)

print("Mean weighted accuracy:", mean_weighted_accuracy)
print("Mean weighted precision:", mean_weighted_precision)
print("Mean weighted recall:", mean_weighted_recall)
print("Mean weighted L1 score:", mean_weighted_l1)


              precision    recall  f1-score   support

           0     0.6281    0.6957    0.6602       437
           1     0.6031    0.5835    0.5931       677
           2     0.7532    0.7301    0.7414       815

    accuracy                         0.6708      1929
   macro avg     0.6614    0.6697    0.6649      1929
weighted avg     0.6721    0.6708    0.6710      1929

              precision    recall  f1-score   support

           0     0.6501    0.6627    0.6563       415
           1     0.6060    0.5790    0.5922       696
           2     0.7298    0.7503    0.7399       817

    accuracy                         0.6696      1928
   macro avg     0.6620    0.6640    0.6628      1928
weighted avg     0.6679    0.6696    0.6686      1928

              precision    recall  f1-score   support

           0     0.6399    0.6927    0.6652       449
           1     0.6281    0.5665    0.5957       662
           2     0.7645    0.7907    0.7774       817

    accuracy        

In [17]:
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import json
with open('/MSCSol-main/dataset/benchmark.json', 'r') as f:
    data = json.load(f) 
X = df['features'].tolist()
y = [0 if drug['LogS'] >= -1 else 1 if drug['LogS'] >= -3 else 2 for drug in data]

adaboost = AdaBoostClassifier(n_estimators=100, random_state=42)

# 初始化 KFold 交叉验证
kf = KFold(n_splits=5, shuffle=True)

# 初始化性能指标列表
weighted_accuracy_scores = []
weighted_precision_scores = []
weighted_recall_scores = []
weighted_l1_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_val = np.array(X)[train_index], np.array(X)[test_index]
    y_train, y_val = np.array(y)[train_index], np.array(y)[test_index]
    
    adaboost.fit(X_train, y_train)
    
    y_pred = adaboost.predict(X_val)

    t = classification_report(y_val,y_pred, target_names=['0', '1', '2'],digits=4)
    print(t)
    
    accuracy = accuracy_score(y_val, y_pred)
    weighted_accuracy_scores.append(accuracy)

    precision, recall, fscore, _ = precision_recall_fscore_support(y_val, y_pred, average='weighted')
    weighted_precision_scores.append(precision)
    weighted_recall_scores.append(recall)
    weighted_l1_scores.append(2 * (precision * recall) / (precision + recall)) 

mean_weighted_accuracy = sum(weighted_accuracy_scores) / len(weighted_accuracy_scores)
mean_weighted_precision = sum(weighted_precision_scores) / len(weighted_precision_scores)
mean_weighted_recall = sum(weighted_recall_scores) / len(weighted_recall_scores)
mean_weighted_l1 = sum(weighted_l1_scores) / len(weighted_l1_scores)

print("Mean weighted accuracy:", mean_weighted_accuracy)
print("Mean weighted precision:", mean_weighted_precision)
print("Mean weighted recall:", mean_weighted_recall)
print("Mean weighted L1 score:", mean_weighted_l1)


              precision    recall  f1-score   support

           0     0.7500    0.6510    0.6970       447
           1     0.6316    0.6914    0.6602       687
           2     0.8099    0.8038    0.8068       795

    accuracy                         0.7284      1929
   macro avg     0.7305    0.7154    0.7213      1929
weighted avg     0.7325    0.7284    0.7291      1929

              precision    recall  f1-score   support

           0     0.7005    0.6316    0.6643       437
           1     0.6280    0.6686    0.6477       697
           2     0.8018    0.7997    0.8008       794

    accuracy                         0.7142      1928
   macro avg     0.7101    0.7000    0.7042      1928
weighted avg     0.7160    0.7142    0.7145      1928

              precision    recall  f1-score   support

           0     0.7521    0.6496    0.6971       411
           1     0.6470    0.7006    0.6727       688
           2     0.8092    0.8082    0.8087       829

    accuracy        

In [18]:
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import json
with open('/MSCSol-main/dataset/benchmark.json', 'r') as f:
    data = json.load(f) 
df = pd.read_json('/MSCSol-main/dataset/benchmark.json')
X = df['features'].tolist()
y = [0 if drug['LogS'] >= -1 else 1 if drug['LogS'] >= -3 else 2 for drug in data]

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# 初始化 KFold 交叉验证
kf = KFold(n_splits=5, shuffle=True)

# 初始化性能指标列表
weighted_accuracy_scores = []
weighted_precision_scores = []
weighted_recall_scores = []
weighted_l1_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_val = np.array(X)[train_index], np.array(X)[test_index]
    y_train, y_val = np.array(y)[train_index], np.array(y)[test_index]
    
    rf_classifier.fit(X_train, y_train)
    
    y_pred = rf_classifier.predict(X_val)

    t = classification_report(y_val,y_pred, target_names=['0', '1', '2'],digits=4)
    print(t)
    
    accuracy = accuracy_score(y_val, y_pred)
    weighted_accuracy_scores.append(accuracy)

    precision, recall, fscore, _ = precision_recall_fscore_support(y_val, y_pred, average='weighted')
    weighted_precision_scores.append(precision)
    weighted_recall_scores.append(recall)
    weighted_l1_scores.append(2 * (precision * recall) / (precision + recall)) 

mean_weighted_accuracy = sum(weighted_accuracy_scores) / len(weighted_accuracy_scores)
mean_weighted_precision = sum(weighted_precision_scores) / len(weighted_precision_scores)
mean_weighted_recall = sum(weighted_recall_scores) / len(weighted_recall_scores)
mean_weighted_l1 = sum(weighted_l1_scores) / len(weighted_l1_scores)

print("Mean weighted accuracy:", mean_weighted_accuracy)
print("Mean weighted precision:", mean_weighted_precision)
print("Mean weighted recall:", mean_weighted_recall)
print("Mean weighted L1 score:", mean_weighted_l1)


              precision    recall  f1-score   support

           0     0.8118    0.6494    0.7216       425
           1     0.6763    0.7266    0.7006       673
           2     0.8106    0.8448    0.8273       831

    accuracy                         0.7605      1929
   macro avg     0.7662    0.7403    0.7498      1929
weighted avg     0.7640    0.7605    0.7598      1929

              precision    recall  f1-score   support

           0     0.8058    0.6526    0.7211       426
           1     0.7039    0.7049    0.7044       698
           2     0.7930    0.8719    0.8306       804

    accuracy                         0.7630      1928
   macro avg     0.7675    0.7431    0.7520      1928
weighted avg     0.7636    0.7630    0.7607      1928

              precision    recall  f1-score   support

           0     0.8435    0.6510    0.7348       447
           1     0.6877    0.7382    0.7121       680
           2     0.8113    0.8639    0.8368       801

    accuracy        

In [19]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="threadpoolctl")
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
import json
with open('/MSCSol-main/dataset/benchmark.json', 'r') as f:
    data = json.load(f) 
df = pd.read_json('/MSCSol-main/dataset/benchmark.json')
X = df['features'].tolist()
y = [0 if drug['LogS'] >= -1 else 1 if drug['LogS'] >= -3 else 2 for drug in data]

knn_classifier = KNeighborsClassifier(n_neighbors=5)

# 初始化 KFold 交叉验证
kf = KFold(n_splits=5, shuffle=True)

# 初始化性能指标列表
weighted_accuracy_scores = []
weighted_precision_scores = []
weighted_recall_scores = []
weighted_l1_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_val = np.array(X)[train_index], np.array(X)[test_index]
    y_train, y_val = np.array(y)[train_index], np.array(y)[test_index]
    
    knn_classifier.fit(X_train, y_train)
    
    y_pred = knn_classifier.predict(X_val)

    t = classification_report(y_val,y_pred, target_names=['0', '1', '2'],digits=4)
    print(t)
    
    accuracy = accuracy_score(y_val, y_pred)
    weighted_accuracy_scores.append(accuracy)

    precision, recall, fscore, _ = precision_recall_fscore_support(y_val, y_pred, average='weighted')
    weighted_precision_scores.append(precision)
    weighted_recall_scores.append(recall)
    weighted_l1_scores.append(2 * (precision * recall) / (precision + recall)) 

mean_weighted_accuracy = sum(weighted_accuracy_scores) / len(weighted_accuracy_scores)
mean_weighted_precision = sum(weighted_precision_scores) / len(weighted_precision_scores)
mean_weighted_recall = sum(weighted_recall_scores) / len(weighted_recall_scores)
mean_weighted_l1 = sum(weighted_l1_scores) / len(weighted_l1_scores)

print("Mean weighted accuracy:", mean_weighted_accuracy)
print("Mean weighted precision:", mean_weighted_precision)
print("Mean weighted recall:", mean_weighted_recall)
print("Mean weighted L1 score:", mean_weighted_l1)


Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f72649faee0>
Traceback (most recent call last):
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    buf = (HMODULE * buf_count)()
AttributeError: 'NoneType' object has no attribute 'split'


              precision    recall  f1-score   support

           0     0.6681    0.7356    0.7002       435
           1     0.6187    0.6696    0.6432       681
           2     0.8345    0.7319    0.7798       813

    accuracy                         0.7107      1929
   macro avg     0.7071    0.7124    0.7077      1929
weighted avg     0.7208    0.7107    0.7136      1929



Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f72649fae50>
Traceback (most recent call last):
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    buf = (HMODULE * buf_count)()
AttributeError: 'NoneType' object has no attribute 'split'


              precision    recall  f1-score   support

           0     0.6508    0.7354    0.6905       446
           1     0.6495    0.6514    0.6504       697
           2     0.8248    0.7618    0.7921       785

    accuracy                         0.7158      1928
   macro avg     0.7084    0.7162    0.7110      1928
weighted avg     0.7212    0.7158    0.7174      1928



Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f72649faf70>
Traceback (most recent call last):
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    buf = (HMODULE * buf_count)()
AttributeError: 'NoneType' object has no attribute 'split'


              precision    recall  f1-score   support

           0     0.6474    0.7336    0.6878       428
           1     0.6344    0.6362    0.6353       701
           2     0.8122    0.7522    0.7810       799

    accuracy                         0.7059      1928
   macro avg     0.6980    0.7074    0.7014      1928
weighted avg     0.7110    0.7059    0.7074      1928



Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f72649faf70>
Traceback (most recent call last):
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    buf = (HMODULE * buf_count)()
AttributeError: 'NoneType' object has no attribute 'split'


              precision    recall  f1-score   support

           0     0.6174    0.6893    0.6514       412
           1     0.6165    0.6373    0.6267       714
           2     0.8096    0.7369    0.7715       802

    accuracy                         0.6898      1928
   macro avg     0.6812    0.6878    0.6832      1928
weighted avg     0.6970    0.6898    0.6922      1928



Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f72649faf70>
Traceback (most recent call last):
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    
  File "/ifs/home/fanziyu/miniconda3/envs/pgmg2/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    buf = (HMODULE * buf_count)()
AttributeError: 'NoneType' object has no attribute 'split'


              precision    recall  f1-score   support

           0     0.6505    0.6998    0.6743       423
           1     0.6008    0.6820    0.6388       651
           2     0.8474    0.7283    0.7834       854

    accuracy                         0.7064      1928
   macro avg     0.6996    0.7034    0.6988      1928
weighted avg     0.7210    0.7064    0.7106      1928

Mean weighted accuracy: 0.7057354013538717
Mean weighted precision: 0.7141842060808361
Mean weighted recall: 0.7057354013538717
Mean weighted L1 score: 0.7099303852309987
