In [None]:
import pandas as pd
import pickle

# 1. Import herbal compounds dataset

In [None]:
# Read the PCA fingerprint results table (name taken from the file) and
# preview its first five rows.
df_pca = pd.read_csv(filepath_or_buffer="../data/pca/pca_fingerprint_results.csv")
df_pca.head()

In [None]:
# Keep only the rows labelled class 2 (treated as the herbal compounds in this
# notebook) and renumber them from 0 so later positional matching is possible.
herbal_df_pca = df_pca.loc[df_pca["class"].eq(2)].reset_index(drop=True)
herbal_df_pca.head()

In [None]:
# Print a summary of the herbal subset: index range, per-column dtype and
# non-null count, and memory usage.
herbal_df_pca.info()

# 2. Load SVC Model

In [None]:
# Load the previously saved model object from disk.
# A context manager guarantees the file handle is closed even if unpickling
# fails (the bare `open(...)` form leaves it to the garbage collector).
# NOTE(security): unpickling executes code embedded in the file -- only load
# model files that come from a trusted source.
with open("../model/optimized_svc_model.pkl", "rb") as model_file:
    optimized_SVC = pickle.load(model_file)

In [None]:
# Display the estimator stored in the loaded object's `best_estimator_`
# attribute. NOTE(review): this attribute suggests the pickle holds a fitted
# hyper-parameter search wrapper (e.g. GridSearchCV) -- confirm against the
# training notebook.
optimized_SVC.best_estimator_

# 4. Split features and labels, then predict

In [None]:
# Feature matrix: every column except the label.
X = herbal_df_pca.drop(columns=["class"])
# Label column. The frame was filtered to class 2 above, so this is constant.
y = herbal_df_pca["class"]

In [None]:
# Preview the first five feature rows.
X.head(5)

In [None]:
# Preview the first five labels.
y.head(5)

In [None]:
# Predict a class label for every row of the herbal feature matrix.
y_predict = optimized_SVC.predict(X)
# Bare expression on the last line so the notebook displays the predictions.
y_predict

In [None]:
# Per-class probabilities for every herbal compound; predict_proba returns one
# column per class, in the same order as the model's `classes_` attribute.
y_predict_proba = optimized_SVC.predict_proba(X)
# Look up the ligand column by its label (1 -- the same label this notebook
# later uses to filter positive predictions) instead of assuming it sits at
# position 1. Raises ValueError if the model was not trained with a class 1,
# rather than silently returning the wrong column.
ligand_class_index = list(optimized_SVC.classes_).index(1)
y_predict_proba_ligand = y_predict_proba[:, ligand_class_index]
y_predict_proba_ligand

# 5. Concatenate to original data

In [None]:
# Reload the full fingerprint table so the SMILES strings can be re-attached.
all_compounds_df = pd.read_csv(filepath_or_buffer="../data/results/all_fingerprints.csv")
# Select the class-2 rows and only the identifying columns in one .loc call,
# then renumber from 0 so rows can be matched to the predictions by position.
herbal_df = (
    all_compounds_df
    .loc[all_compounds_df["class"] == 2, ["smiles", "class"]]
    .copy()
    .reset_index(drop=True)
)
herbal_df.head()

In [None]:
# Attach the predicted label to each compound.
# The array is assigned directly instead of being wrapped in pd.Series: a
# Series is aligned on index labels, so a row-count mismatch between the PCA
# table and the SMILES table would be silently padded with NaN or truncated.
# A bare array must match the frame's length exactly, otherwise pandas raises
# ValueError.
# NOTE(review): rows are matched purely by position -- assumes both CSV files
# list the class-2 compounds in the same order; confirm upstream.
herbal_df["prediction"] = y_predict
herbal_df

In [None]:
# Attach the ligand-class probability to each compound.
# The array is assigned directly instead of being wrapped in pd.Series: a
# Series is aligned on index labels, so a row-count mismatch would be silently
# padded with NaN or truncated. A bare array must match the frame's length
# exactly, otherwise pandas raises ValueError.
# NOTE(review): rows are matched purely by position -- assumes the prediction
# input and this frame list the compounds in the same order; confirm upstream.
herbal_df["probability"] = y_predict_proba_ligand
herbal_df

In [None]:
# Drop the now-redundant label column (every row here is class 2).
# Written as a rebinding with errors="ignore" so the cell is idempotent: the
# in-place form raised KeyError when the cell was executed a second time.
herbal_df = herbal_df.drop(columns=["class"], errors="ignore")

# 6. Sort data by probability of similarity to ligand compounds

In [None]:
# Rank the compounds from highest to lowest ligand-class probability.
# The sort is done in place; the original row labels are kept.
herbal_df.sort_values("probability", ascending=False, inplace=True)
herbal_df

In [None]:
# Keep only the compounds whose predicted label is 1; the descending
# probability order of herbal_df is preserved by the boolean selection.
predicted_herbal_df = herbal_df.loc[herbal_df["prediction"].eq(1)]

In [None]:
# Preview the first 10 positive predictions. herbal_df was sorted by
# descending probability before filtering, so these are the highest-scoring rows.
predicted_herbal_df.head(10)

# 7. Resolve herbal compound names by merging dataframes

In [None]:
# Load the lookup table used to map SMILES strings to compound names.
herbal_df_name = pd.read_csv(filepath_or_buffer="../data/herbals/herbal_smiles_all.csv")
herbal_df_name

In [None]:
# Attach the name columns to each positive prediction by joining on SMILES.
# An inner join drops predictions whose SMILES is absent from the name table
# and repeats a prediction once per matching row if a SMILES occurs more than
# once there.
final_predicted_herbal_df = predicted_herbal_df.merge(
    herbal_df_name,
    on="smiles",
    how="inner",
)
final_predicted_herbal_df

In [None]:
# Give the columns English, self-describing names ("Senyawa" is Indonesian
# for "compound").
final_predicted_herbal_df.rename(
    columns={
        "Senyawa": "compound_name",
        "smiles": "compound_smiles",
    },
    inplace=True,
)
# Move the name column to the front: pop removes it, insert puts it back at
# position 0.
final_predicted_herbal_df.insert(
    loc=0,
    column="compound_name",
    value=final_predicted_herbal_df.pop("compound_name"),
)
final_predicted_herbal_df

In [None]:
# Persist the ranked, named predictions; the row index is not written.
final_predicted_herbal_df.to_csv(
    path_or_buf="../data/results/herbal_prediction.csv",
    index=False,
)

# FINAL RESULT

We now have the herbal compounds ranked by their predicted probability of belonging to the ligand class. The highest-ranked compounds can therefore be considered candidate compounds for treating Alzheimer's disease. The top 10 compounds are:

In [None]:
# Show the first 10 rows of the merged, renamed result table.
final_predicted_herbal_df.head(10)