## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import re
import tensorflow as tf
from autogluon.tabular import TabularPredictor

# To explicitly run on CPU:
#tf.config.set_visible_devices([], 'GPU')

from superpac. base import get_split_mmp_indices
from superpac. eval import get_ag_metrics

## Load data and classifier

In [3]:
# Read the full MACCS-key fragment table and remember how many rows it has.
full_df = pd.read_csv('./MACCS_keys/MACCS_frag_df.csv')
ds_size = len(full_df)

# Positional row indices of the three splits, as returned by the project helper.
zero_out, one_out, two_out = get_split_mmp_indices("./index sets for train test split", ds_size)
# NOTE(review): `+` concatenates only if both operands are Python lists; for
# numpy arrays it would add element-wise instead -- TODO confirm the return type.
fulltest_ixs = one_out + two_out

# Name of the target column.
label = 'is_AC'

# Slice the full table into one frame per split (positional indexing).
zero_out_df, two_out_df, one_out_df, fulltest_df = (
    full_df.iloc[ixs] for ixs in (zero_out, two_out, one_out, fulltest_ixs)
)

# Remove the 'pKi_diff' column from every split; the label column is kept.
train_data, test_data, one_data, fulltest_data = (
    split_df.drop(columns='pKi_diff')
    for split_df in (zero_out_df, two_out_df, one_out_df, fulltest_df)
)

# Targets and feature matrices for each split.
y_train, y_test, y_one, y_fulltest = (
    split[label] for split in (train_data, test_data, one_data, fulltest_data)
)
X_train, X_test, X_one, X_fulltest = (
    split.drop(columns=[label])
    for split in (train_data, test_data, one_data, fulltest_data)
)

In [5]:
# Load the previously saved AutoGluon predictor from the ./agClassifier directory.
predictor = TabularPredictor.load("./agClassifier")

## Evaluation metrics on test set 

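To evaluate on the full test set instead (the combined <code>one_out</code> and <code>two_out</code> splits), replace <code>test_data</code> with <code>fulltest_data</code> in the cell below.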

In [12]:
# Metrics data for test set only
#
# get_ag_metrics receives the predictor itself rather than a model name, so each
# model is temporarily made the predictor's active ("best") model before scoring
# (presumably the helper predicts with the active model -- verify in superpac.eval).
# The original active model is restored afterwards so that later cells do not
# silently predict with whichever model happened to be last in the list.
# NOTE(review): get_model_names / get_model_best are the pre-1.0 AutoGluon
# spellings already used by this notebook; newer releases expose
# model_names() / model_best -- confirm against the installed version.
original_best_model = predictor.get_model_best()

metrics_data = []
try:
    for model_name in predictor.get_model_names():
        predictor.set_model_best(model_name)
        # Build a fresh row instead of mutating the list returned by the helper.
        metrics_data.append([model_name, *get_ag_metrics(predictor, test_data)])
finally:
    predictor.set_model_best(original_best_model)

metrics = pd.DataFrame(
    metrics_data,
    columns=['Model', 'Accuracy', 'Precision', 'Recall', 'Specificity', 'F1', 'MCC'],
)
# index=False: do not write the row index as an extra CSV column.
metrics.to_csv('ag_metrics.csv', index=False)


In [13]:
# Reload the metrics table from 'ag_metrics.csv' (the file the evaluation cell
# writes), so the cells below can run without re-evaluating every model.
metrics = pd.read_csv('ag_metrics.csv')

In [15]:
# Display the table with the maximum of each column (axis=0) highlighted in dark green.
metrics.style.highlight_max(color = 'darkgreen', axis = 0)

Unnamed: 0,Model,Accuracy,Precision,Recall,Specificity,F1,MCC
0,RandomForestGini_BAG_L1,0.910324,0.690598,0.402792,0.976478,0.508816,0.483654
1,RandomForestEntr_BAG_L1,0.910899,0.697232,0.401795,0.977258,0.509804,0.486085
2,CatBoost_BAG_L1,0.913888,0.760246,0.36989,0.984795,0.497653,0.492285
3,ExtraTreesGini_BAG_L1,0.911244,0.698795,0.404786,0.977258,0.512626,0.488744
4,ExtraTreesEntr_BAG_L1,0.911244,0.70087,0.401795,0.977648,0.510773,0.487763
5,NeuralNetFastAI_BAG_L1,0.905496,0.656304,0.378863,0.974139,0.480405,0.452316
6,NeuralNetTorch_BAG_L1,0.909864,0.653147,0.465603,0.967771,0.543655,0.503944
7,RandomForestGini_BAG_L2,0.913198,0.680758,0.465603,0.97154,0.55299,0.518019
8,RandomForestEntr_BAG_L2,0.912049,0.675,0.457627,0.97128,0.545455,0.510306
9,CatBoost_BAG_L2,0.912854,0.677279,0.4666,0.97102,0.552538,0.516866


### To TeX

In [None]:
# Render the metrics table as LaTeX: the best MCC is highlighted in green and the
# best Accuracy / Precision / Recall / Specificity in blue.
styled_metrics = (
    metrics.style
    .highlight_max(color='green', axis=0, subset=["MCC"])
    .highlight_max(color='blue', axis=0, subset=["Accuracy", "Precision", "Recall", "Specificity"])
)
# highlight_max attaches CSS (background-color); convert_css=True asks pandas to
# translate it into LaTeX commands instead of emitting the CSS property name.
# NOTE(review): the generated colour commands presumably need a colour package
# (e.g. xcolor with the table option) in the LaTeX preamble -- confirm.
tex_content = styled_metrics.to_latex(convert_css=True)

# Add vertical rules: turn a column spec such as "lrr" into "|l|r|r|".
# The pattern omits the leading backslash, so the "\" in front of "begin" is kept.
re_borders = re.compile(r"begin\{tabular\}\{([^\}]+)\}")
spec_match = re_borders.search(tex_content)
if spec_match is None:
    raise ValueError("No tabular column specification found in the generated LaTeX")
borders = '|'.join(spec_match.group(1))
tex_content = re_borders.sub("begin{tabular}{|" + borders + "|}", tex_content)

# The context manager closes the file even if the write fails.
# NOTE(review): the file name says "full" while the evaluation cell above scores
# test_data -- confirm which split this export is meant to describe.
with open("./latex/baseline_metrics_full", "w") as f:
    f.write(tex_content)