# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from metAppDomain_ADM import NSGVisualizer
from pyAppDomain import AppDomainFpSimilarity

# Using the structure-activity-landscape-based applicability domain (AD)
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn import metrics

'''
STAGE I Calculate AD Metrics
'''
from metAppDomain_ADM  import NSG
def rigidWt(x, sCutoff=0.9):
    """Hard-threshold weighting.

    Returns a float array with the same shape as ``x`` that holds 0 wherever
    ``x < sCutoff`` and 1 everywhere else.
    """
    # Entries that fail the ``<`` test (including NaN) keep a weight of 1.
    return np.where(x < sCutoff, 0.0, np.ones(shape=x.shape))
#
def expWt(x, a=20, eps=1e-6):
    """Exponential weighting: ``exp(-a * (1 - x) / (x + eps))``.

    The weight is exactly 1 at ``x == 1``. ``eps`` is added to the
    denominator, which keeps it non-zero at ``x == 0``.
    """
    numerator = -a * (1 - x)
    denominator = x + eps
    return np.exp(numerator / denominator)
#
# Scheme "a": hard-threshold weighting in both weight slots, cutoff 0.9.
wtFunc1a = wtFunc2a = rigidWt
kw1a = dict(sCutoff=0.9)
kw2a = dict(sCutoff=0.9)

# Scheme "b": exponential weighting in both weight slots, a = 20.
wtFunc1b = wtFunc2b = expWt
kw1b = dict(a=20)
kw2b = dict(a=20)

# Input tables; both are indexed by their 'CmpdID' column.
df_train = pd.read_csv('Training_Set.csv', index_col='CmpdID')  # training set
df_ext = pd.read_csv('Validation_Set.csv', index_col='CmpdID')  # validation set

# Build the NSG object from the training set ('y_true' response, 'neuSmi' SMILES column)
# and compute pairwise similarities with the 'MACCS_keys' fingerprint.
nsg = NSG(df_train, yCol='y_true', smiCol='neuSmi')
nsg.calcPairwiseSimilarityWithFp('MACCS_keys')
# NOTE(review): presumably the query-vs-training similarity matrix for the
# validation compounds -- confirm against NSG.genQTSM.
dfQTSM = nsg.genQTSM(df_ext, 'neuSmi')

# Query the AD metrics once per weighting scheme (rigid first, then exp) and
# join each result onto the validation table.
for _code, _wt1, _kw1, _wt2, _kw2 in (
    ('|rigid', wtFunc1a, kw1a, wtFunc2a, kw2a),
    ('|exp', wtFunc1b, kw1b, wtFunc2b, kw2b),
):
    _ad_metrics = nsg.queryADMetrics(
        dfQTSM, wtFunc1=_wt1, kw1=_kw1, wtFunc2=_wt2, kw2=_kw2, code=_code
    )
    df_ext = df_ext.join(_ad_metrics)

df_ext.to_csv('dfEx_ADMetrics.csv')

'''
STAGE II Evaluate Model Performance with ADSAL 
'''

from sklearn import metrics
# Set the ρs and IA cutoff values according to your requirements.
# Each ρs value is used as a lower bound on the 'simiDensity|<code>' column;
# each IA value is used as an upper bound on the 'simiWtLD_w|<code>' column.
ρsDict = {
    'rigid': [0.01, 0.1, 0.5, 1, 2, 3],
    'exp': [0.01, 0.1, 0.5, 1, 2, 3],
}

yt = df_ext['y_true']
yp = df_ext['y_pre']
IAVal_List = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

for code in ['rigid', 'exp']:
    # One table per statistic: rows are IA cutoffs, columns are ρs thresholds.
    dfn = pd.DataFrame(index=IAVal_List, columns=ρsDict[code])
    dfR2 = pd.DataFrame(index=IAVal_List, columns=ρsDict[code])
    dfRMSE = pd.DataFrame(index=IAVal_List, columns=ρsDict[code])

    # These columns do not change inside the threshold grid; look them up once.
    simiDensity = df_ext['simiDensity|' + code]
    simiWtLD = df_ext['simiWtLD_w|' + code]

    for densLB in dfR2.columns:
        for LdUB in dfR2.index:
            # Compounds inside the domain for this (density, IA) threshold pair.
            adi = df_ext.index[(simiDensity >= densLB) & (simiWtLD <= LdUB)]
            dfn.loc[LdUB, densLB] = adi.shape[0]

            if adi.shape[0] == 0:
                # sklearn's regression metrics raise ValueError on empty input.
                # Previously only the R2 call was guarded (by a bare except), so
                # the RMSE call still aborted the run. Record NaN for both.
                dfR2.loc[LdUB, densLB] = np.nan
                dfRMSE.loc[LdUB, densLB] = np.nan
                continue

            dfR2.loc[LdUB, densLB] = metrics.r2_score(yt[adi], yp[adi])
            dfRMSE.loc[LdUB, densLB] = np.sqrt(
                metrics.mean_squared_error(yt[adi], yp[adi])
            )

    # Save the model performance within the ADSAL on the external validation set.
    dfn.to_csv('Val_a=20_Scutoff=0.9_{:s}_AD_n.csv'.format(code))
    dfR2.to_csv('Val_a=20_Scutoff=0.9_{:s}_AD_R2.csv'.format(code))
    dfRMSE.to_csv('Val_a=20_Scutoff=0.9_{:s}_AD_RMSE.csv'.format(code))

