In [1]:
# Print the Python version of the running kernel so the environment is recorded with the outputs
from platform import python_version
print(python_version())

3.8.10


In [2]:
import numpy as np
import pandas as pd
import feyn
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from functions import modsum, model_features_chart, crossvalidation_as_framework

# Set a private feyn flag before any QLattice connection is opened.
# NOTE(review): presumably selects the v2 service API; this is an internal attribute,
# so confirm it still exists (and is still needed) when upgrading feyn.
feyn._qlattice._USE_V2_API=True

In [3]:
# Show the installed feyn version
feyn.__version__

'2.1.3'

# AD Case

In [4]:
# Read the AD omics table (CSV) into a DataFrame
data = pd.read_csv(filepath_or_buffer="../data/ad_omics.csv")

In [5]:
# Semantic types passed to the QLattice: every object-dtype column is tagged 'c'
stypes = {column: 'c' for column in data.columns if data[column].dtype == 'object'}

# Seed reused by the train/test split and the QLattice reset
random_seed = 42

In [6]:
# Outcome column: used below for the stratified split and as the model output name
target = "_clinical AD diagnosis"

In [7]:
# Hold out 20% of the rows as a test set, stratified on the target column
train_val, test = train_test_split(
    data,
    test_size=0.2,
    stratify=data[target],
    random_state=random_seed,
)

In [8]:
# Hyperparameters for the single QLattice run on the train/validation split.
# random_seed is intentionally not redefined here: it is already set before the
# train/test split, and a second definition could silently drift from it.
epochs = 50
criterion = 'wide_parsimony'
max_complexity = 10

# Per-sample weights that balance the two classes: every positive (label == 1) sample
# is weighted by the negative/positive count ratio, every other sample by 1.
# Both counts use explicit equality tests so the ratio does not depend on the
# labels being summable 0/1 values, and no Python-level sum over the Series is needed.
n_negative = np.sum(train_val[target] == 0)
n_positive = np.sum(train_val[target] == 1)
sw = np.where(train_val[target] == 1, n_negative / n_positive, 1)

In [9]:
# Connect to the QLattice service and reset it with the shared seed
ql = feyn.connect_qlattice(server="https://qlattice.stage.abzu.ai")
ql.reset(random_seed)

# Search for classification models of the target on the train/validation split,
# using the hyperparameters and sample weights defined above
models_single = ql.auto_run(
    data=train_val,
    output_name=target,
    kind="classification",
    stypes=stypes,
    n_epochs=epochs,
    criterion=criterion,
    max_complexity=max_complexity,
    sample_weights=sw,
)

In [10]:
# Wide-parsimony score of each model returned by the single run, in returned order
for candidate in models_single:
    print(candidate.wide_parsimony)

117.5374361065451
119.5441047095184
119.87809028581316
120.35469954744758
120.58937033885454
121.11779638561347
124.22624321800942
124.26697833027842
124.42091175883573
124.57650845243117


In [11]:
# BIC of each model returned by the single run, in returned order
for candidate in models_single:
    print(candidate.bic)

67.35292573334159
69.3595943363149
69.69357991260966
70.17018917424407
70.40485996565104
70.93328601240997
74.0417328448059
74.08246795707491
74.23640138563222
74.39199807922766


### Cross-validation as a framework

In [12]:
# Run the crossvalidation_as_framework helper on the AD table with criterion='bic'.
# NOTE(review): this receives the full `data`, not `train_val` — confirm that the
# held-out test rows are meant to take part in the cross-validation.
results_bic = crossvalidation_as_framework(
    data,
    target,
    kind="classification",
    stypes=stypes,
    n_epochs=50,
    criterion='bic',
    max_complexity=10,
)

In [13]:
# Display the cross-validation results obtained with the BIC criterion
results_bic

Unnamed: 0,model_structure,fold,aic,bic,roc_auc_train,accuracy_train,roc_auc_val,accuracy_val,pr_auc,f1,query_string
0,logreg(HSPA8 + (IGLV4-69 + LDHB + MAPT + SST)**2),0,10.005911,23.46265,1.0,1.0,0.872222,0.75,0.856864,0.6,"add(""HSPA8"", squared(add(add(""MAPT"", add(""LDHB..."
0,logreg(C1R + GSTP1 + (CNTNAP4 + MAPT)**2),0,21.27818,32.043571,0.997802,0.972477,0.788889,0.75,0.656294,0.645161,"add(squared(add(""MAPT"", ""CNTNAP4"")), add(""GSTP..."
0,logreg(A2M + EPHA4 + MAPT + log(GPI + YWHAG)),0,19.690722,33.147462,0.999634,0.990826,0.861111,0.857143,0.838204,0.6,"add(add(add(""EPHA4"", ""MAPT""), log(add(""YWHAG"",..."
0,logreg(GPC1 + LTA4H + MAPT**2),0,25.577988,33.652031,0.993407,0.981651,0.844444,0.75,0.823283,0.62069,"add(add(squared(""MAPT""), ""GPC1""), ""LTA4H"")"
0,logreg(IGLV4-69 + MAPT**2 + NBL1),0,28.332177,36.406221,0.993773,0.954128,0.9,0.75,0.863737,0.666667,"add(squared(""MAPT""), add(""NBL1"", ""IGLV4-69""))"
0,logreg(IGLV4-69 + MAPT**2),0,32.618145,38.000841,0.991209,0.954128,0.877778,0.785714,0.841198,0.642857,"add(""IGLV4-69"", squared(""MAPT""))"
0,logreg(HEXB + IGLV4-69 + MAPT**2 + PTPRN),0,27.482325,38.247716,0.995238,0.972477,0.883333,0.785714,0.851428,0.642857,"add(""PTPRN"", add(add(""IGLV4-69"", squared(""MAPT..."
0,logreg(FSTL1 + MAPT**2 + YWHAG),0,30.565728,38.639771,0.99304,0.972477,0.877778,0.857143,0.848334,0.666667,"add(add(squared(""MAPT""), ""YWHAG""), ""FSTL1"")"
0,logreg(MAPT**2 + ROBO1),0,34.48791,39.870606,0.988278,0.954128,0.833333,0.785714,0.778578,0.642857,"add(squared(""MAPT""), ""ROBO1"")"
0,logreg(CRP + exp(MAPT)),0,35.281605,40.664301,0.989377,0.944954,0.872222,0.785714,0.786315,0.740741,"add(exp(""MAPT""), ""CRP"")"


In [14]:
# Same helper call as for BIC, but with criterion='wide_parsimony'
results_wide_parsimony = crossvalidation_as_framework(
    data,
    target,
    kind="classification",
    stypes=stypes,
    n_epochs=50,
    criterion='wide_parsimony',
    max_complexity=10,
)

KeyboardInterrupt: 

In [None]:
# Mean validation ROC AUC and accuracy over the first entry of each fold (BIC runs)
first_per_fold_bic = results_bic.groupby("fold").first()
first_per_fold_bic.roc_auc_val.mean(), first_per_fold_bic.accuracy_val.mean()

In [None]:
# Mean validation ROC AUC and accuracy over the first entry of each fold (wide_parsimony runs)
first_per_fold_wp = results_wide_parsimony.groupby("fold").first()
first_per_fold_wp.roc_auc_val.mean(), first_per_fold_wp.accuracy_val.mean()

# BC Case

In [None]:
# Read the BC table (CSV) into its own DataFrame
data_bc = pd.read_csv(filepath_or_buffer='../data/brca_data_w_meta.csv')

# Outcome column for the BC case; this rebinds the `target` used in the AD section
target = "vital.status"

# Rebuild `stypes` for the BC columns: every object-dtype column is tagged 'c'
stypes = {column: 'c' for column in data_bc.columns if data_bc[column].dtype == 'object'}

In [None]:
# Repeat the BIC cross-validation helper with three different random states and
# stack the per-run result frames.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame inside a loop copies it on every iteration, so the per-run frames are
# collected in a list and concatenated once after the loop.
runs_bic_bc = []

for i in range(3):
    res = crossvalidation_as_framework(
        data_bc,
        target,
        kind="classification",
        stypes=stypes,
        n_epochs=50,
        criterion='bic',
        max_complexity=10,
        random_state=i,
    )
    runs_bic_bc.append(res)

# Row labels of each run are kept as-is, matching what repeated appends produced
results_bic_bc = pd.concat(runs_bic_bc)

In [None]:
# Repeat the wide_parsimony cross-validation helper with three different random
# states and stack the per-run result frames.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame inside a loop copies it on every iteration, so the per-run frames are
# collected in a list and concatenated once after the loop.
runs_wide_parsimony_bc = []

for i in range(3):
    res = crossvalidation_as_framework(
        data_bc,
        target,
        kind="classification",
        stypes=stypes,
        n_epochs=50,
        criterion='wide_parsimony',
        max_complexity=10,
        random_state=i,
    )
    runs_wide_parsimony_bc.append(res)

# Row labels of each run are kept as-is, matching what repeated appends produced
results_wide_parsimony_bc = pd.concat(runs_wide_parsimony_bc)

In [None]:
# Mean validation ROC AUC and accuracy over every row of the repeated BIC runs.
# NOTE(review): the AD summary averages only the first entry per fold, whereas this
# averages over all rows — confirm the difference is intended.
(
    results_bic_bc["roc_auc_val"].mean(),
    results_bic_bc["accuracy_val"].mean(),
)

In [None]:
# Mean validation ROC AUC and accuracy over every row of the repeated wide_parsimony runs.
# NOTE(review): the AD summary averages only the first entry per fold, whereas this
# averages over all rows — confirm the difference is intended.
(
    results_wide_parsimony_bc["roc_auc_val"].mean(),
    results_wide_parsimony_bc["accuracy_val"].mean(),
)

In [None]:
# Number of rows collected across the repeated BIC cross-validation runs
len(results_bic_bc)