In [11]:
import numpy as np
import scipy
from scipy import stats
import sklearn as sk
from sklearn import discriminant_analysis
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import unique_labels
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
import tqdm as tqdm
import random

import pandas as pd

In [5]:
def prec_test(prec0,prec1,n0,n1,alpha):
    tau0=np.zeros(prec0.shape)
    tau1=np.zeros(prec0.shape)
    beta0=np.zeros(prec0.shape)
    beta1=np.zeros(prec0.shape)
    f0=np.zeros(prec0.shape)
    f1=np.zeros(prec0.shape)   
    theta0=np.zeros(prec0.shape)
    theta1=np.zeros(prec0.shape)
    t=np.zeros(prec0.shape) 
    t2=np.zeros(prec0.shape) 
    for i in range(tau0.shape[0]):
        for j in range(tau0.shape[1]):
            if i > j:
                continue
            tau0[i,j]=prec0[i,j]/(prec0[i,i]*prec0[j,j])
            tau1[i,j]=prec1[i,j]/(prec1[i,i]*prec1[j,j])
    for i in range(tau0.shape[0]):
        for j in range(tau0.shape[1]):
            if i > j:
                continue        
            beta0[i,j]=-prec0[i,j]/prec0[j,j]
            beta1[i,j]=-prec1[i,j]/prec1[j,j]
#             print(tau0[j,j])
            f0[i,j]=tau0[i,j]/(tau0[i,i]*tau0[j,j])
            f1[i,j]=tau1[i,j]/(tau1[i,i]*tau1[j,j])
            theta0[i,j]=(1+tau0[i,i]/tau0[j,j]*beta0[i,j]**2)/(n0*tau0[i,i]*tau0[j,j])
            theta1[i,j]=(1+tau1[i,i]/tau1[j,j]*beta1[i,j]**2)/(n1*tau1[i,i]*tau1[j,j])
            t[i,j]=(f0[i,j]-f1[i,j])/np.sqrt(theta0[i,j]+theta1[i,j])
            t2[i,j]=(f0[i,j]-f1[i,j])**2/(theta0[i,j]+theta1[i,j])
#     print(i,j)
    m=max(t2.flatten())
    q_alpha=-np.log(8*np.pi)-2*np.log(np.log(1/(1-alpha)))
#     if m>=(q_alpha+4*np.log(prec0.shape[0])-np.log(np.log(prec0.shape[0]))):
#         print("They are not equal")
#     else:
#         print("They are equal")
    return(t,t2, q_alpha)
            
            
            
def t_hat(test,alpha):
    p=test.shape[1]
    bp=np.sqrt(4*np.log(p)-2*np.log(np.log(p)))
    
    candidate_t=[t for t in np.unique(np.abs(test)) if t>=0 and t<=bp]
    t_hat=None
    for candidate_t_hat in candidate_t:
        if (1-scipy.stats.norm.cdf(candidate_t_hat))*p*(p-1)/max(1,np.sum(np.abs(test)>candidate_t_hat))<= alpha:
            t_hat=candidate_t_hat
            break
    if t_hat==None:
#         print("No feasibile t_hat found")
        t_hat=2*np.sqrt(np.log(p))
    return t_hat
                    
class GLQDA_Replace(BaseEstimator, ClassifierMixin):

    def __init__(self, class0_pen=0,class1_pen=0,backward=5):
        self.class0_pen = class0_pen
        self.class1_pen = class1_pen
        self.backward = backward
    def get_params(self,deep=True):
        param={"class0_pen":self.class0_pen, 
                "class1_pen": self.class1_pen,
              "backward": self.backward}
        return param

    def fit(self, X, y):
        # Check that X and y have correct shape
        X, y = sk.utils.check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.X_reduced = X
        self.y_ = y
        ## QDA parameters
        self.y0_mu=np.sum(self.y_==self.classes_[0])/len(self.y_)
        self.y1_mu=np.sum(self.y_==self.classes_[1])/len(self.y_)
        self.backward=min(self.backward,self.X_.shape[1])

        # Initial step
        ## Compute the whole data covariance
        glasso_All = sk.covariance.GraphicalLasso(alpha=self.class0_pen,tol=2e-3,assume_centered=False).fit(self.X_)
        self.group_cov=glasso_All.covariance_
        self.group_pre=glasso_All.precision_
        ## Calculate the group covariance matrix
        self.glasso0 = sk.covariance.GraphicalLasso(alpha=self.class0_pen,tol=2e-3,assume_centered=False).fit(self.X_reduced[self.y_==self.classes_[0]])
        self.glasso1 = sk.covariance.GraphicalLasso(alpha=self.class1_pen,tol=2e-3,assume_centered=False).fit(self.X_reduced[self.y_==self.classes_[1]])
        self.pre0= self.glasso0.precision_
        self.pre1=self.glasso1.precision_
        
        ## Starting for loop
        ## Calculate difference of precision matrix
        diff=np.sum(np.abs(self.pre0-self.pre1),axis=1)
        diff=diff/np.sum(diff)
        self.index=np.argsort(diff)
        self.pre0[self.index[:self.backward],:]=self.group_pre[self.index[:self.backward],:]
        self.pre0[:,self.index[:self.backward]]=self.group_pre[:,self.index[:self.backward]]
        self.pre1[self.index[:self.backward],:]=self.group_pre[self.index[:self.backward],:]
        self.pre1[:,self.index[:self.backward]]=self.group_pre[:,self.index[:self.backward]]


#         ## Get the reduced X
#         self.X_reduced=self.X_[:,self.index[self.backward:]]

#         self.glasso0_r = sk.covariance.GraphicalLasso(alpha=self.class0_pen,tol=2e-3,assume_centered=False).fit(self.X_reduced[self.y_==self.classes_[0]])
#         self.glasso1_r = sk.covariance.GraphicalLasso(alpha=self.class1_pen,tol=2e-3,assume_centered=False).fit(self.X_reduced[self.y_==self.classes_[1]])

#         for i in range(len(self.index[self.backward:])):
#             for j in range(len(self.index[self.backward:])):
#                 self.pre0[self.reduced[self.backward+i],self.reduced[self.backward+j]]=self.glasso0_r.precision_[i,j]
#                 self.pre1[self.reduced[self.backward+i],self.reduced[self.backward+j]]=self.glasso1_r.precision_[i,j]

        (sign, logdet)=np.linalg.slogdet(np.linalg.inv(self.pre0))
        self.det_cov0=sign* logdet
        (sign, logdet)=np.linalg.slogdet(np.linalg.inv(self.pre1))
        self.det_cov1=sign* logdet
        
        self.X0_mu=np.mean(self.X_[self.y_==self.classes_[0]],axis=0)
        self.X1_mu=np.mean(self.X_[self.y_==self.classes_[1]],axis=0)

        return self

    def predict(self, X):

        # Check is fit had been called
        sk.utils.validation.check_is_fitted(self)
    
        # Input validation
        X = sk.utils.check_array(X)
#         X=X[:,self.reduced]
        
        py_0=-0.5*np.matmul(np.matmul((X-self.X0_mu),self.pre0),np.transpose(X-self.X0_mu))
        py_0=py_0.diagonal()
        py_0=py_0-0.5*self.det_cov0+np.log(self.y0_mu)
        
        py_1=-0.5*np.matmul(np.matmul((X-self.X1_mu),self.pre1),np.transpose(X-self.X1_mu))
        py_1=py_1.diagonal()
        py_1=py_1-0.5*self.det_cov1+np.log(self.y1_mu)
                               
        predict_class=[self.classes_[int(py_0[i]<py_1[i])] for i in range(len(X))]
                               
        return predict_class
    
    def predict_log_proba(self,X):
                # Check is fit had been called
        sk.utils.validation.check_is_fitted(self)

        # Input validation
        X = sk.utils.check_array(X)
        py_0=-0.5*np.matmul(np.matmul((X-self.X0_mu),self.pre0),np.transpose(X-self.X0_mu))
        py_0=py_0.diagonal()
        py_0=py_0-0.5*np.log(self.det_cov0)+np.log(self.y0_mu)
        
        py_1=-0.5*np.matmul(np.matmul((X-self.X1_mu),self.pre1),np.transpose(X-self.X1_mu))
        py_1=py_1.diagonal()
        py_1=py_1-0.5*np.log(self.det_cov1)+np.log(self.y1_mu)
                               
        predict_log_proba=np.transpose([py_0,py_1])
                               
        return predict_log_proba

    def predict_proba(self,X):
                # Check is fit had been called
        sk.utils.validation.check_is_fitted(self)

        # Input validation
        X = sk.utils.check_array(X)
        py_0=-0.5*np.matmul(np.matmul((X-self.X0_mu),self.pre0),np.transpose(X-self.X0_mu))
        py_0=py_0.diagonal()
        py_0=py_0-0.5*np.log(self.det_cov0)+np.log(self.y0_mu)
        
        py_1=-0.5*np.matmul(np.matmul((X-self.X1_mu),self.pre1),np.transpose(X-self.X1_mu))
        py_1=py_1.diagonal()
        py_1=py_1-0.5*np.log(self.det_cov1)+np.log(self.y1_mu)
                               
        predict_proba=np.transpose([np.exp(py_0),np.exp(py_1)])
                               
        return predict_proba

class GLQDA_Replace_cai(BaseEstimator, ClassifierMixin):

    def __init__(self, class0_pen=0,class1_pen=0,p_val=0.01,backward=0):
        self.class0_pen = class0_pen
        self.class1_pen = class1_pen
        self.backward = backward
        self.p_val = p_val
    def get_params(self,deep=True):
        param={"class0_pen":self.class0_pen, 
                "class1_pen": self.class1_pen,
              "backward": self.backward,
              'p_val':self.p_val}
        return param

    def fit(self, X, y):
        # Check that X and y have correct shape
        X, y = sk.utils.check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.X_reduced = X
        self.y_ = y
        ## QDA parameters
        self.y0_mu=np.sum(self.y_==self.classes_[0])/len(self.y_)
        self.y1_mu=1-self.y0_mu
        self.backward=min(self.backward,self.X_.shape[1]-3)

        # Initial step
        ## Compute the whole data covariance
        glasso_All = sk.covariance.GraphicalLasso(alpha=self.class0_pen,tol=2e-3,assume_centered=False).fit(self.X_)
        self.group_cov=glasso_All.covariance_
        self.group_pre=glasso_All.precision_
        ## Calculate the group covariance matrix
        self.glasso0 = sk.covariance.GraphicalLasso(alpha=self.class0_pen,tol=2e-3,assume_centered=False).fit(self.X_reduced[self.y_==self.classes_[0]])
        self.glasso1 = sk.covariance.GraphicalLasso(alpha=self.class1_pen,tol=2e-3,assume_centered=False).fit(self.X_reduced[self.y_==self.classes_[1]])
        self.pre0= self.glasso0.precision_
        self.pre1=self.glasso1.precision_
        ## Starting for loop
        ## Calculate difference of precision matrix
        t,t2, q=prec_test(self.pre0,self.pre1,sum(self.y_==self.classes_[0]),sum(self.y_==self.classes_[1]),self.p_val)
        t_thresh=t_hat(t,self.p_val)
        hypothesis=np.logical_or(np.transpose(np.abs(t)>t_thresh),np.abs(t)>t_thresh)
        test_reject=np.sum(hypothesis,axis=0)
        self.index=np.argsort(test_reject)
        for i in range(len(self.index)):
            if test_reject[self.index[i]]<1:
                continue
            else:
                break
        self.best_share=i
        self.pre0= self.glasso0.precision_
        self.pre1=self.glasso1.precision_
        self.pre0[self.index[:self.best_share],:]=self.group_pre[self.index[:self.best_share],:]
        self.pre0[:,self.index[:self.best_share]]=self.group_pre[:,self.index[:self.best_share]]
        self.pre1[self.index[:self.best_share],:]=self.group_pre[self.index[:self.best_share],:]
        self.pre1[:,self.index[:self.best_share]]=self.group_pre[:,self.index[:self.best_share]]

        (sign, logdet)=np.linalg.slogdet(np.linalg.inv(self.pre0))
        self.det_cov0=sign* logdet
        (sign, logdet)=np.linalg.slogdet(np.linalg.inv(self.pre1))
        self.det_cov1=sign* logdet
        
        self.X0_mu=np.mean(self.X_[self.y_==self.classes_[0]],axis=0)
        self.X1_mu=np.mean(self.X_[self.y_==self.classes_[1]],axis=0)
        return self

    def predict(self, X):

        # Check is fit had been called
        sk.utils.validation.check_is_fitted(self)
    
        # Input validation
        X = sk.utils.check_array(X)
#         X=X[:,self.reduced]
        
        py_0=-0.5*np.matmul(np.matmul((X-self.X0_mu),self.pre0),np.transpose(X-self.X0_mu))
        py_0=py_0.diagonal()
        py_0=py_0-0.5*self.det_cov0+np.log(self.y0_mu)
        
        py_1=-0.5*np.matmul(np.matmul((X-self.X1_mu),self.pre1),np.transpose(X-self.X1_mu))
        py_1=py_1.diagonal()
        py_1=py_1-0.5*self.det_cov1+np.log(self.y1_mu)
                               
        predict_class=[self.classes_[int(py_0[i]<py_1[i])] for i in range(len(X))]
                               
        return predict_class
    
    def predict_log_proba(self,X):
                # Check is fit had been called
        sk.utils.validation.check_is_fitted(self)

        # Input validation
        X = sk.utils.check_array(X)
        py_0=-0.5*np.matmul(np.matmul((X-self.X0_mu),self.pre0),np.transpose(X-self.X0_mu))
        py_0=py_0.diagonal()
        py_0=py_0-0.5*np.log(self.det_cov0)+np.log(self.y0_mu)
        
        py_1=-0.5*np.matmul(np.matmul((X-self.X1_mu),self.pre1),np.transpose(X-self.X1_mu))
        py_1=py_1.diagonal()
        py_1=py_1-0.5*np.log(self.det_cov1)+np.log(self.y1_mu)
                               
        predict_log_proba=np.transpose([py_0,py_1])
                               
        return predict_log_proba

    def predict_proba(self,X):
                # Check is fit had been called
        sk.utils.validation.check_is_fitted(self)

        # Input validation
        X = sk.utils.check_array(X)
        py_0=-0.5*np.matmul(np.matmul((X-self.X0_mu),self.pre0),np.transpose(X-self.X0_mu))
        py_0=py_0.diagonal()
        py_0=py_0-0.5*np.log(self.det_cov0)+np.log(self.y0_mu)
        
        py_1=-0.5*np.matmul(np.matmul((X-self.X1_mu),self.pre1),np.transpose(X-self.X1_mu))
        py_1=py_1.diagonal()
        py_1=py_1-0.5*np.log(self.det_cov1)+np.log(self.y1_mu)
                               
        predict_proba=np.transpose([np.exp(py_0),np.exp(py_1)])
                               
        return predict_proba
        

In [12]:

print("GLQDA Cai")
selected=[]
f1_cai=[]
f1_qda=[]
f1_lda=[]
cv_set=[]
com_vars=[]
params={'GLQDA__class0_pen': 0.05*np.arange(1,5),
 'GLQDA__class1_pen': 0.05*np.arange(1,5),
        'GLQDA__p_val':[0.01,0.05]
 }
pipe = Pipeline([('scaler', sk.preprocessing.StandardScaler()), ('GLQDA', GLQDA_Replace_cai())])
for i in tqdm.tqdm(range(50)):
    ## Train test split
    n_samp_p_class=500
    vars=random.sample(range(6),4)
    g_cov_1=np.identity(6)
    g_cov_2=np.identity(6)
    # g_cov_2[vars[0],vars[0]]=g_cov_2[vars[1],vars[1]]=5
    # g_cov_2[vars[0],vars[1]]=g_cov_2[vars[1],vars[0]]=3
    # g_cov_2[vars[2],vars[2]]=g_cov_2[vars[3],vars[3]]=5
    # g_cov_2[vars[2],vars[3]]=g_cov_2[vars[3],vars[2]]=3
    g_cov_2[vars[0],vars[0]]=g_cov_2[vars[1],vars[1]]=g_cov_2[vars[2],vars[2]]=g_cov_2[vars[3],vars[3]]=5
    g_cov_2[vars[0],vars[1]]=g_cov_2[vars[1],vars[0]]=g_cov_2[vars[0],vars[2]]=g_cov_2[vars[2],vars[0]]=g_cov_2[vars[0],vars[3]]=g_cov_2[vars[3],vars[0]]= g_cov_2[vars[1],vars[2]]=g_cov_2[vars[2],vars[1]]=g_cov_2[vars[1],vars[3]]=g_cov_2[vars[3],vars[1]]=g_cov_2[vars[2],vars[3]]=g_cov_2[vars[3],vars[2]]=3
    X_sim=np.vstack([stats.multivariate_normal([0,0,0,0,0,0],g_cov_1).rvs(n_samp_p_class),
                stats.multivariate_normal([0,0,0,0,0,0],g_cov_2).rvs(n_samp_p_class)])

    Y_sim=np.append(['Normal']*n_samp_p_class,['Schizophrenia']*n_samp_p_class)
    X_train, X_test, y_train, y_test = sk.model_selection.train_test_split(
       X_sim, Y_sim, test_size=0.2, random_state=i)
    
    ## GLQDA
    grid_normal=sk.model_selection.GridSearchCV(pipe,param_grid=params, scoring=sk.metrics.make_scorer(sk.metrics.accuracy_score),
                            return_train_score=True)
    grid_normal.fit(X_train, y_train)
    # print(sk.metrics.classification_report(y_test,grid_normal.predict(X_test)))
    f1_cai.append(sk.metrics.accuracy_score(y_test,grid_normal.predict(X_test)))
    # selected.append(X_train.columns.values.astype("str")[grid_normal.best_estimator_["GLQDA"].index[:grid_normal.best_estimator_["GLQDA"].best_share]])
    cv_set.append([i,"GLLDA",f1_cai[-1]])
    
    ##QDA
    clf = sk.discriminant_analysis.QuadraticDiscriminantAnalysis(store_covariance=True)
    clf.fit(X_train, y_train)
    # print(sk.metrics.classification_report(y_test,clf.predict(X_test)))
    f1_qda.append(sk.metrics.accuracy_score(y_test,clf.predict(X_test)))
    cv_set.append([i,"QDA",f1_qda[-1]])

    ## LDA
    clf = sk.discriminant_analysis.LinearDiscriminantAnalysis(store_covariance=True)
    clf.fit(X_train, y_train)
    # print(sk.metrics.classification_report(y_test,clf.predict(X_test)))
    f1_lda.append(sk.metrics.accuracy_score(y_test,clf.predict(X_test)))
    cv_set.append([i,"LDA",f1_lda[-1]])
    idf=grid_normal.best_estimator_["GLQDA"].index[grid_normal.best_estimator_["GLQDA"].best_share:].tolist()
    idf.sort()
    vars.sort()
    print("Identified columns",idf)

    print("True columns [%s, %s]"%(vars[0],vars[1]))
    v1=np.zeros(6)
    v1[idf]=1
    v2=np.zeros(6)
    v2[vars]=1

    com_vars.append(sk.metrics.cohen_kappa_score(v1,v2))
    # print(grid_normal.best_params_)
print("GLQDA Mean F1 %.2f(%.2f)"%(np.mean(f1_cai),np.std(f1_cai)))
print("QDA Mean F1 %.2f(%.2f)"%(np.mean(f1_qda),np.std(f1_qda)))
print("LDA Mean F1 %.2f(%.2f)"%(np.mean(f1_lda),np.std(f1_lda)))
print("Correctly identified differnet terms:  %.2f(%.2f)"%(np.mean(com_vars),np.std(com_vars)))

GLQDA Cai


  2%|▏         | 1/50 [00:02<01:54,  2.33s/it]

Identified columns [0, 1, 2, 4]
True columns [0, 1]


  4%|▍         | 2/50 [00:04<01:51,  2.33s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


  6%|▌         | 3/50 [00:06<01:47,  2.29s/it]

Identified columns [0, 2, 3, 5]
True columns [0, 2]


  8%|▊         | 4/50 [00:09<01:46,  2.31s/it]

Identified columns [0, 1, 2, 4]
True columns [0, 1]


 10%|█         | 5/50 [00:11<01:43,  2.30s/it]

Identified columns [0, 1, 3, 5]
True columns [0, 1]


 12%|█▏        | 6/50 [00:13<01:41,  2.30s/it]

Identified columns [0, 1, 4, 5]
True columns [0, 1]


 14%|█▍        | 7/50 [00:16<01:38,  2.30s/it]

Identified columns [1, 2, 4, 5]
True columns [1, 2]


 16%|█▌        | 8/50 [00:18<01:35,  2.28s/it]

Identified columns [0, 1, 2, 5]
True columns [0, 1]


 18%|█▊        | 9/50 [00:20<01:33,  2.29s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


 20%|██        | 10/50 [00:22<01:31,  2.28s/it]

Identified columns [0, 3, 4, 5]
True columns [0, 3]


 22%|██▏       | 11/50 [00:25<01:29,  2.28s/it]

Identified columns [0, 3, 4, 5]
True columns [0, 3]


 24%|██▍       | 12/50 [00:27<01:26,  2.28s/it]

Identified columns [0, 2, 3, 5]
True columns [0, 2]


 26%|██▌       | 13/50 [00:29<01:24,  2.28s/it]

Identified columns [0, 2, 4, 5]
True columns [0, 2]


 28%|██▊       | 14/50 [00:32<01:21,  2.27s/it]

Identified columns [1, 2, 3, 4]
True columns [1, 2]


 30%|███       | 15/50 [00:34<01:20,  2.29s/it]

Identified columns [0, 1, 2, 5]
True columns [0, 1]


 32%|███▏      | 16/50 [00:36<01:17,  2.28s/it]

Identified columns [0, 2, 4, 5]
True columns [0, 2]


 34%|███▍      | 17/50 [00:38<01:15,  2.27s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


 36%|███▌      | 18/50 [00:41<01:12,  2.27s/it]

Identified columns [0, 1, 2, 5]
True columns [0, 1]


 38%|███▊      | 19/50 [00:43<01:10,  2.28s/it]

Identified columns [0, 1, 2, 3]
True columns [0, 1]


 40%|████      | 20/50 [00:45<01:08,  2.27s/it]

Identified columns [0, 1, 3, 5]
True columns [0, 1]


 42%|████▏     | 21/50 [00:47<01:05,  2.27s/it]

Identified columns [1, 3, 4, 5]
True columns [1, 3]


 44%|████▍     | 22/50 [00:50<01:03,  2.27s/it]

Identified columns [0, 1, 2, 4]
True columns [0, 1]


 46%|████▌     | 23/50 [00:52<01:01,  2.27s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


 48%|████▊     | 24/50 [00:54<00:59,  2.27s/it]

Identified columns [0, 2, 3, 5]
True columns [0, 2]


 50%|█████     | 25/50 [00:57<00:56,  2.27s/it]

Identified columns [1, 2, 4, 5]
True columns [1, 2]


 52%|█████▏    | 26/50 [00:59<00:54,  2.28s/it]

Identified columns [0, 2, 3, 5]
True columns [0, 2]


 54%|█████▍    | 27/50 [01:01<00:52,  2.27s/it]

Identified columns [2, 3, 4, 5]
True columns [2, 3]


 56%|█████▌    | 28/50 [01:03<00:49,  2.27s/it]

Identified columns [0, 1, 3, 5]
True columns [0, 1]


 58%|█████▊    | 29/50 [01:06<00:47,  2.27s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


 60%|██████    | 30/50 [01:08<00:45,  2.27s/it]

Identified columns [0, 3, 4, 5]
True columns [0, 3]


 62%|██████▏   | 31/50 [01:10<00:43,  2.27s/it]

Identified columns [0, 1, 3, 4]
True columns [0, 1]


 64%|██████▍   | 32/50 [01:12<00:40,  2.27s/it]

Identified columns [0, 2, 3, 5]
True columns [0, 2]


 66%|██████▌   | 33/50 [01:15<00:38,  2.27s/it]

Identified columns [1, 2, 4, 5]
True columns [1, 2]


 68%|██████▊   | 34/50 [01:17<00:36,  2.28s/it]

Identified columns [0, 2, 3, 4]
True columns [0, 2]


 70%|███████   | 35/50 [01:19<00:34,  2.29s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


 72%|███████▏  | 36/50 [01:22<00:32,  2.30s/it]

Identified columns [1, 2, 4, 5]
True columns [1, 2]


 74%|███████▍  | 37/50 [01:24<00:30,  2.31s/it]

Identified columns [0, 1, 2, 3]
True columns [0, 1]


 76%|███████▌  | 38/50 [01:26<00:27,  2.31s/it]

Identified columns [0, 2, 3, 4]
True columns [0, 2]


 78%|███████▊  | 39/50 [01:29<00:25,  2.31s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


 80%|████████  | 40/50 [01:31<00:23,  2.31s/it]

Identified columns [0, 1, 3, 5]
True columns [0, 1]


 82%|████████▏ | 41/50 [01:33<00:20,  2.32s/it]

Identified columns [0, 1, 2, 5]
True columns [0, 1]


 84%|████████▍ | 42/50 [01:36<00:18,  2.32s/it]

Identified columns [1, 3, 4, 5]
True columns [1, 3]


 86%|████████▌ | 43/50 [01:38<00:16,  2.31s/it]

Identified columns [1, 2, 3, 5]
True columns [1, 2]


 88%|████████▊ | 44/50 [01:40<00:13,  2.30s/it]

Identified columns [0, 2, 3, 4]
True columns [0, 2]


 90%|█████████ | 45/50 [01:42<00:11,  2.30s/it]

Identified columns [0, 1, 3, 4]
True columns [0, 1]


 92%|█████████▏| 46/50 [01:45<00:09,  2.30s/it]

Identified columns [1, 2, 4, 5]
True columns [1, 2]


 94%|█████████▍| 47/50 [01:47<00:06,  2.29s/it]

Identified columns [1, 3, 4, 5]
True columns [1, 3]


 96%|█████████▌| 48/50 [01:49<00:04,  2.29s/it]

Identified columns [0, 2, 4, 5]
True columns [0, 2]


 98%|█████████▊| 49/50 [01:52<00:02,  2.30s/it]

Identified columns [2, 3, 4, 5]
True columns [2, 3]


100%|██████████| 50/50 [01:54<00:00,  2.29s/it]

Identified columns [0, 3, 4, 5]
True columns [0, 3]
GLQDA Mean F1 0.81(0.03)
QDA Mean F1 0.80(0.03)
LDA Mean F1 0.50(0.04)
Correctly identified differnet terms:  1.00(0.00)



