In [None]:
import warnings


def warn(*args, **kwargs):
    """Accept any arguments and do nothing.

    Installed below in place of ``warnings.warn``.
    """
    return None


# Calls made through ``warnings.warn`` from here on hit the no-op above.
warnings.warn = warn

import itertools
import os
import warnings
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt
import sklearn as skl
import scipy.stats as spst
import sklearn.metrics as skmr
import xgboost as xgb
from tabpfn import TabPFNClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state
from tabpfn.preprocessing import PreprocessorConfig
from tabpfn.constants import ModelInterfaceConfig

In [None]:
# Input dimensionalities swept by the experiments that iterate over `Ds`.
Ds = list(range(3, 6))

# (display name, estimator) pairs: first the three ensemble sizes with the
# fingerprint feature enabled, then the same three sizes with it disabled.
# The order must line up with STYLE below (one line style per entry).
#("XGBoost", xgb.XGBClassifier()),
method_infos = [
    (
        f"TabPFN-v2 (k={k}, {tag} fingerprint)",
        TabPFNClassifier(
            device='cpu',
            n_estimators=k,
            inference_config=ModelInterfaceConfig(FINGERPRINT_FEATURE=use_fingerprint),
        ),
    )
    for use_fingerprint, tag in ((True, "w/"), (False, "w/o"))
    for k in (1, 4, 16)
]

# Marker encodes k (^ / o / x); solid = with fingerprint, dotted = without.
STYLE = ['^-', 'o-', 'x-'] + ['^:', 'o:', 'x:']

In [None]:
# Leave-one-out parity benchmark: every model is fit on all other rows of the
# truth table (in shuffled order) and asked to predict the held-out row.
results_dict = {mname: {} for mname, _ in method_infos}
pvalues_dict = {mname: {} for mname, _ in method_infos}
for D in Ds:
    # All 2**D binary inputs; the label is the parity (sum mod 2) of each row.
    X = np.array(list(itertools.product([False, True], repeat=D))).astype(int)
    N = X.shape[0]
    y = np.sum(X, axis=1) % 2
    for mname, method in method_infos:
        y_test_agg = []
        y_test_pred_agg = []
        for n in range(N):
            X_train = np.r_[X[0:n, :], X[n+1:, :]]
            y_train = np.r_[y[0:n], y[n+1:]]
            # Seed per fold, as the other experiment cells do. Previously `rng`
            # was only defined by a later cell, so a fresh kernel hit NameError.
            rng = check_random_state(n)
            train_ixs = rng.choice(N - 1, size=N - 1, replace=False)  # shuffle
            X_train = X_train[train_ixs, :]
            y_train = y_train[train_ixs]
            X_test = X[n:n+1, :]
            y_test = y[n:n+1]
            method.fit(X_train, y_train)
            y_test_pred = method.predict(X_test)
            y_test_agg.append(y_test)
            y_test_pred_agg.append(y_test_pred)
        y_test_agg = np.concatenate(y_test_agg)
        y_test_pred_agg = np.concatenate(y_test_pred_agg)
        is_correct = (y_test_agg == y_test_pred_agg)
        acc = np.mean(is_correct)
        print(D, mname, acc)
        results_dict[mname][D] = acc
        # One-sided binomial test of the number of correct folds against p = 0.5.
        pvalues_dict[mname][D] = spst.binomtest(
            k=int(np.sum(is_correct)), n=N, p=0.5, alternative='greater'
        ).pvalue

ax = pd.DataFrame(results_dict).plot(style=STYLE)
ax.set_xticks(Ds)
# Enable the grid before saving so the PDF matches the inline figure.
ax.grid(True)
plt.savefig('parity-Nminus1.pdf')

In [None]:
# Leave-one-out parity benchmark where each fit sees only N // 2 of the
# remaining rows.
halfresults_dict = dict((mname, {}) for mname, _ in method_infos)
for D in Ds:
    # All 2**D binary inputs; the label is the parity (sum mod 2) of each row.
    X = np.array(list(itertools.product([False, True], repeat=D))).astype(int)
    N = len(X)
    y = X.sum(axis=1) % 2
    for mname, method in method_infos:
        y_test_agg, y_test_pred_agg = [], []
        for n in range(N):
            # A generator seeded with the held-out index picks the context rows.
            rng = check_random_state(n)
            train_ixs = rng.choice(N - 1, size=N // 2, replace=False)
            # Drop row n, then keep only the sampled subset of what is left.
            X_train = np.delete(X, n, axis=0)[train_ixs, :]
            y_train = np.delete(y, n)[train_ixs]
            X_test, y_test = X[n:n+1, :], y[n:n+1]
            method.fit(X_train, y_train)
            y_test_pred = method.predict(X_test)
            y_test_agg += [y_test]
            y_test_pred_agg += [y_test_pred]
        y_test_agg = np.concatenate(y_test_agg)
        y_test_pred_agg = np.concatenate(y_test_pred_agg)
        acc = np.mean(y_test_agg == y_test_pred_agg)
        print(D, mname, acc)
        halfresults_dict[mname][D] = acc

In [None]:
# Accuracy per input dimension for the half-context experiment.
ax = pd.DataFrame(halfresults_dict).plot(style=STYLE)
ax.set_xticks(Ds)
# Enable the grid before saving so the PDF matches the inline figure.
ax.grid(True)
plt.savefig('parity-half.pdf')

In [None]:
# Leave-one-out parity benchmark where each fit sees only N // 4 of the
# remaining rows.
quarterresults_dict = dict((mname, {}) for mname, _ in method_infos)
for D in Ds:
    # All 2**D binary inputs; the label is the parity (sum mod 2) of each row.
    X = np.array(list(itertools.product([False, True], repeat=D))).astype(int)
    N = len(X)
    y = X.sum(axis=1) % 2
    for mname, method in method_infos:
        y_test_agg, y_test_pred_agg = [], []
        for n in range(N):
            # A generator seeded with the held-out index picks the context rows.
            rng = check_random_state(n)
            train_ixs = rng.choice(N - 1, size=N // 4, replace=False)
            # Drop row n, then keep only the sampled subset of what is left.
            X_train = np.delete(X, n, axis=0)[train_ixs, :]
            y_train = np.delete(y, n)[train_ixs]
            X_test, y_test = X[n:n+1, :], y[n:n+1]
            method.fit(X_train, y_train)
            y_test_pred = method.predict(X_test)
            y_test_agg += [y_test]
            y_test_pred_agg += [y_test_pred]
        y_test_agg = np.concatenate(y_test_agg)
        y_test_pred_agg = np.concatenate(y_test_pred_agg)
        acc = np.mean(y_test_agg == y_test_pred_agg)
        print(D, mname, acc)
        quarterresults_dict[mname][D] = acc

In [None]:
# Accuracy per input dimension for the quarter-context experiment.
ax = pd.DataFrame(quarterresults_dict).plot(style=STYLE)
ax.set_xticks(Ds)
# Enable the grid before saving so the PDF matches the inline figure.
ax.grid(True)
plt.savefig('parity-quarter.pdf')

In [None]:
# Leave-one-out parity benchmark with a fixed context of 2**7 - 1 rows while
# the input dimension grows from 7 to 13.
constantresults_dict = dict((mname, {}) for mname, _ in method_infos)
for D in range(7, 14):
    # All 2**D binary inputs; the label is the parity (sum mod 2) of each row.
    X = np.array(list(itertools.product([False, True], repeat=D))).astype(int)
    N = len(X)
    y = X.sum(axis=1) % 2
    for mname, method in method_infos:
        y_test_agg, y_test_pred_agg = [], []
        for n in range(N):
            # A generator seeded with the held-out index picks the context rows.
            rng = check_random_state(n)
            train_ixs = rng.choice(N - 1, size=2 ** 7 - 1, replace=False)
            # Drop row n, then keep only the sampled subset of what is left.
            X_train = np.delete(X, n, axis=0)[train_ixs, :]
            y_train = np.delete(y, n)[train_ixs]
            X_test, y_test = X[n:n+1, :], y[n:n+1]
            method.fit(X_train, y_train)
            y_test_pred = method.predict(X_test)
            y_test_agg += [y_test]
            y_test_pred_agg += [y_test_pred]
        y_test_agg = np.concatenate(y_test_agg)
        y_test_pred_agg = np.concatenate(y_test_pred_agg)
        acc = np.mean(y_test_agg == y_test_pred_agg)
        print(D, mname, acc)
        constantresults_dict[mname][D] = acc

In [None]:
# Accuracy per input dimension for the constant-context experiment.
constant_df = pd.DataFrame(constantresults_dict)
ax = constant_df.plot(style=STYLE)
# Tick at the dimensions this experiment actually swept (the frame's index),
# not at the global `Ds`, which covers a different range.
ax.set_xticks(list(constant_df.index))
# Enable the grid before saving so the PDF matches the inline figure.
ax.grid(True)
plt.savefig('parity-constant.pdf')