In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

from tabpfn import TabPFNClassifier

In [4]:
def fullRead(pathToTable, sep, anthro = False, anthroPath = "data/chronicAnthropometricCardiovascularData.csv"):
    """Load a table and optionally attach per-time-point anthropometric columns.

    The CSV at ``pathToTable`` is read with separator ``sep`` and latin-1
    encoding. When ``anthro`` is truthy, the table at ``anthroPath`` is read
    (``;`` separated, ``,`` as decimal mark) and merged in with pandas' default
    merge (inner join on the columns both frames share). Each paired
    "inicial"/"final" measurement is then collapsed into one column
    (``Weight``, ``BMI``, ``Fat``, ``CVRI``, ``Bpmin``, ``Bpmax``, ``Frec``):
    rows whose ``Time`` is "Initial" take the initial value, rows whose ``Time``
    is "Final" take the final value. The source columns and the delta/height
    columns are dropped afterwards.

    Rows whose ``Time`` is neither "Initial" nor "Final" get a missing value in
    the collapsed columns, which the final ``fillna(0)`` turns into 0 like any
    other missing entry.

    Parameters
    ----------
    pathToTable : str or path-like
        CSV file to load; it must contain the columns "Unnamed: 0" and
        "grouping" (both are dropped).
    sep : str
        Field separator of ``pathToTable``.
    anthro : bool, default False
        Whether to merge and collapse the anthropometric table.
    anthroPath : str or path-like
        Location of the anthropometric table; only read when ``anthro`` is
        truthy. Defaults to the path this function has always used.

    Returns
    -------
    pandas.DataFrame
        The loaded (and optionally merged) table with missing values set to 0.

    Raises
    ------
    KeyError
        If a column that is selected or dropped is not present.
    """
    df_renamed = pd.read_csv(pathToTable, sep = sep, encoding = "latin_1")

    if anthro:
        df_anthro = pd.read_csv(anthroPath, sep=";", decimal=",")
        df_renamed = df_renamed.merge(df_anthro)

        # (collapsed column, column used for "Initial" rows, column used for "Final" rows)
        paired_columns = (
            ("Weight", "Peso inicial", "Peso final"),
            ("BMI", "IMC Inicial", "IMC Final"),
            ("Fat", "Grasa inicial", "Grasa final"),
            ("CVRI", "IRCV inicial", "IRCV Final"),
            ("Bpmin", "Bpmin inicial", "Bpmin final"),
            ("Bpmax", "Bpmax inicial", "Bpmax final"),
            ("Frec", "Frec inicial", "Frec final"),
        )
        is_initial = df_renamed["Time"] == "Initial"
        is_final = df_renamed["Time"] == "Final"

        # Column-wise selection instead of a per-row loop: this keeps the
        # numeric dtype of the measurements (no ""-seeded object columns).
        for target, initial_col, final_col in paired_columns:
            final_values = df_renamed[final_col].where(is_final)
            df_renamed[target] = df_renamed[initial_col].where(is_initial, final_values)

        df_renamed = df_renamed.drop(columns = [
            "Peso inicial", "Peso final", "Delta Peso", "Talla",
            "IMC Inicial", "IMC Final", "Delta IMC",
            "Grasa inicial", "Grasa final", "Delta Grasa",
            "IRCV Final", "IRCV inicial",
            "Bpmin final", "Bpmin inicial",
            "Bpmax final", "Bpmax inicial",
            "Frec final", "Frec inicial",
        ])

    df_renamed = df_renamed.drop(columns = ["Unnamed: 0", "grouping"])
    return df_renamed.fillna(0)


def predictSex(pathToTable):
    """Fit a TabPFN classifier for the ``Sex`` column and print its hold-out accuracy.

    The table is loaded through ``fullRead`` with "," as separator and without
    the anthropometric merge. "Sweetener" and "Time" are replaced by ordinal
    codes, "numVol" and "Sex" are removed from the features, and the rows are
    split 67/33 into train and test sets with a fixed random state.

    Parameters
    ----------
    pathToTable : str
        Path of the CSV table; it is also echoed in the printed message.

    Returns
    -------
    None
        The accuracy is printed, not returned.
    """
    table = fullRead(pathToTable, ",")

    # Encode the two non-target string columns as ordinal codes.
    categorical = ["Sweetener", "Time"]
    table[categorical] = OrdinalEncoder().fit_transform(table[categorical])

    features = table.drop(columns=["numVol", "Sex"])
    target = table["Sex"]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.33, random_state=42
    )

    model = TabPFNClassifier(device='cpu', N_ensemble_configurations=32)
    model.fit(X_train, y_train)

    # With return_winning_probability=True the call yields a pair; only the
    # predicted labels are needed for the accuracy.
    predicted, _winning_probability = model.predict(X_test, return_winning_probability=True)

    score = accuracy_score(y_test, predicted)
    print('Accuracy in sex clasiffication', pathToTable, score)

def predictSweet(pathToTable):
    """Fit a TabPFN classifier for the ``Sweetener`` column and print its hold-out accuracy.

    The table is loaded through ``fullRead`` with "," as separator and the
    anthropometric merge enabled. Only rows whose ``Time`` equals "Final" are
    kept. "Sex" is replaced by ordinal codes; "numVol", "Sweetener" and "Time"
    are removed from the features. Rows are split 67/33 into train and test
    sets with a fixed random state.

    Parameters
    ----------
    pathToTable : str
        Path of the CSV table; it is also echoed in the printed message.

    Returns
    -------
    None
        The accuracy is printed, not returned.
    """
    df = fullRead(pathToTable, ",", anthro=True)

    # Take an explicit copy of the filtered rows: assigning the encoded "Sex"
    # column into a bare boolean-mask selection is the pattern pandas flags
    # with SettingWithCopyWarning.
    df = df[df["Time"] == "Final"].copy()

    enc = OrdinalEncoder()
    df[["Sex"]] = enc.fit_transform(df[["Sex"]])
    X, y = df.drop(["numVol", "Sweetener", "Time"], axis=1), df["Sweetener"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    classifier = TabPFNClassifier(device='cpu', N_ensemble_configurations=32)

    classifier.fit(X_train, y_train)
    # The call yields a pair; only the predicted labels are used below.
    y_eval, _ = classifier.predict(X_test, return_winning_probability=True)

    print('Accuracy in sweetener clasiffication', pathToTable, accuracy_score(y_test, y_eval))

In [5]:
# Run both classifiers on each table, sex first and sweetener second,
# in the order urine flavanones, urine anthocyanins, plasma flavanones,
# plasma anthocyanins (as suggested by the file names).
for table_path in (
    "data/urineFlav_ord.csv",
    "data/urineAnt_ord.csv",
    "data/plasmFlav_ord.csv",
    "data/plasmAnt_ord.csv",
):
    predictSex(table_path)
    predictSweet(table_path)

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Accuracy in sex clasiffication data/urineFlav_ord.csv 0.6024096385542169
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Accuracy in sweetener clasiffication data/urineFlav_ord.csv 0.5476190476190477
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Accuracy in sex clasiffication data/urineAnt_ord.csv 0.6746987951807228
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Accuracy in sweetener clasiffication data/urineAnt_ord.csv 0.47619047619047616
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Accuracy in sex clasiffication data/plasmFlav_ord.csv 0.4878048780487805
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
Accuracy in sweetener clasiffication data/plasmFlav_ord.csv 