<a href="https://colab.research.google.com/github/ihstepura/publicgenai/blob/main/task6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Task 6
# TabPFN performs slightly better than Random Forest and XGBoost:
#                Accuracy   ROC-AUC
# Random Forest  0.816000  0.753386
# XGBoost        0.812667  0.759529
# TabPFN         0.821467  0.783043
# The larger gap is in the second metric, ROC-AUC:
# TabPFN is better at ranking defaulters above non-defaulters across thresholds, not just at predicting the majority class correctly.
# TabPFN generalised well on this dataset and captured useful patterns without heavy feature engineering or extensive hyperparameter tuning.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
!pip install tabpfn-client # Install the missing package
from tabpfn_client import TabPFNClassifier # Changed import to directly from tabpfn_client
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

# 1. Load Dataset (UCI Credit Card Default)
# header=1 takes the column names from the second row of the sheet.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls"
df = pd.read_excel(url, header=1)

# Features and Target
# 'ID' is dropped together with the label so that it is not used as a feature.
X = df.drop(columns=['ID', 'default payment next month'])
y = df['default payment next month']

# 2. Split Data (25% Test Set)
# random_state is fixed so every model is scored on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# 3. Initialize Models
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
# `use_label_encoder` is intentionally not passed: XGBoost reports it as an
# unused parameter and emits a warning when it is supplied.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)
# tabpfn_client's TabPFNClassifier does not take the 'device' /
# 'N_ensemble_configurations' arguments of the local TabPFN package.
# NOTE(review): in this notebook run the client prompted for an interactive
# login on first use - clear the cell output before sharing the notebook.
tabpfn = TabPFNClassifier(model_path='v2.5_default', ignore_pretraining_limits=True)

# 4. Train and Predict
models = {"Random Forest": rf_model, "XGBoost": xgb_model, "TabPFN": tabpfn}
results = {}

for name, model in models.items():
    # Every model, TabPFN included, is fitted on the full training split
    # (no subsetting), so a single code path is enough.
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    # Second predict_proba column, used as the score for ROC-AUC
    # (presumably the probability of label 1, i.e. default).
    probs = model.predict_proba(X_test)[:, 1]

    results[name] = {
        "Accuracy": accuracy_score(y_test, preds),
        "ROC-AUC": roc_auc_score(y_test, probs)
    }

# 5. Display Comparison
# Transpose so that each model is a row and each metric a column.
print(pd.DataFrame(results).T)

Collecting tabpfn-client
  Downloading tabpfn_client-0.2.8-py3-none-any.whl.metadata (14 kB)
Collecting backoff<=2.2.1,>=2.2.0 (from tabpfn-client)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting password-strength<=0.0.3.post2,>=0.0.3.post2 (from tabpfn-client)
  Downloading password_strength-0.0.3.post2-py2.py3-none-any.whl.metadata (10 kB)
Collecting sseclient-py<=1.8.0,>=1.8.0 (from tabpfn-client)
  Downloading sseclient_py-1.8.0-py2.py3-none-any.whl.metadata (2.0 kB)
Downloading tabpfn_client-0.2.8-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading backoff-2.2.1-py3-none-any.whl (15 kB)
Downloading password_strength-0.0.3.post2-py2.py3-none-any.whl (12 kB)
Downloading sseclient_py-1.8.0-py2.py3-none-any.whl (8.8 kB)
Installing collected packages: sseclient-py, password-strength, backoff, tabpfn-client
Successfully installed backoff-2.2.1 password-st

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Opening browser for login. Please complete the login/registration process in your browser and return here.


Could not open browser automatically. Falling back to command-line login...



1


y


ihor.stepura@warwick.ac.uk


Output()

Output()


Password: ··········
Confirm password: ··········


y


Ihor


Stepura


University of Warwick


c


Use TabPFN to predict the outcome


y


Output()

fHkRoApW


Output()

Processing: 100%|██████████| [00:04<00:00]
Processing: 100%|██████████| [00:04<00:00]

               Accuracy   ROC-AUC
Random Forest  0.816000  0.753386
XGBoost        0.812667  0.759529
TabPFN         0.821467  0.783043



