In [None]:
!pip -q install "tensorflow==2.19.*" "tensorflow-text==2.19.*" "tf-keras==2.19.*" "tensorflow-decision-forests==1.12.*"
import os, sys; os.kill(os.getpid(), 9)

In [17]:
import tensorflow as tf
print("TF:", tf.__version__)
print("GPUs:", tf.config.list_physical_devices('GPU'))

TF: 2.19.0
GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [18]:
import ast, re, math, numpy as np, pandas as pd, tensorflow as tf
from tensorflow.keras import layers, mixed_precision, Model
from sklearn.metrics import classification_report, f1_score, accuracy_score

##This will configure GPU +AMP
gpus = tf.config.list_physical_devices('GPU')
for g in gpus:
    try: tf.config.experimental.set_memory_growth(g, True)
    except: pass
mixed_precision.set_global_policy('float32')
print("TF:", tf.__version__, "GPUs:", gpus, "AMP:", mixed_precision.global_policy())

TF: 2.19.0 GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')] AMP: <DTypePolicy "float32">


In [19]:
##This will load the datasets
KAGGLE_CSV = "kaggle_data.csv"
REDDIT_CSV = "reddit_full.csv"

##This will set out 16 classifiers
MBTI16 = ['ISTJ','ISFJ','INFJ','INTJ','ISTP','ISFP','INFP','INTP',
          'ENTJ','ENTP','ENFJ','ENFP','ESTJ','ESFJ','ESTP','ESFP']
lab2id = {l:i for i,l in enumerate(MBTI16)}

def load_df(path, text_col_guess=("posts","body"), label_col_guess=("type","class")):
    df = pd.read_csv(path)
    ##This will figure out columns
    text_col  = next((c for c in text_col_guess  if c in df.columns), None)
    label_col = next((c for c in label_col_guess if c in df.columns), None)
    assert text_col and label_col, f"Could not find text/label in {path}. Columns: {df.columns.tolist()}"

    def liststr_to_str(x):
        if isinstance(x, str) and x.startswith('['):
            try:
                toks = ast.literal_eval(x)
                if isinstance(toks, list): return " ".join(map(str, toks))
            except Exception:
                pass
        return str(x)

    df = df[[text_col, label_col]].rename(columns={text_col:'text', label_col:'label'})
    df['text']  = df['text'].map(liststr_to_str)
    df['label'] = df['label'].astype(str)
    df = df[df['label'].isin(MBTI16)].copy()
    df['y'] = df['label'].map(lab2id).astype(int)
    return df

df_k = load_df(KAGGLE_CSV, text_col_guess=("posts","body"),  label_col_guess=("type","class"))
df_r = load_df(REDDIT_CSV, text_col_guess=("body","posts"),  label_col_guess=("class","type"))
print("Kaggle rows:", len(df_k), "Reddit rows:", len(df_r))


Kaggle rows: 410915 Reddit rows: 1651100


In [20]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

VOCAB_SZ = 20000
MAX_LEN  = 160

tok = Tokenizer(num_words=VOCAB_SZ, oov_token="<UNK>")
tok.fit_on_texts(pd.concat([df_k['text'], df_r['text']], axis=0))

def featurize(texts):
    seq = tok.texts_to_sequences(texts)
    return pad_sequences(seq, maxlen=MAX_LEN, padding='post', truncating='post')

Xk, yk = featurize(df_k['text']), df_k['y'].to_numpy()
Xr, yr = featurize(df_r['text']), df_r['y'].to_numpy()


In [21]:
AUTOTUNE = tf.data.AUTOTUNE
BATCH = 128 if gpus else 32

def make_split(X, y, frac=0.9, shuffle=True):
    n = len(X); k = int(n*frac)
    Xtr, Xva = X[:k], X[k:]
    ytr, yva = y[:k], y[k:]
    if shuffle:
        idx = np.random.permutation(len(Xtr))
        Xtr, ytr = Xtr[idx], ytr[idx]
    ds_tr = tf.data.Dataset.from_tensor_slices((Xtr, ytr)).batch(BATCH).prefetch(AUTOTUNE)
    ds_va = tf.data.Dataset.from_tensor_slices((Xva, yva)).batch(BATCH).prefetch(AUTOTUNE)
    return ds_tr, ds_va, (Xva, yva)

ds_k_tr, ds_k_va, (Xk_va, yk_va) = make_split(Xk, yk)
ds_r_tr, ds_r_va, (Xr_va, yr_va) = make_split(Xr, yr)


In [22]:
##This will establish our CNN function
def make_cnn(num_classes=16, vocab_size=VOCAB_SZ, embed_dim=96, seq_len=MAX_LEN, filters=192, drop=0.3):
    inp = layers.Input((seq_len,), name="input_ids")
    emb = layers.Embedding(vocab_size, embed_dim, name="embedding")(inp)
    c3  = layers.Conv1D(filters, 3, activation="relu")(emb)
    c4  = layers.Conv1D(filters, 4, activation="relu")(emb)
    c5  = layers.Conv1D(filters, 5, activation="relu")(emb)
    p3, p4, p5 = layers.GlobalMaxPooling1D()(c3), layers.GlobalMaxPooling1D()(c4), layers.GlobalMaxPooling1D()(c5)
    x = layers.Concatenate()([p3,p4,p5])
    x = layers.Dropout(drop)(x)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(drop)(x)
    ##This will keep logits/probs in float32 for numerical stability with AMP
    out = layers.Dense(num_classes, activation="softmax", dtype="float32")(x)
    m = Model(inp, out)
    m.compile(
        optimizer=tf.keras.optimizers.Adam(2e-4),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return m

cb = [
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=2, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=1, verbose=1)
]


In [23]:
##This will train the function in Kaggle/Reddit datasets
model_k = make_cnn()
print("=== Train on Kaggle ===")
hist_k = model_k.fit(ds_k_tr, validation_data=ds_k_va, epochs=6, verbose=1)

model_r = make_cnn()
print("\n=== Train on Reddit ===")
hist_r = model_r.fit(ds_r_tr, validation_data=ds_r_va, epochs=6, verbose=1)


=== Train on Kaggle ===
Epoch 1/6
[1m2890/2890[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 17ms/step - accuracy: 0.2042 - loss: 2.3118 - val_accuracy: 0.2279 - val_loss: 2.2233
Epoch 2/6
[1m2890/2890[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - accuracy: 0.2349 - loss: 2.2354 - val_accuracy: 0.2301 - val_loss: 2.2204
Epoch 3/6
[1m2890/2890[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - accuracy: 0.2571 - loss: 2.1891 - val_accuracy: 0.2264 - val_loss: 2.2342
Epoch 4/6
[1m2890/2890[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - accuracy: 0.2808 - loss: 2.1354 - val_accuracy: 0.2255 - val_loss: 2.2612
Epoch 5/6
[1m2890/2890[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - accuracy: 0.3090 - loss: 2.0648 - val_accuracy: 0.2199 - val_loss: 2.3038
Epoch 6/6
[1m2890/2890[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - accuracy: 0.3453 - loss: 1.9714 - val_accuracy: 0.2134 - val

In [24]:
##This will create an eval report of the model
def eval_report(model, X, y, tag):
    yhat = model.predict(tf.data.Dataset.from_tensor_slices(X).batch(BATCH), verbose=0)
    pred = np.argmax(yhat, axis=1)
    print(f"\n--- {tag} ---")
    print("Accuracy:", accuracy_score(y, pred))
    print("Macro-F1:", f1_score(y, pred, average='macro'))
    print(classification_report(y, pred, target_names=MBTI16, digits=3))
    return accuracy_score(y, pred), f1_score(y, pred, average='macro')

##This will be in-domain
acc_k_in, f1_k_in = eval_report(model_k, Xk_va, yk_va, "Kaggle → Kaggle (val)")
acc_r_in, f1_r_in = eval_report(model_r, Xr_va, yr_va, "Reddit → Reddit (val)")

##This will be cross-domain
acc_k2r, f1_k2r = eval_report(model_k, Xr_va, yr_va, "Kaggle → Reddit (cross)")
acc_r2k, f1_r2k = eval_report(model_r, Xk_va, yk_va, "Reddit → Kaggle (cross)")

print("\n=== Cross-domain drop (ΔF1 = within − cross) ===")
print(f"Kaggle-trained ΔF1: {f1_k_in - f1_k2r:.4f}")
print(f"Reddit-trained ΔF1: {f1_r_in - f1_r2k:.4f}")



--- Kaggle → Kaggle (val) ---
Accuracy: 0.21337486615399592
Macro-F1: 0.07329459619773335
              precision    recall  f1-score   support

        ISTJ      0.226     0.006     0.012      1093
        ISFJ      0.000     0.000     0.000       642
        INFJ      0.213     0.251     0.230      7245
        INTJ      0.162     0.146     0.154      5108
        ISTP      0.117     0.018     0.031      1511
        ISFP      0.333     0.001     0.002      1262
        INFP      0.246     0.495     0.328      8495
        INTP      0.218     0.225     0.222      6441
        ENTJ      0.077     0.001     0.002      1271
        ENTP      0.115     0.071     0.088      2963
        ENFJ      0.000     0.000     0.000       693
        ENFP      0.145     0.082     0.104      3689
        ESTJ      0.000     0.000     0.000       196
        ESFJ      0.000     0.000     0.000       185
        ESTP      0.000     0.000     0.000       250
        ESFP      0.000     0.000     0.000 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Reddit → Reddit (val) ---
Accuracy: 0.3135000908485252
Macro-F1: 0.10472101921417397
              precision    recall  f1-score   support

        ISTJ      0.750     0.002     0.004      1660
        ISFJ      0.000     0.000     0.000       694
        INFJ      0.280     0.201     0.234     19396
        INTJ      0.319     0.301     0.310     35773
        ISTP      0.282     0.013     0.025      5081
        ISFP      0.580     0.026     0.051      1097
        INFP      0.264     0.174     0.210     17777
        INTP      0.331     0.657     0.440     45230
        ENTJ      0.312     0.020     0.038      4259
        ENTP      0.281     0.169     0.211     19716
        ENFJ      0.277     0.025     0.045      2077
        ENFP      0.215     0.073     0.109      9699
        ESTJ      0.000     0.000     0.000       429
        ESFJ      0.000     0.000     0.000       272
        ESTP      0.000     0.000     0.000      1210
        ESFP      0.000     0.000     0.000  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Kaggle → Reddit (cross) ---
Accuracy: 0.18839561504451577
Macro-F1: 0.06429544744112084
              precision    recall  f1-score   support

        ISTJ      0.000     0.000     0.000      1660
        ISFJ      0.000     0.000     0.000       694
        INFJ      0.159     0.281     0.203     19396
        INTJ      0.277     0.122     0.169     35773
        ISTP      0.098     0.014     0.024      5081
        ISFP      0.000     0.000     0.000      1097
        INFP      0.128     0.523     0.206     17777
        INTP      0.338     0.227     0.271     45230
        ENTJ      0.107     0.001     0.001      4259
        ENTP      0.190     0.064     0.095     19716
        ENFJ      0.000     0.000     0.000      2077
        ENFP      0.085     0.044     0.058      9699
        ESTJ      0.000     0.000     0.000       429
        ESFJ      0.000     0.000     0.000       272
        ESTP      0.000     0.000     0.000      1210
        ESFP      0.000     0.000     0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Reddit → Kaggle (cross) ---
Accuracy: 0.18619195950549985
Macro-F1: 0.06344068883952056
              precision    recall  f1-score   support

        ISTJ      0.000     0.000     0.000      1093
        ISFJ      0.000     0.000     0.000       642
        INFJ      0.243     0.120     0.160      7245
        INTJ      0.149     0.242     0.184      5108
        ISTP      0.121     0.005     0.009      1511
        ISFP      0.111     0.001     0.002      1262
        INFP      0.300     0.140     0.191      8495
        INTP      0.181     0.603     0.278      6441
        ENTJ      0.064     0.002     0.005      1271
        ENTP      0.112     0.102     0.107      2963
        ENFJ      0.077     0.006     0.011       693
        ENFP      0.176     0.043     0.069      3689
        ESTJ      0.000     0.000     0.000       196
        ESFJ      0.000     0.000     0.000       185
        ESTP      0.000     0.000     0.000       250
        ESFP      0.000     0.000     0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**SECOND PART**

In [25]:
import numpy as np

##This will take the int labels 0..15 in MBTI16 order:
MBTI16 = ['ISTJ','ISFJ','INFJ','INTJ','ISTP','ISFP','INFP','INTP','ENTJ','ENTP','ENFJ','ENFP','ESTJ','ESFJ','ESTP','ESFP']
lab2id = {l:i for i,l in enumerate(MBTI16)}

##This will split one type into 4 bits: I/E, S/N, T/F, J/P (1=the second letter)
def type_to_axes(idx):
    t = MBTI16[idx]
    return (
        0 if t[0]=='I' else 1,
        0 if t[1]=='S' else 1,
        0 if t[2]=='T' else 1,
        0 if t[3]=='J' else 1
    )

def labels_to_axes(y):
    axes = np.array([type_to_axes(i) for i in y], dtype=np.int32)
    ##The shape here: (N, 4) columns = [IE, SN, TF, JP]
    return axes

yk_axes = labels_to_axes(yk)
yr_axes = labels_to_axes(yr)


In [26]:
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import StratifiedShuffleSplit
import tensorflow as tf
import numpy as np

def axis_class_weights(y_axes):
    weights = []
    for j in range(4):
        w = compute_class_weight("balanced", classes=np.array([0,1]), y=y_axes[:,j])
        weights.append({0: float(w[0]), 1: float(w[1])})
    return weights

AUTOTUNE = tf.data.AUTOTUNE
BATCH = 256 if tf.config.list_physical_devices('GPU') else 32

def make_axis_ds_with_weights(X, y_axes, wdicts, val_frac=0.1):
    ##This will stratify by 16 way code reconstructed from axes
    code16 = y_axes[:,0]*8 + y_axes[:,1]*4 + y_axes[:,2]*2 + y_axes[:,3]
    sss = StratifiedShuffleSplit(n_splits=1, test_size=val_frac, random_state=42)
    idx_tr, idx_va = next(sss.split(X, code16))

    Xtr, Xva = X[idx_tr], X[idx_va]
    ytr, yva = y_axes[idx_tr], y_axes[idx_va]

    ##This will set y dicts
    def to_y_dict(Y):
        Y = Y.astype(np.float32)
        return {"ie": Y[:,0], "sn": Y[:,1], "tf": Y[:,2], "jp": Y[:,3]}

    ytr_dict = to_y_dict(ytr)
    yva_dict = to_y_dict(yva)

    ##This will be the sample weights as ordered tuples (ie, sn, tf, jp)
    ##training weights for example
    Wtr = np.empty_like(ytr, dtype=np.float32)
    for j in range(4):
        Wtr[:, j] = np.where(ytr[:, j]==1, wdicts[j][1], wdicts[j][0])
    sw_tr_tuple = (Wtr[:,0], Wtr[:,1], Wtr[:,2], Wtr[:,3])

    ##Here validation weights = ones
    sw_va_tuple = (
        np.ones(len(yva), np.float32),
        np.ones(len(yva), np.float32),
        np.ones(len(yva), np.float32),
        np.ones(len(yva), np.float32),
    )

    ds_tr = tf.data.Dataset.from_tensor_slices((Xtr, ytr_dict, sw_tr_tuple)) \
                           .shuffle(10000).batch(BATCH).prefetch(AUTOTUNE)
    ds_va = tf.data.Dataset.from_tensor_slices((Xva, yva_dict, sw_va_tuple)) \
                           .batch(BATCH).prefetch(AUTOTUNE)
    return ds_tr, ds_va, (Xva, yva)

##This will rebuild the datasets
cw_k_axes = axis_class_weights(yk_axes)
cw_r_axes = axis_class_weights(yr_axes)
ds_k_tr, ds_k_va, (Xk_va, yk_axes_va) = make_axis_ds_with_weights(Xk, yk_axes, cw_k_axes)
ds_r_tr, ds_r_va, (Xr_va, yr_axes_va) = make_axis_ds_with_weights(Xr, yr_axes, cw_r_axes)

In [27]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def make_cnn_axes(vocab_size=20000, embed_dim=96, seq_len=160, filters=192, drop=0.3):
    inp = layers.Input((seq_len,), name="input_ids")
    emb = layers.Embedding(vocab_size, embed_dim, name="embedding")(inp)
    c3  = layers.Conv1D(filters, 3, activation="relu")(emb)
    c4  = layers.Conv1D(filters, 4, activation="relu")(emb)
    c5  = layers.Conv1D(filters, 5, activation="relu")(emb)
    x   = layers.Concatenate()([
        layers.GlobalMaxPooling1D()(c3),
        layers.GlobalMaxPooling1D()(c4),
        layers.GlobalMaxPooling1D()(c5),
    ])
    x   = layers.Dropout(drop)(x)
    x   = layers.Dense(256, activation="relu")(x)
    x   = layers.Dropout(drop)(x)
    ##This will use 4 independent binary heads and keepss outputs in float32 for AMP stability
    out_ie = layers.Dense(1, activation="sigmoid", name="ie", dtype="float32")(x)
    out_sn = layers.Dense(1, activation="sigmoid", name="sn", dtype="float32")(x)
    out_tf = layers.Dense(1, activation="sigmoid", name="tf", dtype="float32")(x)
    out_jp = layers.Dense(1, activation="sigmoid", name="jp", dtype="float32")(x)
    m = Model(inp, [out_ie, out_sn, out_tf, out_jp])
    m.compile(
        optimizer=tf.keras.optimizers.Adam(1e-4),
        loss={"ie":"binary_crossentropy","sn":"binary_crossentropy",
              "tf":"binary_crossentropy","jp":"binary_crossentropy"},
        metrics={"ie":["accuracy"],"sn":["accuracy"],"tf":["accuracy"],"jp":["accuracy"]},
    )
    return m


In [28]:
cb = [
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=2, verbose=1),
]

m_k = make_cnn_axes(vocab_size=VOCAB_SZ, seq_len=MAX_LEN)
print("=== Train axes on Kaggle ===")
m_k.fit(ds_k_tr, validation_data=ds_k_va, epochs=12, callbacks=cb, verbose=1)

m_r = make_cnn_axes(vocab_size=VOCAB_SZ, seq_len=MAX_LEN)
print("\n=== Train axes on Reddit ===")
m_r.fit(ds_r_tr, validation_data=ds_r_va, epochs=12, callbacks=cb, verbose=1)


=== Train axes on Kaggle ===
Epoch 1/12
[1m1445/1445[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 35ms/step - ie_accuracy: 0.5148 - ie_loss: 0.6930 - jp_accuracy: 0.5073 - jp_loss: 0.6930 - loss: 2.7718 - sn_accuracy: 0.5639 - sn_loss: 0.6932 - tf_accuracy: 0.5131 - tf_loss: 0.6927 - val_ie_accuracy: 0.6305 - val_ie_loss: 0.6860 - val_jp_accuracy: 0.5332 - val_jp_loss: 0.6910 - val_loss: 2.7369 - val_sn_accuracy: 0.6842 - val_sn_loss: 0.6778 - val_tf_accuracy: 0.5577 - val_tf_loss: 0.6821 - learning_rate: 1.0000e-04
Epoch 2/12
[1m1445/1445[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 30ms/step - ie_accuracy: 0.5259 - ie_loss: 0.6904 - jp_accuracy: 0.5241 - jp_loss: 0.6917 - loss: 2.7476 - sn_accuracy: 0.5483 - sn_loss: 0.6894 - tf_accuracy: 0.5699 - tf_loss: 0.6761 - val_ie_accuracy: 0.5025 - val_ie_loss: 0.6967 - val_jp_accuracy: 0.5259 - val_jp_loss: 0.6916 - val_loss: 2.7394 - val_sn_accuracy: 0.5153 - val_sn_loss: 0.6908 - val_tf_accuracy: 0.6012 - val_tf_loss

<keras.src.callbacks.history.History at 0x7a8452995e50>

In [29]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, classification_report

##This is our helpers again
MBTI16 = ['ISTJ','ISFJ','INFJ','INTJ','ISTP','ISFP','INFP','INTP',
          'ENTJ','ENTP','ENFJ','ENFP','ESTJ','ESFJ','ESTP','ESFP']
lab2id = {l:i for i,l in enumerate(MBTI16)}

def axes_to_type_str(bits):
    out = []
    for b in bits:
        out.append( ('I' if b[0]==0 else 'E') +
                    ('S' if b[1]==0 else 'N') +
                    ('T' if b[2]==0 else 'F') +
                    ('J' if b[3]==0 else 'P') )
    return out

def eval_axes_model(model, Xva, y_axes_va, tag, batch_size=256):
    ##This will predict probabilities for each head
    ds = tf.data.Dataset.from_tensor_slices(Xva).batch(batch_size)
    p_ie, p_sn, p_tf, p_jp = model.predict(ds, verbose=0)

    ##This will set Threshold at 0.5 → bits
    pred_bits = np.stack([
        (p_ie[:,0] >= 0.5).astype(int),
        (p_sn[:,0] >= 0.5).astype(int),
        (p_tf[:,0] >= 0.5).astype(int),
        (p_jp[:,0] >= 0.5).astype(int),
    ], axis=1)

    ##These are our per-axis metrics
    axis_names = ["IE","SN","TF","JP"]
    print(f"\n--- {tag}: Per-axis metrics ---")
    for j,name in enumerate(axis_names):
        acc = accuracy_score(y_axes_va[:,j], pred_bits[:,j])
        f1  = f1_score(y_axes_va[:,j], pred_bits[:,j], average="macro")
        print(f"{name}: acc={acc:.3f}  macroF1={f1:.3f}")

    ##This will reconstruct 16-type predictions
    true_types = axes_to_type_str(y_axes_va)
    pred_types = axes_to_type_str(pred_bits)
    y_true = np.array([lab2id[t] for t in true_types])
    y_pred = np.array([lab2id[t] for t in pred_types])

    acc16 = accuracy_score(y_true, y_pred)
    f116  = f1_score(y_true, y_pred, average="macro")

    print(f"\n--- {tag}: 16-way metrics ---")
    print("Accuracy:", acc16)
    print("Macro-F1:", f116)
    print(classification_report(y_true, y_pred, target_names=MBTI16, digits=3))

    return acc16, f116

def cross_domain_drop(within_f1, cross_f1):
    return float(within_f1 - cross_f1)

##This will run all the evals
BATCH_EVAL = 256 if tf.config.list_physical_devices('GPU') else 64

##This is 1) Within-domain
acc_k_within, f1_k_within = eval_axes_model(m_k, Xk_va, yk_axes_va, "Kaggle → Kaggle (val)", BATCH_EVAL)
acc_r_within, f1_r_within = eval_axes_model(m_r, Xr_va, yr_axes_va, "Reddit → Reddit (val)", BATCH_EVAL)

##This is 2) Cross-domain
acc_k_cross,  f1_k_cross  = eval_axes_model(m_k, Xr_va, yr_axes_va, "Kaggle → Reddit (cross)", BATCH_EVAL)
acc_r_cross,  f1_r_cross  = eval_axes_model(m_r, Xk_va, yk_axes_va, "Reddit → Kaggle (cross)", BATCH_EVAL)

##This is 3) F1 summary
print("\n=== Cross-domain drop (ΔF1 = within − cross) ===")
print(f"Kaggle-trained ΔF1: {cross_domain_drop(f1_k_within, f1_k_cross):.4f}")
print(f"Reddit-trained ΔF1: {cross_domain_drop(f1_r_within, f1_r_cross):.4f}")



--- Kaggle → Kaggle (val): Per-axis metrics ---
IE: acc=0.600  macroF1=0.531
SN: acc=0.638  macroF1=0.505
TF: acc=0.599  macroF1=0.597
JP: acc=0.541  macroF1=0.536

--- Kaggle → Kaggle (val): 16-way metrics ---
Accuracy: 0.13328628443492652
Macro-F1: 0.0894238515343283
              precision    recall  f1-score   support

        ISTJ      0.048     0.105     0.066       963
        ISFJ      0.033     0.077     0.046       789
        INFJ      0.231     0.198     0.213      6999
        INTJ      0.185     0.158     0.171      5113
        ISTP      0.073     0.101     0.085      1598
        ISFP      0.048     0.077     0.059      1246
        INFP      0.296     0.141     0.191      8696
        INTP      0.232     0.102     0.142      6144
        ENTJ      0.051     0.092     0.066      1099
        ENTP      0.127     0.104     0.114      3302
        ENFJ      0.038     0.084     0.052       910
        ENFP      0.130     0.124     0.127      3208
        ESTJ      0.011   

**THIRD PART**

In [31]:
import numpy as np
from itertools import product
from sklearn.metrics import f1_score, accuracy_score, classification_report
import tensorflow as tf

##MBTI helpers again
MBTI16 = [
    'ISTJ','ISFJ','INFJ','INTJ',
    'ISTP','ISFP','INFP','INTP',
    'ENTJ','ENTP','ENFJ','ENFP',
    'ESTJ','ESFJ','ESTP','ESFP'
]
lab2id = {l:i for i,l in enumerate(MBTI16)}

##This will map each 16-type to axis bits [IE,SN,TF,JP] where bit=1 means E/N/F/P
def type_to_bits(t):
    return np.array([
        1 if t[0]=='E' else 0,
        1 if t[1]=='N' else 0,
        1 if t[2]=='F' else 0,
        1 if t[3]=='P' else 0,
    ], dtype=int)

TYPE_BITS = np.stack([type_to_bits(t) for t in MBTI16], axis=0)

BIT_WEIGHTS = np.array([8, 4, 2, 1], dtype=np.int8)

def axes_to_type_str(bits):
    out = []
    for b in bits:
        out.append(
            ('I' if b[0]==0 else 'E') +
            ('S' if b[1]==0 else 'N') +
            ('T' if b[2]==0 else 'F') +
            ('J' if b[3]==0 else 'P')
        )
    return out

##This will predict axis probablity
def predict_axis_probs(model, X, batch_size=512):
    ds = tf.data.Dataset.from_tensor_slices(X).batch(batch_size)
    p_ie, p_sn, p_tf, p_jp = model.predict(ds, verbose=0)
    ##This will squeeze to shape (N,) each
    return p_ie[:, 0], p_sn[:, 0], p_tf[:, 0], p_jp[:, 0]

##This will implement MAP decoding over 16 types
def map_decode(p_ie, p_sn, p_tf, p_jp, priors=None):
    """
    Compute P(type) = Π_j [p_j if bit==1 else (1-p_j)] * prior[type].
    p_* are shape (N,), priors is shape (16,) or None for uniform.
    Returns: y_pred_type_ids (N,)
    """
    N = p_ie.shape[0]
    P = np.stack([p_ie, p_sn, p_tf, p_jp], axis=1)

    like = np.ones((N, 16), dtype=np.float64)
    for j in range(4):
        pj = P[:, j][:, None]
        bj = TYPE_BITS[None, :, j]
        like *= np.where(bj == 1, pj, 1.0 - pj)

    if priors is None:
        post = like
    else:
        priors = np.asarray(priors, dtype=np.float64)[None, :]
        post = like * priors

    y_pred = np.argmax(post, axis=1).astype(int)
    return y_pred

def eval_map(model, X, y_axes_true, tag, priors=None, batch_size=512):
    ##These are true 16-type ids from axis labels
    y_true_types = axes_to_type_str(y_axes_true.astype(int))
    y_true = np.array([lab2id[t] for t in y_true_types])

    ##This will predict axis probs
    p_ie, p_sn, p_tf, p_jp = predict_axis_probs(model, X, batch_size=batch_size)

    ##This is the MAP decoder
    y_pred = map_decode(p_ie, p_sn, p_tf, p_jp, priors=priors)

    acc = accuracy_score(y_true, y_pred)
    f1  = f1_score(y_true, y_pred, average="macro")

    print(f"\n--- {tag} (MAP decode) ---")
    if priors is None:
        print("Priors: uniform")
    else:
        print("Priors: train distribution")
    print("Accuracy:", acc)
    print("Macro-F1:", f1)
    print(classification_report(y_true, y_pred, target_names=MBTI16, digits=3))

    return acc, f1

def estimate_priors_from_axes(y_axes):
    """
    Estimate 16-type priors from axis labels by counting each 16-type code derived from axes.
    """
    bits = y_axes.astype(int)
    types = axes_to_type_str(bits)
    ids = np.array([lab2id[t] for t in types])
    counts = np.bincount(ids, minlength=16).astype(np.float64)
    priors = counts / counts.sum()
    ##This will set a small floor to avoid exact zero
    priors = np.clip(priors, 1e-6, None)
    priors /= priors.sum()
    return priors

##This is a fast threshold search over axes
def grid_search_thresholds_for_16way_fast(p_ie, p_sn, p_tf, p_jp, y_axes_true,
                                          t_values=None):
    """
    Faster threshold search:
      - operates in integer MBTI IDs (0..15), no strings in the inner loop
      - uses a small grid of thresholds per axis (default: 5 values in [0.35, 0.65])
    """
    if t_values is None:
        t_values = np.linspace(0.35, 0.65, 5)

    ##This will get true type IDs from true axis bits
    y_true_bits = y_axes_true.astype(int)
    y_true_ids  = y_true_bits @ BIT_WEIGHTS

    best = {"f1": -1.0, "acc": 0.0, "thr": (0.5, 0.5, 0.5, 0.5)}

    for t_ie, t_sn, t_tf, t_jp in product(t_values, repeat=4):
        ##This sets threshold each axis once for this combo
        ie_bits = (p_ie >= t_ie).astype(int)
        sn_bits = (p_sn >= t_sn).astype(int)
        tf_bits = (p_tf >= t_tf).astype(int)
        jp_bits = (p_jp >= t_jp).astype(int)

        ##This will stack into (N,4) and map to 0..15 IDs
        pred_bits = np.stack([ie_bits, sn_bits, tf_bits, jp_bits], axis=1)
        y_pred_ids = pred_bits @ BIT_WEIGHTS

        f1  = f1_score(y_true_ids, y_pred_ids, average="macro")
        if f1 > best["f1"]:
            acc = accuracy_score(y_true_ids, y_pred_ids)
            best["f1"]  = float(f1)
            best["acc"] = float(acc)
            best["thr"] = (float(t_ie), float(t_sn), float(t_tf), float(t_jp))

    return best


##This will run the improved decoders
print(">>> MAP decoding on Kaggle val")
priors_k = estimate_priors_from_axes(yk_axes_va)
acc_k_map_u, f1_k_map_u = eval_map(m_k, Xk_va, yk_axes_va, "Kaggle→Kaggle", priors=None)
acc_k_map_p, f1_k_map_p = eval_map(m_k, Xk_va, yk_axes_va, "Kaggle→Kaggle", priors=priors_k)

print("\n>>> MAP decoding on Reddit val")
priors_r = estimate_priors_from_axes(yr_axes_va)
acc_r_map_u, f1_r_map_u = eval_map(m_r, Xr_va, yr_axes_va, "Reddit→Reddit", priors=None)
acc_r_map_p, f1_r_map_p = eval_map(m_r, Xr_va, yr_axes_va, "Reddit→Reddit", priors=priors_r)

##This will tune the threshold with fast search
print("\n>>> Threshold search (Kaggle val)")
p_ie_k, p_sn_k, p_tf_k, p_jp_k = predict_axis_probs(m_k, Xk_va)
best_k = grid_search_thresholds_for_16way_fast(
    p_ie_k, p_sn_k, p_tf_k, p_jp_k, yk_axes_va
)
print("Best thresholds (ie,sn,tf,jp):", best_k["thr"],
      "  acc:", best_k["acc"], "  macroF1:", best_k["f1"])

print("\n>>> Threshold search (Reddit val)")
p_ie_r, p_sn_r, p_tf_r, p_jp_r = predict_axis_probs(m_r, Xr_va)
best_r = grid_search_thresholds_for_16way_fast(
    p_ie_r, p_sn_r, p_tf_r, p_jp_r, yr_axes_va
)
print("Best thresholds (ie,sn,tf,jp):", best_r["thr"],
      "  acc:", best_r["acc"], "  macroF1:", best_r["f1"])

>>> MAP decoding on Kaggle val

--- Kaggle→Kaggle (MAP decode) ---
Priors: uniform
Accuracy: 0.13328628443492652
Macro-F1: 0.0894238515343283
              precision    recall  f1-score   support

        ISTJ      0.048     0.105     0.066       963
        ISFJ      0.033     0.077     0.046       789
        INFJ      0.231     0.198     0.213      6999
        INTJ      0.185     0.158     0.171      5113
        ISTP      0.073     0.101     0.085      1598
        ISFP      0.048     0.077     0.059      1246
        INFP      0.296     0.141     0.191      8696
        INTP      0.232     0.102     0.142      6144
        ENTJ      0.051     0.092     0.066      1099
        ENTP      0.127     0.104     0.114      3302
        ENFJ      0.038     0.084     0.052       910
        ENFP      0.130     0.124     0.127      3208
        ESTJ      0.011     0.069     0.019       188
        ESFJ      0.014     0.060     0.022       199
        ESTP      0.022     0.116     0.037    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Reddit→Reddit (MAP decode) ---
Priors: uniform
Accuracy: 0.18490703167585246
Macro-F1: 0.09878951149239515
              precision    recall  f1-score   support

        ISTJ      0.020     0.044     0.027      1659
        ISFJ      0.010     0.031     0.015       700
        INFJ      0.226     0.243     0.234     19468
        INTJ      0.334     0.262     0.294     35804
        ISTP      0.057     0.073     0.064      5006
        ISFP      0.029     0.051     0.037      1134
        INFP      0.216     0.101     0.138     17699
        INTP      0.396     0.174     0.242     45224
        ENTJ      0.058     0.080     0.067      4364
        ENTP      0.207     0.188     0.197     19434
        ENFJ      0.032     0.149     0.053      2094
        ENFP      0.110     0.156     0.129      9784
        ESTJ      0.012     0.031     0.017       448
        ESFJ      0.005     0.038     0.008       265
        ESTP      0.017     0.193     0.031      1279
        ESFP      0.015

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best thresholds (ie,sn,tf,jp): (0.65, 0.35, 0.5, 0.5)   acc: 0.19354132191180765   macroF1: 0.0982407568673606

>>> Threshold search (Reddit val)
Best thresholds (ie,sn,tf,jp): (0.575, 0.35, 0.575, 0.5)   acc: 0.25383077948034644   macroF1: 0.11266795073323638


**FOURTH PART**

In [40]:
import tensorflow as tf

def focal_bce(gamma=2.0, alpha=0.5):
    def loss(y_true, y_pred):
        ##This will clip predictions
        eps = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)

        ##pt = p if correct else 1-p
        pt = y_true * y_pred + (1 - y_true) * (1 - y_pred)

        ##alpha balancing
        w = alpha * y_true + (1 - alpha) * (1 - y_true)

        ##focal loss formula
        return tf.reduce_mean(-w * tf.pow(1 - pt, gamma) * tf.math.log(pt))
    return loss

def make_cnn_axes_plus_softmax(vocab_size, seq_len, emb_dim=96, filters=(192,192,192), drop=0.25, num_types=16, label_smooth=0.05):
    inp = L.Input(shape=(seq_len,), dtype="int32", name="input_layer")
    x = L.Embedding(vocab_size, emb_dim, name="embedding")(inp)
    c3 = L.Conv1D(filters[0], 3, padding="valid", activation="relu")(x)
    c4 = L.Conv1D(filters[1], 4, padding="valid", activation="relu")(x)
    c5 = L.Conv1D(filters[2], 5, padding="valid", activation="relu")(x)
    p = L.Concatenate()([L.GlobalMaxPooling1D()(c3),
                         L.GlobalMaxPooling1D()(c4),
                         L.GlobalMaxPooling1D()(c5)])
    p = L.Dropout(drop)(p)
    h = L.Dense(256, activation="relu")(p)
    h = L.Dropout(drop)(h)

    ##This will set the 4 axis heads
    ie = L.Dense(1, activation="sigmoid", name="ie")(h)
    sn = L.Dense(1, activation="sigmoid", name="sn")(h)
    tfh = L.Dense(1, activation="sigmoid", name="tf")(h)
    jp = L.Dense(1, activation="sigmoid", name="jp")(h)

    ##This will create the 16-way softmax head
    typelogits = L.Dense(num_types, activation=None, name="type_logits")(h)
    typesmx    = L.Activation("softmax", name="type")(typelogits)

    model = keras.Model(inp, [ie, sn, tfh, jp, typesmx], name="axes_plus_softmax")

    ##This will capture losses
    losses = {
        "ie": focal_bce(gamma=2.0, alpha=0.5),
        "sn": focal_bce(gamma=2.0, alpha=0.5),
        "tf": focal_bce(gamma=2.0, alpha=0.5),
        "jp": focal_bce(gamma=2.0, alpha=0.5),
        "type": keras.losses.CategoricalCrossentropy(label_smoothing=label_smooth),
    }
    ##This will set the balance so softmax head gets a decent weight
    loss_w = {"ie":1.0, "sn":1.0, "tf":1.0, "jp":1.0, "type":2.0}

    model.compile(
        optimizer=keras.optimizers.Adam(3e-4),
        loss=losses,
        loss_weights=loss_w,
        metrics={
            "ie":[keras.metrics.BinaryAccuracy(name="acc")],
            "sn":[keras.metrics.BinaryAccuracy(name="acc")],
            "tf":[keras.metrics.BinaryAccuracy(name="acc")],
            "jp":[keras.metrics.BinaryAccuracy(name="acc")],
            "type":[keras.metrics.CategoricalAccuracy(name="acc")]
        }
    )
    return model


In [41]:
import numpy as np
MBTI16 = ['ISTJ','ISFJ','INFJ','INTJ','ISTP','ISFP','INFP','INTP',
          'ENTJ','ENTP','ENFJ','ENFP','ESTJ','ESFJ','ESTP','ESFP']
lab2id = {l:i for i,l in enumerate(MBTI16)}

def axes_to_type_str(bits):
    out=[]
    for b in bits:
        out.append(('I' if b[0]==0 else 'E')+
                   ('S' if b[1]==0 else 'N')+
                   ('T' if b[2]==0 else 'F')+
                   ('J' if b[3]==0 else 'P'))
    return out

def onehot(ids, K=16):
    y = np.zeros((len(ids), K), dtype=np.float32)
    y[np.arange(len(ids)), ids] = 1.0
    return y

##This will build y_type for each split I already created
def make_y_type_from_axes(y_axes):
    t = axes_to_type_str(y_axes.astype(int))
    ids = np.array([lab2id[s] for s in t], dtype=int)
    return onehot(ids, K=16)

##This is for Kaggle val/train parts I already have
y_k_tr_type = make_y_type_from_axes(yk_axes[ds_k_tr.unbatch().map(lambda x,y,w: y['ie']).cardinality().numpy() * 0:0])  # ignore, see below


In [42]:
import numpy as np
import tensorflow as tf
from keras import backend as K

AUTOTUNE = tf.data.AUTOTUNE

def make_axis_ds_with_weights(X, y_axes, wdicts, val_frac=0.10, batch=512, seed=42):
    """
    Build tf.data datasets for the 4 binary MBTI axes *and* return the
    train/val indices so you can derive 16-way one-hot labels later.

    Returns:
        ds_tr, ds_va, (Xva, yva), (idx_tr, idx_va)
    """
    n = len(X)
    idx = np.arange(n)
    rng = np.random.RandomState(seed)
    rng.shuffle(idx)

    n_va = int(n * val_frac)
    idx_va = idx[:n_va]
    idx_tr = idx[n_va:]

    Xtr, Xva = X[idx_tr], X[idx_va]
    ytr, yva = y_axes[idx_tr], y_axes[idx_va]

    ##This will build dicts for the 4 heads
    def to_y_dict(Y):
        Y = Y.astype(np.float32)
        return {
            "ie": Y[:, 0],
            "sn": Y[:, 1],
            "tf": Y[:, 2],
            "jp": Y[:, 3],
        }
    ytr_dict = to_y_dict(ytr)
    yva_dict = to_y_dict(yva)

    ##This will set sample weights for each axis head (tuple order must match model outputs)
    ##Here wdicts is a list/tuple of 4 dicts like {0: w_for_zero, 1: w_for_one}
    Wtr = np.empty_like(ytr, dtype=np.float32)
    for j in range(4):
        Wtr[:, j] = np.where(ytr[:, j] == 1, wdicts[j][1], wdicts[j][0])

    sw_tr = (Wtr[:, 0], Wtr[:, 1], Wtr[:, 2], Wtr[:, 3])
    sw_va = (
        np.ones(len(yva), np.float32),
        np.ones(len(yva), np.float32),
        np.ones(len(yva), np.float32),
        np.ones(len(yva), np.float32),
    )

    ds_tr = (
        tf.data.Dataset
          .from_tensor_slices((Xtr, ytr_dict, sw_tr))
          .shuffle(10000, seed=seed)
          .batch(batch)
          .prefetch(AUTOTUNE)
    )
    ds_va = (
        tf.data.Dataset
          .from_tensor_slices((Xva, yva_dict, sw_va))
          .batch(batch)
          .prefetch(AUTOTUNE)
    )

    ##This will also return (Xva, yva) for metrics, and (idx_tr, idx_va) for building 16 way labels
    return ds_tr, ds_va, (Xva, yva), (idx_tr, idx_va)


In [43]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score, f1_score

##This will Train/val split (Kaggle)
rng = np.random.RandomState(42)
n_k = len(Xk)
idx = np.arange(n_k)
rng.shuffle(idx)

val_frac = 0.10
n_va = int(n_k * val_frac)
idx_va = idx[:n_va]
idx_tr = idx[n_va:]

Xk_tr, Xk_va = Xk[idx_tr], Xk[idx_va]
yk_axes_tr, yk_axes_va = yk_axes[idx_tr], yk_axes[idx_va]

print("Train size:", Xk_tr.shape[0], " Val size:", Xk_va.shape[0])

##This will build 16-way one-hot labels from axes
y_type_tr = make_y_type_from_axes(yk_axes_tr)
y_type_va = make_y_type_from_axes(yk_axes_va)

##This will build label dicts for all 5 heads
y_tr_dict = {
    "ie":   yk_axes_tr[:, 0].astype(np.float32),
    "sn":   yk_axes_tr[:, 1].astype(np.float32),
    "tf":   yk_axes_tr[:, 2].astype(np.float32),
    "jp":   yk_axes_tr[:, 3].astype(np.float32),
    "type": y_type_tr,
}

y_va_dict = {
    "ie":   yk_axes_va[:, 0].astype(np.float32),
    "sn":   yk_axes_va[:, 1].astype(np.float32),
    "tf":   yk_axes_va[:, 2].astype(np.float32),
    "jp":   yk_axes_va[:, 3].astype(np.float32),
    "type": y_type_va,
}

##This will build the multi task model
m_k_multi = make_cnn_axes_plus_softmax(
    vocab_size=VOCAB_SZ,
    seq_len=MAX_LEN,
    emb_dim=96,
    filters=(192, 192, 192),
    drop=0.25,
    num_types=16,
    label_smooth=0.05
)

m_k_multi.summary()

##This will train (test run, few epochs, NO sample_weight)
history_multi_k = m_k_multi.fit(
    Xk_tr,
    y_tr_dict,
    validation_data=(Xk_va, y_va_dict),
    batch_size=256,
    epochs=3,
    callbacks=cb,
    verbose=1
)

##This will do a 16 type evaluation from type head
##The model outputs should be [ie, sn, tf, jp, type]
outputs_va = m_k_multi.predict(Xk_va, batch_size=256, verbose=0)
probs_type_va = outputs_va[-1]

y_pred_ids = probs_type_va.argmax(axis=1)

##This will be our true ids from axes
BIT_WEIGHTS = np.array([8, 4, 2, 1], dtype=int)
y_true_ids = (yk_axes_va.astype(int) @ BIT_WEIGHTS)

print("\nMulti-task type head on Kaggle val:")
print("  accuracy:", accuracy_score(y_true_ids, y_pred_ids))
print("  macro-F1:", f1_score(y_true_ids, y_pred_ids, average="macro"))


Train size: 369824  Val size: 41091


Epoch 1/3
[1m1445/1445[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 35ms/step - ie_acc: 0.7589 - ie_loss: 0.0719 - jp_acc: 0.5740 - jp_loss: 0.0863 - loss: 5.0246 - sn_acc: 0.8621 - sn_loss: 0.0553 - tf_acc: 0.5264 - tf_loss: 0.0875 - type_acc: 0.2043 - type_loss: 2.3618 - val_ie_acc: 0.7678 - val_ie_loss: 0.0690 - val_jp_acc: 0.6045 - val_jp_loss: 0.0841 - val_loss: 4.8693 - val_sn_acc: 0.8659 - val_sn_loss: 0.0520 - val_tf_acc: 0.6019 - val_tf_loss: 0.0828 - val_type_acc: 0.2293 - val_type_loss: 2.2904 - learning_rate: 3.0000e-04
Epoch 2/3
[1m1445/1445[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 30ms/step - ie_acc: 0.7668 - ie_loss: 0.0691 - jp_acc: 0.6043 - jp_loss: 0.0841 - loss: 4.8484 - sn_acc: 0.8627 - sn_loss: 0.0525 - tf_acc: 0.6139 - tf_loss: 0.0821 - type_acc: 0.2391 - type_loss: 2.2803 - val_ie_acc: 0.7678 - val_ie_loss: 0.0689 - val_jp_acc: 0.6069 - val_jp_loss: 0.0837 - val_loss: 4.8513 - val_sn_acc: 0.8659 - val_sn_loss: 0.0518 - val_tf_acc: 0.6066