In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import numpy as np
import os

save_dir = "/content/drive/MyDrive/ai_eval_data"

X_metric_train = np.load(f"{save_dir}/X_metric_train.npy", allow_pickle=True)
X_pair_train   = np.load(f"{save_dir}/X_pair_train.npy", allow_pickle=True)
y_train        = np.load(f"{save_dir}/y_train.npy", allow_pickle=True).astype(float)

X_metric_test  = np.load(f"{save_dir}/X_metric_test.npy", allow_pickle=True)
X_pair_test    = np.load(f"{save_dir}/X_pair_test.npy", allow_pickle=True)


print("Shapes:")
print("X_metric_train:", X_metric_train.shape)
print("X_pair_train  :", X_pair_train.shape)
print("y_train       :", y_train.shape)
print("X_metric_test :", X_metric_test.shape)
print("X_pair_test   :", X_pair_test.shape)



Shapes:
X_metric_train: (5000, 768)
X_pair_train  : (5000, 768)
y_train       : (5000,)
X_metric_test : (3638, 768)
X_pair_test   : (3638, 768)


In [3]:
print(X_metric_train.dtype, X_metric_train.shape)
print(X_pair_train.dtype, X_pair_train.shape)

float32 (5000, 768)
float32 (5000, 768)


In [4]:
X_metric_train = np.vstack(X_metric_train).astype('float32')
X_pair_train   = np.vstack(X_pair_train).astype('float32')

X_metric_test = np.vstack(X_metric_test).astype('float32')
X_pair_test   = np.vstack(X_pair_test).astype('float32')

In [5]:
from collections import Counter

y_int = np.round(y_train).astype(int)
counts = Counter(y_int)
print("Class counts:", counts)

class_weight = {cls: 1.0/counts[cls] for cls in counts}
sample_weights = np.array([class_weight[c] for c in y_int])

print("Sample weights sample:", sample_weights[:10])


Class counts: Counter({np.int64(9): 3123, np.int64(10): 1443, np.int64(8): 259, np.int64(7): 95, np.int64(6): 45, np.int64(0): 13, np.int64(3): 7, np.int64(1): 6, np.int64(2): 5, np.int64(4): 3, np.int64(5): 1})
Sample weights sample: [0.000693   0.000693   0.000693   0.000693   0.0003202  0.0003202
 0.0003202  0.003861   0.000693   0.01052632]


In [6]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, concatenate, Dot, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2

metric_dim = X_metric_train.shape[1]
pair_dim   = X_pair_train.shape[1]

# Inputs
inp_metric = Input(shape=(metric_dim,), name="metric_input")
inp_pair   = Input(shape=(pair_dim,),   name="pair_input")

# Towers
x_metric = Dense(512, activation="relu", kernel_regularizer=l2(1e-4))(inp_metric)
x_metric = Dropout(0.2)(x_metric)

x_pair   = Dense(512, activation="relu", kernel_regularizer=l2(1e-4))(inp_pair)
x_pair   = Dropout(0.2)(x_pair)

# Similarity features
# cos_sim = Dot(axes=1, normalize=True)([x_metric, x_pair])
# abs_diff = Lambda(lambda t: tf.abs(t[0] - t[1]))([x_metric, x_pair])

# combined = concatenate([x_metric, x_pair, cos_sim, abs_diff])

combined = concatenate([x_metric, x_pair])
# x = Dense(512, activation="relu")(combined)
# x = Dropout(0.2)(x)

out = Dense(1, activation="linear")(combined)

model = Model([inp_metric, inp_pair], out)


In [7]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(3e-5),
    loss="mse",
    metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")]
)

model.summary()


In [8]:
es = tf.keras.callbacks.EarlyStopping(
    monitor="val_rmse",
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    [X_metric_train, X_pair_train],
    y_train,
    sample_weight=sample_weights,
    epochs=40,
    batch_size=32,
    validation_split=0.1,
    shuffle=True,
    callbacks=[es]
)


Epoch 1/40
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - loss: 0.1870 - rmse: 8.9611 - val_loss: 0.1680 - val_rmse: 8.4649
Epoch 2/40
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1616 - rmse: 8.3424 - val_loss: 0.1465 - val_rmse: 7.8663
Epoch 3/40
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1430 - rmse: 7.7058 - val_loss: 0.1277 - val_rmse: 7.2432
Epoch 4/40
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1229 - rmse: 7.1065 - val_loss: 0.1116 - val_rmse: 6.6401
Epoch 5/40
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1105 - rmse: 6.5065 - val_loss: 0.0985 - val_rmse: 6.1298
Epoch 6/40
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0973 - rmse: 5.9896 - val_loss: 0.0876 - val_rmse: 5.6685
Epoch 7/40
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0

In [9]:
pred_test = model.predict([X_metric_test, X_pair_test]).reshape(-1)
pred_test = np.clip(pred_test, 0, 10)  # keep within valid range
pred_test = np.ceil(pred_test).astype(int)

import pandas as pd

df_submit = pd.DataFrame({
    "ID": np.arange(1, len(pred_test) + 1),
    "score": pred_test
})

df_submit.to_csv(f"{save_dir}/submission_regression2.csv", index=False)
print("Saved:", f"{save_dir}/submission_regression2.csv")


[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Saved: /content/drive/MyDrive/ai_eval_data/submission_regression2.csv


In [10]:
unique, counts = np.unique(pred_test, return_counts=True)
class_distribution = dict(zip(unique, counts))

print("Class Distribution:", class_distribution)

Class Distribution: {np.int64(4): np.int64(24), np.int64(5): np.int64(927), np.int64(6): np.int64(2608), np.int64(7): np.int64(79)}
