# Additional testing on other methods

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train an XGBoost classifier on the 80% CLIP-embedding split and report
# accuracy / precision / recall / F1 on the held-out 20% split.

SEED = 42  # one seed for both the shuffle and the model, so re-runs match

# Load features and labels (80% split used for training)
X = np.load("/kaggle/input/clip-embeddings-test/clip_features_80_448.npy")
y = np.load("/kaggle/input/clip-embeddings-test/clip_labels_80_448.npy")
assert len(X) == len(y), "80% split: feature and label row counts differ"

# Normalize features; the scaler statistics come from the training split only
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Shuffle data with a seeded generator. The original used the unseeded global
# NumPy RNG, so the row order (and, with subsample < 1, potentially the fitted
# model) changed on every run. Metrics may therefore differ slightly from
# outputs recorded with the unseeded shuffle.
rng = np.random.default_rng(SEED)
indices = rng.permutation(len(X_scaled))
X_scaled = X_scaled[indices]
y = y[indices]

# Train XGBoost
xgb = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    # NOTE(review): `use_label_encoder` is deprecated in newer xgboost
    # releases; kept for compatibility with the version this was written
    # against — confirm against the installed version before removing.
    use_label_encoder=False,
    eval_metric="logloss",
    random_state=SEED
)

xgb.fit(X_scaled, y)

# --- Predict on 20% set ---

# Load 20% features and labels
X_20 = np.load("/kaggle/input/clip-embeddings-test/clip_features_20_448.npy")
y_20 = np.load("/kaggle/input/clip-embeddings-test/clip_labels_20_448.npy")
assert len(X_20) == len(y_20), "20% split: feature and label row counts differ"
assert X_20.shape[1:] == X.shape[1:], "20% split: feature dimension differs from training data"

# Normalize using training scaler (transform only, never re-fit on test data)
X_20_scaled = scaler.transform(X_20)

# Predict
y_pred = xgb.predict(X_20_scaled)

# Evaluation Metrics (scikit-learn defaults, i.e. binary averaging)
acc = accuracy_score(y_20, y_pred)
prec = precision_score(y_20, y_pred)
rec = recall_score(y_20, y_pred)
f1 = f1_score(y_20, y_pred)

print("XGBoost Evaluation on 20% Set:")
print(f"Accuracy:  {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall:    {rec:.4f}")
print(f"F1 Score:  {f1:.4f}")


XGBoost Evaluation on 20% Set:
Accuracy:  0.9512
Precision: 0.9528
Recall:    0.9483
F1 Score:  0.9506


In [2]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Train a Logistic Regression classifier on the 80% CLIP-embedding split and
# report accuracy / precision / recall / F1 on the held-out 20% split.

SEED = 42  # one seed for both the shuffle and the model, so re-runs match

# Load features and labels (80% split used for training)
X = np.load("/kaggle/input/clip-embeddings-test/clip_features_80_448.npy")
y = np.load("/kaggle/input/clip-embeddings-test/clip_labels_80_448.npy")
assert len(X) == len(y), "80% split: feature and label row counts differ"

# Normalize features; the scaler statistics come from the training split only
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Shuffle data with a seeded generator. The original used the unseeded global
# NumPy RNG, which made the row order differ on every run; seeding keeps the
# cell reproducible under Restart & Run All.
rng = np.random.default_rng(SEED)
indices = rng.permutation(len(X_scaled))
X_scaled = X_scaled[indices]
y = y[indices]

# Train Logistic Regression
lr = LogisticRegression(
    max_iter=1000,
    solver="lbfgs",
    class_weight="balanced",  # Optional: in case of class imbalance
    random_state=SEED
)
lr.fit(X_scaled, y)

# --- Predict on 20% set ---

# Load 20% features and labels
X_20 = np.load("/kaggle/input/clip-embeddings-test/clip_features_20_448.npy")
y_20 = np.load("/kaggle/input/clip-embeddings-test/clip_labels_20_448.npy")
assert len(X_20) == len(y_20), "20% split: feature and label row counts differ"
assert X_20.shape[1:] == X.shape[1:], "20% split: feature dimension differs from training data"

# Normalize using the training scaler (transform only, never re-fit on test data)
X_20_scaled = scaler.transform(X_20)

# Predict
y_pred = lr.predict(X_20_scaled)

# Evaluation (scikit-learn defaults, i.e. binary averaging)
acc = accuracy_score(y_20, y_pred)
prec = precision_score(y_20, y_pred)
rec = recall_score(y_20, y_pred)
f1 = f1_score(y_20, y_pred)

print("Logistic Regression Evaluation on 20% Set:")
print(f"Accuracy:  {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall:    {rec:.4f}")
print(f"F1 Score:  {f1:.4f}")


Logistic Regression Evaluation on 20% Set:
Accuracy:  0.9767
Precision: 0.9764
Recall:    0.9766
F1 Score:  0.9765
