In [None]:
# ai_art_classification/
# ├── train/
# │   ├── AI_GENERATED/         ← fake → label = 1
# │   └── NON_AI_GENERATED/     ← real → label = 0
# ├── test/

In [3]:
import os

base_dir = "C:\\Users\\anush\\Downloads\\archive\\ai_art_classification"
print("Train folders:", os.listdir(os.path.join(base_dir, "train")))

Train folders: ['AI_GENERATED', 'NON_AI_GENERATED']


In [13]:
# Extract Features with DINOv2

import os
from PIL import Image
import torch
from transformers import AutoImageProcessor, AutoModel
from tqdm import tqdm

# Load DINOv2
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base", use_fast=True)
model = AutoModel.from_pretrained("facebook/dinov2-base")
model.eval()

Dinov2Model(
  (embeddings): Dinov2Embeddings(
    (patch_embeddings): Dinov2PatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): Dinov2Encoder(
    (layer): ModuleList(
      (0-11): 12 x Dinov2Layer(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attention): Dinov2Attention(
          (attention): Dinov2SelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
          )
          (output): Dinov2SelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (layer_scale1): Dinov2LayerScale()
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06,

In [6]:
import numpy as np

data = np.load("C:\\Users\\anush\\OneDrive\\Desktop\\github repos\\CV_Final_Project\\DINO - Fake Detection\\dino_features_train.npz")
X = data["X"]
y = data["y"]
print("Loaded features:", X.shape, y.shape)

Loaded features: (18618, 768) (18618,)


In [None]:
# train classifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Split and train
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

clf = LogisticRegression(max_iter = 5000)
clf.fit(X_train, y_train)

# Save model
os.makedirs("cv_final_model", exist_ok=True)
joblib.dump(clf, "cv_final_model/lr_model.pkl")
print("Model saved to model/lr_model.pkl")

Model saved to model/lr_model.pkl


In [15]:
# Predict on the test set
y_pred = clf.predict(X_test)

# Accuracy
accuracy = (y_pred == y_test).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")

# Detailed report
print("\n Classification Report:")
print(classification_report(y_test, y_pred, target_names=["Human-made", "AI-generated"]))

# Confusion matrix
print("\n Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 90.87%

 Classification Report:
              precision    recall  f1-score   support

  Human-made       0.91      0.88      0.90      1658
AI-generated       0.91      0.93      0.92      2066

    accuracy                           0.91      3724
   macro avg       0.91      0.91      0.91      3724
weighted avg       0.91      0.91      0.91      3724


 Confusion Matrix:
[[1465  193]
 [ 147 1919]]


In [26]:
# train classifier
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Split and train
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

clf = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="mlogloss")
clf.fit(X_train, y_train)

# Save model
os.makedirs("cv_final_model", exist_ok=True)
joblib.dump(clf, "cv_final_model/xgb_model.pkl")
print("Model saved to model/xgb_model.pkl")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Model saved to model/xgb_model.pkl


In [25]:
# Predict on the test set
y_pred = clf.predict(X_test)

# Accuracy
accuracy = (y_pred == y_test).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")

# Detailed report
print("\n Classification Report:")
print(classification_report(y_test, y_pred, target_names=["Human-made", "AI-generated"]))

# Confusion matrix
print("\n Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 86.28%

 Classification Report:
              precision    recall  f1-score   support

  Human-made       0.93      0.75      0.83      1658
AI-generated       0.83      0.95      0.89      2066

    accuracy                           0.86      3724
   macro avg       0.88      0.85      0.86      3724
weighted avg       0.87      0.86      0.86      3724


 Confusion Matrix:
[[1245  413]
 [  98 1968]]
