In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import wandb
import os
import cv2
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, classification_report
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from kaggle_secrets import UserSecretsClient

In [None]:
# Authenticate with Weights & Biases using the API key kept in Kaggle secrets,
# so the key never appears in the notebook itself.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("WANDB_KEY")
wandb.login(key=secret_value_0)

# Open the run that all later wandb.log / log_artifact calls attach to.
run = wandb.init(
    entity="DevGru",
    project="ai-vs-human-generated-images",
    name="Classification by CNN",
)


In [None]:
# Log hyperparameters
# Update the active run's config in place. Assigning a plain dict to
# `wandb.config` would only rebind the module attribute, so the values would
# never be attached to the run.
wandb.config.update({
    "epochs": 5,
    "batch_size": 32,
    "IMG_SIZE": 128,
    "optimizer": 'adam',
    "loss_function": 'binary_crossentropy',
})

In [None]:
# Dataset root and the square edge length (in pixels) that images are resized to
DATA_DIR = "/kaggle/input/ai-vs-human-generated-dataset"
IMG_SIZE = 128

# Read the training metadata, then keep a reproducible 1000-row random sample
df = pd.read_csv('/kaggle/input/ai-vs-human-generated-dataset/train.csv')
df = df.sample(n=1000, random_state=42)

In [None]:
# Preview the first rows of the sampled training metadata
df.head()

In [None]:
# Show how many sampled rows fall under each value of the 'label' column
print(df['label'].value_counts())

In [None]:
# Load images and labels
# Walk the two needed columns directly rather than df.iterrows(), which builds
# a full Series object for every row.
images, labels = [], []
for file_name, label in zip(df['file_name'], df['label']):
    img_path = os.path.join(DATA_DIR, file_name)

    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None. Fail here with the offending path instead of with an opaque
    # assertion error inside cv2.resize.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # Scale pixel values to [0, 1]. float32 is used instead of the float64
    # that `img / 255.0` would produce, halving the memory of the image array.
    img = img.astype(np.float32) / 255.0

    images.append(img)
    labels.append(label)

# Stack into arrays: X holds the images, y the matching labels (same order).
X = np.array(images)
y = np.array(labels)

In [None]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# CNN: two Conv -> MaxPool -> BatchNorm stages, then a dense head that ends in
# a single sigmoid unit for binary classification.
inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)(x)
x = tf.keras.layers.BatchNormalization()(x)

x = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPooling2D(pool_size=2, strides=2)(x)
x = tf.keras.layers.BatchNormalization()(x)

x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(units=128, activation='relu')(x)
x = tf.keras.layers.Dropout(rate=0.5)(x)
outputs = tf.keras.layers.Dense(units=1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Binary cross-entropy pairs with the single sigmoid output above
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Both callbacks watch validation loss: one stops training and restores the
# best weights, the other multiplies the learning rate by 0.2 on a plateau.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
)

In [None]:
# Custom W&B callback
class WandbCallback(tf.keras.callbacks.Callback):
    """Keras callback that forwards per-epoch loss and accuracy to W&B."""

    # W&B metric name -> key under which Keras reports it in `logs`.
    _METRIC_KEYS = {
        "train_loss": "loss",
        "train_accuracy": "accuracy",
        "val_loss": "val_loss",
        "val_accuracy": "val_accuracy",
    }

    def on_epoch_end(self, epoch, logs=None):
        """Log the metrics available for the finished epoch.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Metric dict supplied by Keras; may be None or lack the
                ``val_*`` entries when no validation data is used.
        """
        # `logs` defaults to None, so guard before indexing into it.
        logs = logs or {}
        # Only forward metrics that are present, so a missing key does not
        # raise KeyError in the middle of training.
        metrics = {
            wandb_key: logs[keras_key]
            for wandb_key, keras_key in self._METRIC_KEYS.items()
            if keras_key in logs
        }
        if metrics:
            wandb.log(metrics)

In [None]:
# Fit the CNN on the training split, validating on the held-out split after
# every epoch; the callbacks handle early stopping, LR reduction and W&B logging.
wandb_callback = WandbCallback()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    callbacks=[early_stopping, reduce_lr, wandb_callback],
    validation_data=(X_val, y_val),
)

In [None]:
# Predict on the validation split and threshold the sigmoid outputs at 0.5
# to obtain hard 0/1 class labels.
y_pred = model.predict(X_val)
y_pred_labels = np.greater(y_pred, 0.5).astype(int)

# Summary metrics on the validation split.
# NOTE(review): target_names presumes label 0 = human and 1 = AI; confirm
# against the dataset description.
f1 = f1_score(y_true=y_val, y_pred=y_pred_labels)
conf_matrix = confusion_matrix(y_true=y_val, y_pred=y_pred_labels)
class_report = classification_report(
    y_true=y_val,
    y_pred=y_pred_labels,
    target_names=['Human', 'AI'],
)

In [None]:
# Show the validation metrics in the notebook output
print(f"F1 Score: {f1:.2f}")
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", class_report, sep="\n")

# Record the final F1 score on the W&B run
wandb.log({"f1_score": f1})

# Write the trained model to disk and upload that file as a W&B model artifact
model.save('model.h5')
artifact = wandb.Artifact(name='trained_cnn_model', type='model')
artifact.add_file(local_path='model.h5')
wandb.log_artifact(artifact)

# Close the run
wandb.finish()

In [None]:
pip freeze > requirements.txt