In [None]:
# Install the notebook's third-party dependencies into the running kernel's
# environment; --quiet suppresses most of pip's output.
# NOTE(review): versions are unpinned, so a later run may resolve different releases.
%pip install ultralytics roboflow opencv-python-headless --quiet

In [None]:
from google.colab import files
# Bind the result to a name instead of leaving the call as the cell's last
# expression: files.upload() returns a {filename: bytes} dict, and echoing it
# would write the contents of kaggle.json (the API key) into the cell output.
kaggle_upload = files.upload()  # Upload your kaggle.json here

In [None]:
%mkdir -p ~/.kaggle
%cp kaggle.json ~/.kaggle/
%chmod 600 ~/.kaggle/kaggle.json

In [None]:
%kaggle datasets download -d xhlulu/140k-real-and-fake-faces
%unzip -q 140k-real-and-fake-faces.zip -d faces

In [None]:
import pandas as pd
import os
import cv2
import shutil

def prepare_yolo_from_csv(csv_path, split_name,
                          base_path="/content/faces/real_vs_fake/real-vs-fake",
                          out_root="/content/dataset"):
    """Build one YOLO-format split (images + label files) from a split CSV.

    For every CSV row the image is copied to ``<out_root>/<split_name>/images``
    and a one-line label file with the same stem is written to
    ``<out_root>/<split_name>/labels``. Each label is a single box covering
    the whole image.

    Args:
        csv_path: CSV file with a ``path`` column (image path relative to
            ``base_path``) and a ``label`` column (integer class id; the
            notebook uses real=1, fake=0).
        split_name: Name of the output split folder, e.g. ``"train"``.
        base_path: Directory the CSV ``path`` values are relative to.
        out_root: Directory under which the split folder is created.

    Raises:
        FileNotFoundError: If the CSV or a referenced image does not exist.
        ValueError: If a label cannot be converted to an integer (e.g. NaN).

    Images are copied without being decoded, and are flattened to their base
    filename in the output folder.
    """
    df = pd.read_csv(csv_path)

    # Create output directories for the YOLO formatted dataset
    out_img_dir = os.path.join(out_root, split_name, "images")
    out_lbl_dir = os.path.join(out_root, split_name, "labels")
    os.makedirs(out_img_dir, exist_ok=True)
    os.makedirs(out_lbl_dir, exist_ok=True)

    # Zipping the two columns keeps each column's own dtype and avoids
    # building a Series per row as iterrows() does.
    for rel_path, label in zip(df["path"], df["label"]):
        # Just the image filename, e.g. "31355.jpg"
        filename = os.path.basename(rel_path)
        shutil.copy(os.path.join(base_path, rel_path),
                    os.path.join(out_img_dir, filename))

        # YOLO label: class x_center y_center width height (normalized 0-1).
        # The box is the full image, so the pixel size is not needed.
        # int() guarantees an integer class id even if the column was parsed
        # as float.
        yolo_label = f"{int(label)} 0.5 0.5 1.0 1.0\n"

        # Label file shares the image's stem, with a .txt extension
        stem = os.path.splitext(filename)[0]
        with open(os.path.join(out_lbl_dir, stem + ".txt"), "w") as f:
            f.write(yolo_label)

    print(f"Processed {split_name} set: {len(df)} images")

# Convert each split in turn; every pair is (split CSV, output split folder).
# No existence check is made, so all three CSVs must be present
# (update the paths if yours differ).
for split_csv, split_dir in (
    ("/content/faces/train.csv", "train"),
    ("/content/faces/valid.csv", "val"),
    ("/content/faces/test.csv", "test"),
):
    prepare_yolo_from_csv(split_csv, split_dir)


In [None]:
# Dataset description consumed by the trainer: the image folder of each split,
# the number of classes, and the class names indexed by class id
# (0 -> 'fake', 1 -> 'real').
data_yaml = (
    "\n"
    "train: /content/dataset/train/images\n"
    "val: /content/dataset/val/images\n"
    "test: /content/dataset/test/images\n"
    "\n"
    "nc: 2\n"
    "names: ['fake', 'real']\n"
)

# Surrounding blank lines are stripped before the text is written out.
yaml_text = data_yaml.strip()
with open("/content/data.yaml", mode="w") as yaml_file:
    yaml_file.write(yaml_text)


from ultralytics import YOLO

# Load the YOLOv8s model
model = YOLO("yolov8s.pt")

# Train the model.
# project/name pin the run directory to /content/runs/detect/train, and
# exist_ok=True reuses it on a re-run instead of creating train2, train3, ...
# The inference cells load /content/runs/detect/train/weights/best.pt, so an
# auto-incremented directory would leave them reading weights from an older run.
# Trade-off: re-running this cell overwrites the previous run's artifacts.
model.train(
    data="/content/data.yaml",
    epochs=5,
    batch=64,
    imgsz=640,
    project="/content/runs/detect",
    name="train",
    exist_ok=True,
)



In [None]:
from ultralytics import YOLO
import os

# Run the trained model on a small, reproducible sample of the test split and
# print the detections found in each image.

# Load the best trained model
best_model_path = "/content/runs/detect/train/weights/best.pt"

try:
    model = YOLO(best_model_path)

    # Define the path to your testing images
    test_images_path = "/content/dataset/test/images"

    # Check if the test images directory exists
    if os.path.exists(test_images_path):
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # "first 50" the same images on every run.
        image_files = sorted(
            os.path.join(test_images_path, f)
            for f in os.listdir(test_images_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        # Select the first 50 image paths (fewer if the folder is smaller)
        top_50_images = image_files[:50]

        if top_50_images:
            # save=True also writes annotated copies of the images to disk
            results = model.predict(source=top_50_images, save=True)

            # Report the real count and the directory actually used; the
            # output folder name is incremented on repeated runs.
            print(f"Predictions completed for {len(top_50_images)} images. Results are saved in '{results[0].save_dir}'.")

            # Results come back in the same order as the input paths
            for original_image_path, r in zip(top_50_images, results):
                image_filename = os.path.basename(original_image_path)

                print(f"\n--- Predictions for Image: {image_filename} ---")
                if len(r.boxes) == 0:
                    print("  No detections")
                for box in r.boxes:
                    cls = int(box.cls)  # Class index
                    conf = box.conf.item()  # Confidence score
                    print(f"  Class: {model.names[cls]}, Confidence: {conf:.2f}")

        else:
            print(f"No image files found in {test_images_path}")

    else:
        print(f"Error: Test images directory not found at {test_images_path}")
        print("Please ensure the path to your test images is correct.")

except FileNotFoundError:
    print(f"Error: Model file not found at {best_model_path}")
    print("Please ensure the training completed successfully and the path to the best model is correct.")

In [None]:
# prompt: code to predict for the image we give as input

# Use files.upload() to upload the image you want to predict on
print("Please upload the image you want to predict on:")
uploaded_image = files.upload()

# Check if the best model exists before attempting prediction
best_model_path = "/content/runs/detect/train/weights/best.pt"

if not uploaded_image:
    # A cancelled upload returns an empty dict; indexing it would raise.
    print("No file was uploaded; nothing to predict.")

elif not os.path.exists(best_model_path):
    print(f"Error: Model file not found at {best_model_path}")
    print("Please ensure the training completed successfully and the path to the best model is correct.")

else:
    # Only the first uploaded file is used
    image_filename = next(iter(uploaded_image))
    # Uploaded files are written to the current working directory, so resolve
    # the path against it rather than assuming /content.
    uploaded_image_path = os.path.abspath(image_filename)

    try:
        # Load the trained model
        model = YOLO(best_model_path)

        # save=True saves the predicted image with boxes, save_txt=True saves labels
        results = model.predict(source=uploaded_image_path, save=True, save_txt=True)

        print(f"\n--- Prediction for Uploaded Image: {image_filename} ---")

        for r in results:
            # Report the directory actually used; the output folder name is
            # incremented on repeated runs.
            print(f"Prediction results saved in '{r.save_dir}'.")
            print("Bounding Boxes and Confidence Scores:")
            if len(r.boxes) == 0:
                print("  No detections")
            # Iterate through detected boxes in the result object
            for box in r.boxes:
                cls = int(box.cls)  # Class index
                conf = box.conf.item()  # Confidence score
                # Get the class name from the model's names attribute
                class_name = model.names[cls]
                print(f"  Class: {class_name}, Confidence: {conf:.2f}")

    except Exception as e:
        # Best-effort cell: report the failure instead of raising
        print(f"An error occurred during prediction: {e}")