In [None]:
import torch
import torchvision
import numpy as np
import os
from PIL import Image
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm
import pandas as pd

In [None]:
root_dir = '/content/data/data'


def count_files(directory, extensions):
    """Count the entries of `directory` whose name ends with one of `extensions`.

    Args:
        directory: Path of the directory to inspect.
        extensions: A suffix string or tuple of suffix strings (e.g. ('.jpg', '.png')).

    Returns:
        The number of matching entries, or 0 when `directory` does not exist.
        Returning 0 matters for `rcnn_labels`, which is only created by the
        conversion cell further down, so this summary must not crash on a
        fresh run.
    """
    if not os.path.isdir(directory):
        return 0
    return sum(1 for name in os.listdir(directory) if name.endswith(extensions))


# Per-split overview of how many images and label files are present.
for split in ['train', 'val']:
    images_dir = os.path.join(root_dir, 'images', split)
    labels_dir = os.path.join(root_dir, 'rcnn_labels', split)  # converted (R-CNN format) labels
    orig_labels_dir = os.path.join(root_dir, 'labels', split)  # original YOLO labels

    images_count = count_files(images_dir, ('.jpg', '.png'))
    labels_count = count_files(labels_dir, '.txt')
    orig_labels_count = count_files(orig_labels_dir, '.txt')

    print(f"\n{split.upper()} klasörü:")
    print(f"Images: {images_count}")
    print(f"RCNN labels: {labels_count}")
    print(f"YOLO labels: {orig_labels_count}")

In [None]:
root_dir = '/content/data/data'
splits = ['train', 'val']

# Image extensions tried when looking up the picture that belongs to a label file.
IMAGE_EXTENSIONS = ('.jpg', '.png')

# Added to every YOLO class id before it is written out.
# NOTE(review): torchvision detection models reserve label 0 for background,
# while YOLO class ids normally start at 0. If that applies to this dataset,
# set this to 1 and make sure `num_classes` counts the background class too.
# Left at 0 so the generated files are unchanged by default.
CLASS_ID_OFFSET = 0


def find_image_for_label(images_dir, stem, extensions=IMAGE_EXTENSIONS):
    """Return the path of the first existing `stem + ext` in `images_dir`, else None."""
    for ext in extensions:
        candidate = os.path.join(images_dir, stem + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def yolo_line_to_rcnn(line, img_w, img_h, class_offset=0):
    """Convert one YOLO label line into a pixel-coordinate label line.

    Args:
        line: Text of the form "cls x_center y_center width height" with the
            four box values normalised to the image size.
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        class_offset: Integer added to the class id.

    Returns:
        "xmin ymin xmax ymax cls\n", or None when the line is blank or the box
        has no area after clamping to the image.

    Raises:
        ValueError: If the line does not contain exactly five fields or a
            field is not numeric.
    """
    parts = line.split()
    if not parts:
        return None
    if len(parts) != 5:
        raise ValueError(f"Expected 5 fields in YOLO label line, got {len(parts)}: {line!r}")

    cls = int(parts[0]) + class_offset
    x_c, y_c, w, h = map(float, parts[1:])

    # Clamp to the image so rounding or sloppy annotations cannot produce
    # coordinates outside the picture.
    xmin = max(0, int((x_c - w / 2) * img_w))
    ymin = max(0, int((y_c - h / 2) * img_h))
    xmax = min(img_w, int((x_c + w / 2) * img_w))
    ymax = min(img_h, int((y_c + h / 2) * img_h))

    # Boxes without positive width and height are rejected by the detector
    # during training, so they are dropped here.
    if xmax <= xmin or ymax <= ymin:
        return None

    # Faster R-CNN format: xmin ymin xmax ymax class
    return f"{xmin} {ymin} {xmax} {ymax} {cls}\n"


for split in splits:
    yolo_labels_dir = os.path.join(root_dir, 'labels', split)
    yolo_images_dir = os.path.join(root_dir, 'images', split)
    rcnn_labels_dir = os.path.join(root_dir, 'rcnn_labels', split)

    if not os.path.isdir(yolo_labels_dir):
        print(f"Labels directory not found: {yolo_labels_dir}")
        continue

    os.makedirs(rcnn_labels_dir, exist_ok=True)

    for label_file in sorted(os.listdir(yolo_labels_dir)):
        stem, ext = os.path.splitext(label_file)
        if ext != '.txt':
            # Ignore anything that is not a label file (caches, notes, ...).
            continue

        yolo_label_path = os.path.join(yolo_labels_dir, label_file)
        image_path = find_image_for_label(yolo_images_dir, stem)
        if image_path is None:
            print(f"Image not found for label: {yolo_label_path}")
            continue

        # Only the size is needed; the context manager closes the file handle.
        with Image.open(image_path) as img:
            W, H = img.size

        with open(yolo_label_path, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            new_line = yolo_line_to_rcnn(line, W, H, CLASS_ID_OFFSET)
            if new_line is not None:
                new_lines.append(new_line)

        out_path = os.path.join(rcnn_labels_dir, label_file)
        with open(out_path, 'w') as out_f:
            out_f.writelines(new_lines)

In [None]:
# Train split: find images that have no converted (R-CNN) label file.
images_dir = '/content/data/data/images/train'
labels_dir = '/content/data/data/rcnn_labels/train'

# Files are compared by basename (name without extension).
image_files = {
    os.path.splitext(f)[0]
    for f in os.listdir(images_dir)
    if f.endswith(('.jpg', '.png'))
}
# Only .txt files count as labels, matching the val check; otherwise any stray
# file in the label directory would make an unlabeled image look labeled.
label_files = {
    os.path.splitext(f)[0]
    for f in os.listdir(labels_dir)
    if f.endswith('.txt')
}

images_without_labels = image_files - label_files
print("Label'ı olmayan image dosyaları:", images_without_labels)

In [None]:
# Remove every image file whose basename was reported as having no label.
# Both supported extensions are probed because only the basename is known.
for img_base in images_without_labels:
    candidates = (os.path.join(images_dir, img_base + ext) for ext in ('.jpg', '.png'))
    for img_path in filter(os.path.exists, candidates):
        os.remove(img_path)
        print(f"Silindi: {img_path}")

In [None]:
# Val split: find images that have no converted (R-CNN) label file.
images_dir = '/content/data/data/images/val'
labelsrcnn_dir = '/content/data/data/rcnn_labels/val'

# Basenames (file name without extension) of the images and of the label files.
image_basenames = {
    os.path.splitext(name)[0]
    for name in os.listdir(images_dir)
    if name.endswith(('.jpg', '.png'))
}
label_basenames = {
    os.path.splitext(name)[0]
    for name in os.listdir(labelsrcnn_dir)
    if name.endswith('.txt')
}

# Images whose basename has no matching label file.
images_without_labels = image_basenames.difference(label_basenames)

print("Label'ı olmayan image dosyaları:")
for img_base in images_without_labels:
    print(img_base)


In [None]:
# Delete the unlabeled images found by the previous check, trying each
# supported extension for every basename.
for img_base in images_without_labels:
    for ext in ('.jpg', '.png'):
        img_path = os.path.join(images_dir, f"{img_base}{ext}")
        if not os.path.exists(img_path):
            continue
        os.remove(img_path)
        print(f"Silindi: {img_path}")


In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Detection dataset reading images and "xmin ymin xmax ymax class" label files.

    Images and label files are paired by basename (file name without
    extension) rather than by position in two separately sorted listings, so a
    stray or missing file cannot silently shift every following pair.
    Images without a label file, label files without an image, and files with
    other extensions are ignored.

    Args:
        images_dir: Directory containing the image files.
        labels_dir: Directory containing one `.txt` label file per image.
        transforms: Optional callable applied to the PIL image; when omitted
            the image is converted with `F.to_tensor`.

    Attributes:
        imgs: Sorted image file names that have a label file.
        boxes: Label file names, aligned index-by-index with `imgs`.
    """

    # Extensions (compared case-insensitively) treated as image files.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, images_dir, labels_dir, transforms=None):
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.transforms = transforms

        label_by_stem = {
            os.path.splitext(name)[0]: name
            for name in os.listdir(self.labels_dir)
            if name.endswith('.txt')
        }

        self.imgs = []
        self.boxes = []
        for name in sorted(os.listdir(self.images_dir)):
            stem, ext = os.path.splitext(name)
            if ext.lower() not in self.IMAGE_EXTENSIONS:
                continue
            label_name = label_by_stem.get(stem)
            if label_name is None:
                continue
            self.imgs.append(name)
            self.boxes.append(label_name)

    @staticmethod
    def _parse_label_file(box_path):
        """Read a label file into `(boxes, labels)` tensors.

        Returns:
            boxes: float32 tensor of shape (N, 4); (0, 4) for a file without boxes.
            labels: int64 tensor of shape (N,).

        Raises:
            ValueError: If a non-blank line does not hold exactly five numbers.
        """
        boxes = []
        labels = []
        with open(box_path) as f:
            for line in f:
                parts = line.split()
                if not parts:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                xmin, ymin, xmax, ymax, label = map(float, parts)
                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(int(label))
        # reshape keeps the (N, 4) layout even when there are no boxes at all.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return boxes, labels

    def __getitem__(self, idx):
        """Return `(image, target)` where target has "boxes" and "labels" tensors."""
        img_path = os.path.join(self.images_dir, self.imgs[idx])
        # convert() returns a new, fully loaded image, so the file can be closed.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        box_path = os.path.join(self.labels_dir, self.boxes[idx])
        boxes, labels = self._parse_label_file(box_path)
        target = {"boxes": boxes, "labels": labels}

        if self.transforms:
            img = self.transforms(img)
        else:
            img = F.to_tensor(img)

        return img, target

    def __len__(self):
        """Number of image/label pairs."""
        return len(self.imgs)

In [None]:
# Image / converted-label directories for each split.
train_images_dir, train_labels_dir = (
    '/content/data/data/images/train',
    '/content/data/data/rcnn_labels/train',
)
val_images_dir, val_labels_dir = (
    '/content/data/data/images/val',
    '/content/data/data/rcnn_labels/val',
)

# One dataset per split; no transforms are passed, so CustomDataset falls back
# to its default tensor conversion.
train_dataset = CustomDataset(images_dir=train_images_dir, labels_dir=train_labels_dir)
val_dataset = CustomDataset(images_dir=val_images_dir, labels_dir=val_labels_dir)

In [None]:
def _zip_batch(samples):
    """Turn a list of (image, target) samples into (images, targets) tuples.

    Detection samples are kept as tuples instead of being stacked, since each
    sample carries its own boxes/labels tensors.
    """
    return tuple(zip(*samples))


# Training batches are shuffled; validation batches keep the dataset order.
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=_zip_batch)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=_zip_batch)

In [None]:
# Size of the classification output of the replacement box predictor.
# NOTE(review): torchvision detection heads treat label 0 as background, so
# this count presumably has to include it - confirm against the label files.
num_classes = 5

# Faster R-CNN (ResNet-50 + FPN) initialised with the default pretrained weights.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box predictor with a fresh one sized for `num_classes`, keeping
# the input width of the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
predictor_cls = torchvision.models.detection.faster_rcnn.FastRCNNPredictor
model.roi_heads.box_predictor = predictor_cls(in_features, num_classes)

# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

In [None]:
# SGD with momentum and weight decay over all model parameters.
sgd_settings = dict(lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer = optim.SGD(model.parameters(), **sgd_settings)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
# NOTE(review): the training cell steps this once per epoch for 50 epochs, so
# the learning rate is cut by 10x sixteen times and becomes effectively zero
# early in the run - confirm that this schedule is intended.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
num_epochs = 50
n_samples = 250


def detection_collate(batch):
    """Regroup a list of (image, target) samples into (images, targets) tuples."""
    return tuple(zip(*batch))


def average_detection_loss(model, loader, device, desc=None):
    """Return the mean summed loss over `loader` without updating any weights.

    torchvision detection models only return the loss dict in training mode;
    in eval mode they return predictions even when targets are supplied. The
    model is therefore switched to train mode with gradients disabled for the
    duration of the pass and put back into eval mode afterwards.

    NOTE(review): train mode also changes the behaviour of BatchNorm/Dropout
    layers where present. The pretrained torchvision Faster R-CNN is expected
    to use frozen batch-norm layers and no dropout - confirm if the model or
    backbone is changed.

    Args:
        model: Detection model returning a dict of losses in train mode.
        loader: Iterable yielding (images, targets) batches.
        device: Device the batches are moved to.
        desc: Optional progress-bar description.

    Returns:
        The average per-batch loss as a float, or None if `loader` is empty.
    """
    model.train()
    total_loss = 0.0
    n_batches = 0
    with torch.no_grad():
        for images, targets in tqdm(loader, desc=desc):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            total_loss += sum(loss for loss in loss_dict.values()).item()
            n_batches += 1
    model.eval()
    return total_loss / n_batches if n_batches > 0 else None


# Lists to save losses
train_epoch_losses = []
val_epoch_losses = []

for epoch in range(num_epochs):
    # Random subset for each epoch. The size is capped at the dataset size,
    # because sampling without replacement cannot draw more items than exist.
    subset_size = min(n_samples, len(train_dataset))
    indices = np.random.choice(len(train_dataset), subset_size, replace=False).tolist()
    subset_train_dataset = Subset(train_dataset, indices)
    train_loader = DataLoader(
        subset_train_dataset, batch_size=2, shuffle=True, collate_fn=detection_collate
    )

    model.train()
    epoch_loss = 0
    print(f"Epoch {epoch+1}/{num_epochs}")

    # Train
    for images, targets in tqdm(train_loader, desc=f"Train Epoch {epoch+1}"):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In train mode the model returns a dict of individual loss terms.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        epoch_loss += losses.item()

    # max(..., 1) avoids a division by zero when the subset is empty.
    avg_train_loss = epoch_loss / max(len(train_loader), 1)
    train_epoch_losses.append(avg_train_loss)

    # Validation (Val): same loss as in training, computed without gradients.
    avg_val_loss = average_detection_loss(
        model, val_loader, device, desc=f"Val Epoch {epoch+1}"
    )
    val_epoch_losses.append(avg_val_loss)

    # (Optional) Learning rate scheduler: stepped only if one was defined.
    if 'lr_scheduler' in globals():
        lr_scheduler.step()

    print(f"Epoch {epoch+1}/{num_epochs} - Train Avg Loss: {avg_train_loss:.4f}, Val Avg Loss: {avg_val_loss}")

    # To save the model at the end of each epoch
    # torch.save(model.state_dict(), f'/content/data/data/rcnn_labels/fasterrcnn_epoch{epoch+1}.pth')

# Save last model weights
torch.save(model.state_dict(), '/content/data/data/fasterrcnn_final.pth')
print("Model is saved: /content/data/data/fasterrcnn_final.pth")

# Save Train and Val Losses
loss_df = pd.DataFrame({
    'epoch': list(range(1, num_epochs+1)),
    'train_loss': train_epoch_losses,
    'val_loss': val_epoch_losses
})
loss_df.to_csv('/content/data/data/training_log.csv', index=False)
print("Losses are saved: /content/data/data/training_log.csv")
