# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click the 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
import os
from torchvision import transforms

class DuckDataset(Dataset):
    """Image dataset yielding (image, duck label, bounding box) triples.

    Every row of ``annotations`` is read through the columns ``DatapointID``
    (the stem of a ``.png`` file inside ``img_dir``), ``DuckOrNoDuck`` and
    ``BoundingBox`` (whitespace-separated integers).
    """

    def __init__(self, annotations, img_dir, transform=None):
        """Keep the annotation table, the image directory and the optional transform."""
        self.annotations = annotations
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load the sample stored at position ``idx`` of the annotation table.

        Returns:
            tuple: ``(image, label, bbox)`` where ``image`` is the RGB image
            (passed through ``transform`` when one was given), ``label`` is a
            float32 tensor of shape ``(1,)`` holding ``DuckOrNoDuck`` and
            ``bbox`` is a float32 tensor with the parsed ``BoundingBox`` values.

        Raises:
            FileNotFoundError: if the image is missing or cannot be decoded.
        """
        row = self.annotations.iloc[idx]
        img_path = os.path.join(self.img_dir, f"{row['DatapointID']}.png")
        image = cv2.imread(img_path)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # The box is parsed as-is; the transform below touches the image only,
        # so the coordinates stay in the annotation file's coordinate system.
        bbox = [int(x) for x in row['BoundingBox'].split()]

        if self.transform:
            image = self.transform(image)

        label = torch.tensor([row['DuckOrNoDuck']], dtype=torch.float32)
        bbox = torch.tensor(bbox, dtype=torch.float32)

        return image, label, bbox

# Preprocessing applied to every image fed to the detector: array -> PIL
# image, resize to 224x224, then conversion to a tensor.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# Training annotations and the loader that serves them in shuffled batches of 16.
annotations = pd.read_csv('./train_dataset/train-data.csv')
dataset = DuckDataset(
    annotations=annotations,
    img_dir='./train_dataset/train_dataset/',
    transform=transform,
)
loader = DataLoader(dataset=dataset, batch_size=16, shuffle=True)


In [12]:
import torch.nn as nn
import torchvision.models as models

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class DuckDetector(nn.Module):
    """ResNet-18 backbone feeding two heads: duck presence and bounding box."""

    def __init__(self):
        super().__init__()

        # Pre-trained backbone; its final layer is swapped for an identity so
        # the forward pass yields the pooled feature vector.
        backbone = models.resnet18(pretrained=True)
        num_features = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.base_model = backbone

        # Presence head: one probability per image (sigmoid output).
        classifier_layers = [
            nn.Linear(num_features, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

        # Box head: four unbounded regression outputs (x1, y1, x2, y2).
        regressor_layers = [
            nn.Linear(num_features, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 4),
        ]
        self.bbox_regressor = nn.Sequential(*regressor_layers)

    def forward(self, x):
        """Return ``(classification, bbox)`` computed from shared backbone features."""
        shared = self.base_model(x)
        return self.classifier(shared), self.bbox_regressor(shared)

# Build the detector, then move its parameters to the selected device.
model = DuckDetector()
model = model.to(device)




In [13]:
import torch.optim as optim

# Losses for the two heads and a single optimizer over all detector weights.
criterion_class = nn.BCELoss()
criterion_bbox = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

epochs = 20
model.train()

for epoch in range(epochs):
    total_loss = 0
    for images, labels, bboxes in loader:
        images = images.to(device)
        labels = labels.to(device)
        bboxes = bboxes.to(device)

        optimizer.zero_grad()

        outputs_class, outputs_bbox = model(images)

        loss_class = criterion_class(outputs_class, labels)

        # Only calculate bbox loss if duck exists (label == 1).
        # squeeze(1) drops just the label axis, so the mask keeps one entry
        # per sample even when a batch holds a single image.
        bbox_mask = labels.squeeze(1) == 1
        if bbox_mask.any():
            loss_bbox = criterion_bbox(outputs_bbox[bbox_mask], bboxes[bbox_mask])
        else:
            # Scalar zero allocated directly on the training device.
            loss_bbox = torch.zeros((), device=device)

        # NOTE(review): the box loss is computed on unscaled coordinates and is
        # added to the BCE term unweighted; the logged totals are in the
        # thousands, so the box term presumably dominates. Confirm intended.
        loss = loss_class + loss_bbox
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(loader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")


Epoch [1/20], Loss: 54115.3501
Epoch [2/20], Loss: 42444.3839
Epoch [3/20], Loss: 25055.9872
Epoch [4/20], Loss: 14013.5585
Epoch [5/20], Loss: 9196.0190
Epoch [6/20], Loss: 7572.9933
Epoch [7/20], Loss: 6724.6425
Epoch [8/20], Loss: 6685.1042
Epoch [9/20], Loss: 6398.6897
Epoch [10/20], Loss: 6310.6239
Epoch [11/20], Loss: 6005.8485
Epoch [12/20], Loss: 6402.2382
Epoch [13/20], Loss: 5785.2452
Epoch [14/20], Loss: 5827.6386
Epoch [15/20], Loss: 5447.7969
Epoch [16/20], Loss: 5274.1383
Epoch [17/20], Loss: 5010.1621
Epoch [18/20], Loss: 4411.1119
Epoch [19/20], Loss: 3701.7318
Epoch [20/20], Loss: 3269.1962


In [15]:
# Write the detector's current weights (state dict only) to disk.
torch.save(model.state_dict(), 'duck_detector_epoch20.pth')


In [16]:
# Rebuild the detector and restore the weights saved after the first 20 epochs.
model = DuckDetector().to(device)
model.load_state_dict(torch.load('duck_detector_epoch20.pth', map_location=device))

# A new optimizer is created for the rebuilt model; only the weights are
# restored from the checkpoint, not any optimizer state.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion_class = nn.BCELoss()
criterion_bbox = nn.MSELoss()

model.train()
additional_epochs = 10  # number of extra epochs to run

for epoch in range(additional_epochs):
    total_loss = 0
    for images, labels, bboxes in loader:
        images = images.to(device)
        labels = labels.to(device)
        bboxes = bboxes.to(device)

        optimizer.zero_grad()

        outputs_class, outputs_bbox = model(images)

        loss_class = criterion_class(outputs_class, labels)

        # Box loss only for samples labelled as containing a duck.
        # squeeze(1) drops just the label axis, so the mask keeps one entry
        # per sample even when a batch holds a single image.
        bbox_mask = labels.squeeze(1) == 1
        if bbox_mask.any():
            loss_bbox = criterion_bbox(outputs_bbox[bbox_mask], bboxes[bbox_mask])
        else:
            # Scalar zero allocated directly on the training device.
            loss_bbox = torch.zeros((), device=device)

        loss = loss_class + loss_bbox
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(loader)
    print(f"Continued Training Epoch [{epoch+1}/{additional_epochs}], Loss: {avg_loss:.4f}")




Continued Training Epoch [1/10], Loss: 3007.1113
Continued Training Epoch [2/10], Loss: 2651.5058
Continued Training Epoch [3/10], Loss: 2465.0838
Continued Training Epoch [4/10], Loss: 2459.6363
Continued Training Epoch [5/10], Loss: 2241.5328
Continued Training Epoch [6/10], Loss: 2112.7547
Continued Training Epoch [7/10], Loss: 2006.4342
Continued Training Epoch [8/10], Loss: 2015.6352
Continued Training Epoch [9/10], Loss: 1966.5223
Continued Training Epoch [10/10], Loss: 1901.3163


In [18]:
# Write the detector's weights after the continued training run.
torch.save(model.state_dict(), 'duck_detector_epoch30.pth')


In [19]:
import cv2
import numpy as np

test_images_path = './test_dataset/test_dataset/'
# Keep only entries whose stem is numeric: the sort key and the image id both
# call int() on the stem, so any other directory entry would raise ValueError.
test_files = sorted(
    (x for x in os.listdir(test_images_path) if x.split('.')[0].isdigit()),
    key=lambda x: int(x.split('.')[0])
)

results = []
model.eval()

for file_name in test_files:
    image_id = int(file_name.split('.')[0])
    img_path = os.path.join(test_images_path, file_name)

    image = cv2.imread(img_path)
    # cv2.imread returns None when the file cannot be decoded.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = transform(img_rgb).unsqueeze(0).to(device)

    with torch.no_grad():
        pred_class, pred_bbox = model(img_tensor)

    pred_class = pred_class.item()
    pred_bbox = pred_bbox.cpu().numpy().flatten().astype(int)

    duck_present = 1 if pred_class > 0.5 else 0

    if duck_present:
        # The reported box is the raw integer prediction.
        x1, y1, x2, y2 = pred_bbox
        bbox = f"{x1} {y1} {x2} {y2}"

        # Crop from the original (BGR) image. Lower bounds are clamped to 0
        # because a negative slice bound would be taken as an offset from the
        # far edge of the image.
        bbox_img = image[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]

        if bbox_img.size == 0:
            # Degenerate or fully out-of-image box: nothing to count, and
            # cvtColor would raise on an empty array.
            pixel_count = 0
        else:
            gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_BGR2GRAY)

            # Inverse threshold: pixels with gray value <= 10 become non-zero
            # and are counted as duck pixels.
            _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
            pixel_count = cv2.countNonZero(thresh)
    else:
        bbox = "0 0 0 0"
        pixel_count = 0

    results.append([image_id, duck_present, pixel_count, bbox])

# Save results
submission_df = pd.DataFrame(results, columns=['DatapointID', 'DuckOrNoDuck', 'PixelCount', 'BoundingBox'])
submission_df.to_csv('submission.csv', index=False)


In [20]:
import cv2
import numpy as np

# Upper bound on the fraction of a box that may be counted as duck pixels.
# NOTE(review): 0.65 is a placeholder; replace it with the measured ratio.
black_pixel_ratio = 0.65

test_images_path = './test_dataset/test_dataset/'
# Keep only entries whose stem is numeric: the sort key and the image id both
# call int() on the stem, so any other directory entry would raise ValueError.
test_files = sorted(
    (x for x in os.listdir(test_images_path) if x.split('.')[0].isdigit()),
    key=lambda x: int(x.split('.')[0])
)

results = []
model.eval()

for file_name in test_files:
    image_id = int(file_name.split('.')[0])
    img_path = os.path.join(test_images_path, file_name)

    image = cv2.imread(img_path)
    # cv2.imread returns None when the file cannot be decoded.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = transform(img_rgb).unsqueeze(0).to(device)

    with torch.no_grad():
        pred_class, pred_bbox = model(img_tensor)

    pred_class = pred_class.item()
    pred_bbox = pred_bbox.cpu().numpy().flatten().astype(int)

    duck_present = 1 if pred_class > 0.5 else 0

    if duck_present:
        # The reported box is the raw integer prediction.
        x1, y1, x2, y2 = pred_bbox
        bbox = f"{x1} {y1} {x2} {y2}"

        # Box area; each side is floored at 0 so an inverted box cannot
        # produce a negative area (and, through the cap, a negative count).
        pred_area = max(x2 - x1, 0) * max(y2 - y1, 0)

        # Crop from the original (BGR) image. Lower bounds are clamped to 0
        # because a negative slice bound would be taken as an offset from the
        # far edge of the image.
        bbox_img = image[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]

        if bbox_img.size == 0:
            # Nothing to count; cvtColor would raise on an empty array.
            black_pixels = 0
        else:
            gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_BGR2GRAY)

            # Inverse threshold: pixels with gray value <= 10 become non-zero.
            _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
            black_pixels = cv2.countNonZero(thresh)

        # Cap the count at the configured share of the box area.
        max_black_pixels = int(pred_area * black_pixel_ratio)
        pixel_count = min(black_pixels, max_black_pixels)

    else:
        bbox = "0 0 0 0"
        pixel_count = 0

    results.append([image_id, duck_present, pixel_count, bbox])

# Save results
submission_df = pd.DataFrame(results, columns=['DatapointID', 'DuckOrNoDuck', 'PixelCount', 'BoundingBox'])
submission_df.to_csv('submission.csv', index=False)


In [54]:
train_images_path = './train_dataset/train_dataset/'
# One file per annotation row, in annotation order. The refiner dataset looks
# predictions up by row position, so the list must follow the annotation
# table rather than the directory listing.
train_files = [f"{datapoint_id}.png" for datapoint_id in annotations['DatapointID']]

initial_pixel_preds = []

model.eval()

for file_name in train_files:
    img_path = os.path.join(train_images_path, file_name)
    image = cv2.imread(img_path)
    # cv2.imread returns None when the file is missing or cannot be decoded.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = transform(img_rgb).unsqueeze(0).to(device)

    with torch.no_grad():
        pred_class, pred_bbox = model(img_tensor)

    pred_bbox = pred_bbox.cpu().numpy().flatten().astype(int)
    x1, y1, x2, y2 = pred_bbox

    # Lower bounds are clamped to 0: a negative slice bound would be taken as
    # an offset from the far edge of the image.
    bbox_img = image[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]
    if bbox_img.size == 0:
        # Placeholder for degenerate boxes. It is all black, so the inverse
        # threshold below counts every one of its 32*32 pixels.
        bbox_img = np.zeros((32,32,3), dtype=np.uint8)

    gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_BGR2GRAY)
    # Inverse threshold: pixels with gray value <= 10 become non-zero.
    _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
    pixel_count = cv2.countNonZero(thresh)

    initial_pixel_preds.append(pixel_count)

print(len(initial_pixel_preds))

1828


In [30]:
# Reload the training annotations and pair each row with its initial pixel
# count before handing both to the refiner dataset.
annotations = pd.read_csv('./train_dataset/train-data.csv')

assert len(initial_pixel_preds) == len(annotations), "Predictions and annotations must match!"

refiner_dataset = PixelCountRefinerDataset(
    annotations=annotations,
    img_dir='./train_dataset/train_dataset/',
    initial_predictions=initial_pixel_preds,
    transform=transform_refiner,
)
refiner_loader = DataLoader(dataset=refiner_dataset, batch_size=16, shuffle=True)


In [23]:
class PixelCountRefinerDataset(Dataset):
    """Dataset of box crops paired with an initial and a target pixel count.

    Every row of ``annotations`` is read through the columns ``DatapointID``
    (the stem of a ``.png`` file inside ``img_dir``), ``BoundingBox``
    (whitespace-separated integers) and ``PixelCount``.
    ``initial_predictions`` is indexed by row position, so it must be ordered
    like ``annotations``.
    """

    def __init__(self, annotations, img_dir, initial_predictions, transform=None):
        """Keep the annotation table, image directory, predictions and transform."""
        self.annotations = annotations
        self.img_dir = img_dir
        self.initial_predictions = initial_predictions
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load the sample stored at position ``idx`` of the annotation table.

        Returns:
            tuple: ``(cropped_img, initial, target)`` where ``cropped_img`` is
            the crop of the annotated box (passed through ``transform`` when
            one was given) and ``initial`` / ``target`` are float32 tensors of
            shape ``(1,)`` holding the initial prediction and ``PixelCount``.

        Raises:
            FileNotFoundError: if the image is missing or cannot be decoded.
        """
        row = self.annotations.iloc[idx]
        img_id = row['DatapointID']
        bbox = [int(x) for x in row['BoundingBox'].split()]
        gt_pixel_count = row['PixelCount']

        img_path = os.path.join(self.img_dir, f"{img_id}.png")
        image = cv2.imread(img_path)
        # cv2.imread reports failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # Crop the annotated box. The image stays in OpenCV's BGR channel
        # order; no colour conversion is applied in this dataset.
        x1, y1, x2, y2 = bbox
        cropped_img = image[y1:y2, x1:x2]

        if cropped_img.size == 0:  # Avoid empty crops
            cropped_img = np.zeros((32,32,3), dtype=np.uint8)

        if self.transform:
            cropped_img = self.transform(cropped_img)

        # Initial pixel count supplied by the caller for this row.
        initial_pred_pixel = self.initial_predictions[idx]

        return cropped_img, torch.tensor([initial_pred_pixel], dtype=torch.float32), torch.tensor([gt_pixel_count], dtype=torch.float32)

# Preprocessing for refiner crops: array -> PIL image, resize to 64x64, tensor.
transform_refiner = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# Initial pixel counts are taken from the latest submission table.
# NOTE(review): submission_df is produced by the inference cells over the test
# images, while `annotations` is the training table. Confirm that both really
# describe the same images in the same order before training on this pairing.
initial_pixel_preds = submission_df['PixelCount'].values
# The dataset indexes predictions by annotation row, so a length mismatch means
# rows would be paired with the wrong prediction (or run past the end).
if len(initial_pixel_preds) != len(annotations):
    raise ValueError(
        f"Got {len(initial_pixel_preds)} initial predictions for "
        f"{len(annotations)} annotation rows; they must match one-to-one."
    )
refiner_dataset = PixelCountRefinerDataset(annotations, './train_dataset/train_dataset/', initial_pixel_preds, transform=transform_refiner)
refiner_loader = DataLoader(refiner_dataset, batch_size=16, shuffle=True)


In [24]:
class PixelRefiner(nn.Module):
    """Small CNN that maps a crop plus an initial pixel count to a refined count."""

    def __init__(self):
        super().__init__()
        # Two conv stages pooled to a fixed 4x4 grid, so the flattened feature
        # size does not depend on the crop resolution.
        conv_stack = [
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((4, 4)),
        ]
        self.conv_layers = nn.Sequential(*conv_stack)

        # Regression head over the image features plus the one extra scalar.
        # The closing ReLU keeps the predicted count non-negative.
        head = [
            nn.Linear(32 * 4 * 4 + 1, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.ReLU(),
        ]
        self.fc_attention = nn.Sequential(*head)

    def forward(self, img, initial_pixel_count):
        """Return the refined count for each crop in the batch.

        ``initial_pixel_count`` is concatenated to the flattened image
        features along dim 1, so it must be 2-D with one column.
        """
        pooled = self.conv_layers(img)
        flat = pooled.reshape(pooled.size(0), -1)
        joined = torch.cat((flat, initial_pixel_count), dim=1)
        return self.fc_attention(joined)


In [25]:
# Build the refiner, then move its parameters to the selected device.
refiner_model = PixelRefiner()
refiner_model = refiner_model.to(device)


In [31]:
# Regression objective and optimizer for the refiner.
criterion_refiner = nn.MSELoss()
optimizer_refiner = torch.optim.Adam(refiner_model.parameters(), lr=1e-4)

epochs_refiner = 5
refiner_model.train()

for epoch in range(epochs_refiner):
    total_loss = 0
    for batch in refiner_loader:
        # Each batch is (crops, initial counts, target counts).
        imgs, initial_preds, gt_pixels = (tensor.to(device) for tensor in batch)

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss = total_loss + loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(refiner_loader)
    print(f"Pixel Refiner Epoch [{epoch+1}/{epochs_refiner}], Loss: {avg_loss:.4f}")

# Write the trained refiner weights to disk.
torch.save(refiner_model.state_dict(), 'pixel_refiner.pth')


Pixel Refiner Epoch [1/5], Loss: 6023571.5272
Pixel Refiner Epoch [2/5], Loss: 5759808.7717
Pixel Refiner Epoch [3/5], Loss: 4607347.8870
Pixel Refiner Epoch [4/5], Loss: 2644551.9228
Pixel Refiner Epoch [5/5], Loss: 1697470.3673


In [33]:
# Rebuild the refiner and restore the weights from 'pixel_refiner.pth'.
# The load call is the cell's last expression, so its result is displayed.
refiner_model = PixelRefiner().to(device)
refiner_model.load_state_dict(torch.load('pixel_refiner.pth', map_location=device))


<All keys matched successfully>

In [34]:
# Regression objective, and a new optimizer bound to the current
# refiner_model's parameters.
criterion_refiner = nn.MSELoss()
optimizer_refiner = torch.optim.Adam(refiner_model.parameters(), lr=1e-4)


In [35]:
# Number of extra refiner epochs to run in this cell.
additional_epochs = 10

refiner_model.train()

for epoch in range(additional_epochs):
    total_loss = 0
    for batch in refiner_loader:
        # Each batch is (crops, initial counts, target counts).
        imgs, initial_preds, gt_pixels = (tensor.to(device) for tensor in batch)

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss = total_loss + loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(refiner_loader)
    print(f"Continued Pixel Refiner Epoch [{epoch+1}/{additional_epochs}], Loss: {avg_loss:.4f}")


Continued Pixel Refiner Epoch [1/10], Loss: 1617895.6560
Continued Pixel Refiner Epoch [2/10], Loss: 1573489.3372
Continued Pixel Refiner Epoch [3/10], Loss: 1543149.4772
Continued Pixel Refiner Epoch [4/10], Loss: 1522792.0071
Continued Pixel Refiner Epoch [5/10], Loss: 1492897.9394
Continued Pixel Refiner Epoch [6/10], Loss: 1468624.6633
Continued Pixel Refiner Epoch [7/10], Loss: 1444724.7141
Continued Pixel Refiner Epoch [8/10], Loss: 1422006.9397
Continued Pixel Refiner Epoch [9/10], Loss: 1418509.6552
Continued Pixel Refiner Epoch [10/10], Loss: 1391945.7663


In [39]:
# Write the refiner's current weights; this file name is reused by later save
# cells, so each save overwrites the previous checkpoint.
torch.save(refiner_model.state_dict(), 'pixel_refiner_continued.pth')
print("Refiner model saved after additional training epochs!")


Refiner model saved after additional training epochs!


In [37]:
# Number of extra refiner epochs to run in this cell.
additional_epochs = 30

refiner_model.train()

for epoch in range(additional_epochs):
    total_loss = 0
    for batch in refiner_loader:
        # Each batch is (crops, initial counts, target counts).
        imgs, initial_preds, gt_pixels = (tensor.to(device) for tensor in batch)

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss = total_loss + loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(refiner_loader)
    print(f"Continued Pixel Refiner Epoch [{epoch+1}/{additional_epochs}], Loss: {avg_loss:.4f}")

Continued Pixel Refiner Epoch [1/30], Loss: 1399521.1486
Continued Pixel Refiner Epoch [2/30], Loss: 1360890.9394
Continued Pixel Refiner Epoch [3/30], Loss: 1347520.1874
Continued Pixel Refiner Epoch [4/30], Loss: 1334145.2549
Continued Pixel Refiner Epoch [5/30], Loss: 1327862.4568
Continued Pixel Refiner Epoch [6/30], Loss: 1310872.1761
Continued Pixel Refiner Epoch [7/30], Loss: 1293566.1733
Continued Pixel Refiner Epoch [8/30], Loss: 1294373.3609
Continued Pixel Refiner Epoch [9/30], Loss: 1271657.3269
Continued Pixel Refiner Epoch [10/30], Loss: 1273715.5709
Continued Pixel Refiner Epoch [11/30], Loss: 1291761.3894
Continued Pixel Refiner Epoch [12/30], Loss: 1253799.0845
Continued Pixel Refiner Epoch [13/30], Loss: 1245438.2226
Continued Pixel Refiner Epoch [14/30], Loss: 1221236.7787
Continued Pixel Refiner Epoch [15/30], Loss: 1211577.8880
Continued Pixel Refiner Epoch [16/30], Loss: 1226701.7560
Continued Pixel Refiner Epoch [17/30], Loss: 1190583.3511
Continued Pixel Refiner

In [40]:
# Overwrite 'pixel_refiner_continued.pth' with the refiner's current weights.
torch.save(refiner_model.state_dict(), 'pixel_refiner_continued.pth')
print("Refiner model saved after additional training epochs!")

Refiner model saved after additional training epochs!


In [59]:
# Number of extra refiner epochs to run in this cell.
additional_epochs = 30

refiner_model.train()

for epoch in range(additional_epochs):
    total_loss = 0
    for batch in refiner_loader:
        # Each batch is (crops, initial counts, target counts).
        imgs, initial_preds, gt_pixels = (tensor.to(device) for tensor in batch)

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss = total_loss + loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(refiner_loader)
    print(f"Continued Pixel Refiner Epoch [{epoch+1}/{additional_epochs}], Loss: {avg_loss:.4f}")

Continued Pixel Refiner Epoch [1/30], Loss: 890307.1343
Continued Pixel Refiner Epoch [2/30], Loss: 887078.9160
Continued Pixel Refiner Epoch [3/30], Loss: 892068.8313
Continued Pixel Refiner Epoch [4/30], Loss: 881282.6845
Continued Pixel Refiner Epoch [5/30], Loss: 882212.5920
Continued Pixel Refiner Epoch [6/30], Loss: 881468.0709
Continued Pixel Refiner Epoch [7/30], Loss: 879330.4565
Continued Pixel Refiner Epoch [8/30], Loss: 883346.9113
Continued Pixel Refiner Epoch [9/30], Loss: 886276.2048
Continued Pixel Refiner Epoch [10/30], Loss: 877052.1212
Continued Pixel Refiner Epoch [11/30], Loss: 875393.2920
Continued Pixel Refiner Epoch [12/30], Loss: 878483.9274
Continued Pixel Refiner Epoch [13/30], Loss: 873131.6448
Continued Pixel Refiner Epoch [14/30], Loss: 895449.7414
Continued Pixel Refiner Epoch [15/30], Loss: 872132.4972
Continued Pixel Refiner Epoch [16/30], Loss: 880617.4871
Continued Pixel Refiner Epoch [17/30], Loss: 871563.1726
Continued Pixel Refiner Epoch [18/30], L

In [60]:
# Overwrite 'pixel_refiner_continued.pth' with the refiner's current weights.
torch.save(refiner_model.state_dict(), 'pixel_refiner_continued.pth')
print("Refiner model saved after additional training epochs!")

Refiner model saved after additional training epochs!


In [61]:
# Restore the refiner weights from the latest 'pixel_refiner_continued.pth'.
refiner_model.load_state_dict(torch.load('pixel_refiner_continued.pth', map_location=device))


<All keys matched successfully>

In [55]:
# Two-stage inference: the detector proposes a box, a threshold count gives an
# initial pixel estimate, and the refiner turns crop + estimate into the final
# count. Relies on test_files / test_images_path from the earlier inference cell.
results = []
model.eval()
refiner_model.eval()

for file_name in test_files:
    image_id = int(file_name.split('.')[0])
    img_path = os.path.join(test_images_path, file_name)

    image = cv2.imread(img_path)
    # cv2.imread returns None when the file cannot be decoded.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = transform(img_rgb).unsqueeze(0).to(device)

    with torch.no_grad():
        pred_class, pred_bbox = model(img_tensor)

    pred_class = pred_class.item()
    pred_bbox = pred_bbox.cpu().numpy().flatten().astype(int)

    duck_present = 1 if pred_class > 0.5 else 0

    if duck_present:
        # The reported box is the raw integer prediction.
        x1, y1, x2, y2 = pred_bbox
        bbox = f"{x1} {y1} {x2} {y2}"

        # Lower bounds are clamped to 0 for cropping: a negative slice bound
        # would be taken as an offset from the far edge, so a box overhanging
        # the top/left border would otherwise lose its whole crop.
        bbox_img = image[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]
        if bbox_img.size == 0:
            # Black placeholder for degenerate boxes.
            bbox_img = np.zeros((32,32,3), dtype=np.uint8)

        bbox_img_tensor = transform_refiner(bbox_img).unsqueeze(0).to(device)

        # Initial prediction: count of pixels with gray value <= 10 in the crop.
        gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
        initial_pixel_count = cv2.countNonZero(thresh)
        initial_pixel_count_tensor = torch.tensor([[initial_pixel_count]], dtype=torch.float32).to(device)

        # Refined prediction
        with torch.no_grad():
            refined_pixel_count = refiner_model(bbox_img_tensor, initial_pixel_count_tensor).item()

        # int() truncates the refiner's float output.
        pixel_count = int(refined_pixel_count)

    else:
        bbox = "0 0 0 0"
        pixel_count = 0

    results.append([image_id, duck_present, pixel_count, bbox])

submission_df = pd.DataFrame(results, columns=['DatapointID', 'DuckOrNoDuck', 'PixelCount', 'BoundingBox'])
submission_df.to_csv('submission.csv', index=False)


In [51]:
# First model (classification + bbox): new detector with the epoch-30 weights.
# It is left in the default training mode here; eval() is not called on it.
first_model = DuckDetector().to(device)
first_model.load_state_dict(torch.load('duck_detector_epoch30.pth', map_location=device))

# Second model (pixel-count refiner), restored from the continued checkpoint.
second_model = PixelRefiner().to(device)
second_model.load_state_dict(torch.load('pixel_refiner_continued.pth', map_location=device))
# eval() only switches the module to evaluation mode; it does not freeze the
# parameters. As the last expression it also makes the cell display the module.
second_model.eval()




PixelRefiner(
  (conv_layers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): AdaptiveAvgPool2d(output_size=(4, 4))
  )
  (fc_attention): Sequential(
    (0): Linear(in_features=513, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
    (3): ReLU()
  )
)

In [52]:
# New optimizer and losses for further training of first_model.
optimizer = torch.optim.Adam(first_model.parameters(), lr=1e-4)
criterion_class = nn.BCELoss()
criterion_bbox = nn.MSELoss()

first_model.train()
additional_epochs = 10

for epoch in range(additional_epochs):
    total_loss = 0
    for images, labels, bboxes in loader:
        images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)

        optimizer.zero_grad()
        outputs_class, outputs_bbox = first_model(images)

        loss_class = criterion_class(outputs_class, labels)

        # Box loss only for samples labelled as containing a duck.
        # squeeze(1) drops just the label axis, so the mask keeps one entry
        # per sample even when a batch holds a single image.
        bbox_mask = labels.squeeze(1) == 1
        if bbox_mask.any():
            loss_bbox = criterion_bbox(outputs_bbox[bbox_mask], bboxes[bbox_mask])
        else:
            # Scalar zero allocated directly on the training device.
            loss_bbox = torch.zeros((), device=device)

        loss = loss_class + loss_bbox
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(loader)
    print(f"Continued Epoch [{epoch+1}/{additional_epochs}], Loss: {avg_loss:.4f}")


Continued Epoch [1/10], Loss: 1873.6875
Continued Epoch [2/10], Loss: 1808.1096
Continued Epoch [3/10], Loss: 1754.0883
Continued Epoch [4/10], Loss: 1796.5417
Continued Epoch [5/10], Loss: 1774.1952
Continued Epoch [6/10], Loss: 1731.2275
Continued Epoch [7/10], Loss: 1633.4038
Continued Epoch [8/10], Loss: 1604.5541
Continued Epoch [9/10], Loss: 1611.6569
Continued Epoch [10/10], Loss: 1664.2493


In [56]:
# Write first_model's current weights to disk.
torch.save(first_model.state_dict(), 'duck_detector_epoch40.pth')

In [57]:
# Restore the epoch-40 weights and switch the detector to evaluation mode.
# eval() is the last expression, so the cell displays the module.
first_model.load_state_dict(torch.load('duck_detector_epoch40.pth', map_location=device))
first_model.eval()


DuckDetector(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, t

In [65]:
# Two-stage inference with first_model as the detector: it proposes a box, a
# threshold count gives an initial pixel estimate, and the refiner produces the
# final count. Relies on test_files / test_images_path from an earlier cell.
results = []
first_model.eval()
refiner_model.eval()

for file_name in test_files:
    image_id = int(file_name.split('.')[0])
    img_path = os.path.join(test_images_path, file_name)

    image = cv2.imread(img_path)
    # cv2.imread returns None when the file cannot be decoded.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = transform(img_rgb).unsqueeze(0).to(device)

    # Detection runs on first_model, not on the older `model` variable.
    with torch.no_grad():
        pred_class, pred_bbox = first_model(img_tensor)

    pred_class = pred_class.item()
    pred_bbox = pred_bbox.cpu().numpy().flatten().astype(int)

    duck_present = 1 if pred_class > 0.5 else 0

    if duck_present:
        # The reported box is the raw integer prediction.
        x1, y1, x2, y2 = pred_bbox
        bbox = f"{x1} {y1} {x2} {y2}"

        # Lower bounds are clamped to 0 for cropping: a negative slice bound
        # would be taken as an offset from the far edge, so a box overhanging
        # the top/left border would otherwise lose its whole crop.
        bbox_img = image[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]
        if bbox_img.size == 0:
            # Black placeholder for degenerate boxes.
            bbox_img = np.zeros((32,32,3), dtype=np.uint8)

        bbox_img_tensor = transform_refiner(bbox_img).unsqueeze(0).to(device)

        # Initial prediction: count of pixels with gray value <= 10 in the crop.
        gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
        initial_pixel_count = cv2.countNonZero(thresh)
        initial_pixel_count_tensor = torch.tensor([[initial_pixel_count]], dtype=torch.float32).to(device)

        # Refined prediction
        with torch.no_grad():
            refined_pixel_count = refiner_model(bbox_img_tensor, initial_pixel_count_tensor).item()

        # int() truncates the refiner's float output.
        pixel_count = int(refined_pixel_count)

    else:
        bbox = "0 0 0 0"
        pixel_count = 0

    results.append([image_id, duck_present, pixel_count, bbox])

submission_df = pd.DataFrame(results, columns=['DatapointID', 'DuckOrNoDuck', 'PixelCount', 'BoundingBox'])
submission_df.to_csv('submission.csv', index=False)



In [63]:
# Resume from the latest refiner checkpoint, train further, then overwrite it.
refiner_model.load_state_dict(torch.load('pixel_refiner_continued.pth', map_location=device))

# Number of extra refiner epochs to run in this cell.
additional_epochs = 30

refiner_model.train()

for epoch in range(additional_epochs):
    total_loss = 0
    for batch in refiner_loader:
        # Each batch is (crops, initial counts, target counts).
        imgs, initial_preds, gt_pixels = (tensor.to(device) for tensor in batch)

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss = total_loss + loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(refiner_loader)
    print(f"Continued Pixel Refiner Epoch [{epoch + 1}/{additional_epochs}], Loss: {avg_loss:.4f}")
torch.save(refiner_model.state_dict(), 'pixel_refiner_continued.pth')
print("Refiner model saved after additional training epochs!")

Continued Pixel Refiner Epoch [1/30], Loss: 867247.4588
Continued Pixel Refiner Epoch [2/30], Loss: 880247.6973
Continued Pixel Refiner Epoch [3/30], Loss: 866849.7111
Continued Pixel Refiner Epoch [4/30], Loss: 864974.8598
Continued Pixel Refiner Epoch [5/30], Loss: 867865.4273
Continued Pixel Refiner Epoch [6/30], Loss: 872305.6505
Continued Pixel Refiner Epoch [7/30], Loss: 864377.0539
Continued Pixel Refiner Epoch [8/30], Loss: 887525.7345
Continued Pixel Refiner Epoch [9/30], Loss: 874765.7110
Continued Pixel Refiner Epoch [10/30], Loss: 873060.5463
Continued Pixel Refiner Epoch [11/30], Loss: 868677.5329
Continued Pixel Refiner Epoch [12/30], Loss: 864041.8428
Continued Pixel Refiner Epoch [13/30], Loss: 864085.9435
Continued Pixel Refiner Epoch [14/30], Loss: 863256.4262
Continued Pixel Refiner Epoch [15/30], Loss: 866231.1448
Continued Pixel Refiner Epoch [16/30], Loss: 867151.7416
Continued Pixel Refiner Epoch [17/30], Loss: 862925.5605
Continued Pixel Refiner Epoch [18/30], L

In [64]:
# Re-print the summary of the last finished epoch and save the refiner again.
# epoch, additional_epochs and avg_loss are not set in this cell; they are the
# values left in the kernel by the most recently run training loop.
print(f"Continued Pixel Refiner Epoch [{epoch + 1}/{additional_epochs}], Loss: {avg_loss:.4f}")
torch.save(refiner_model.state_dict(), 'pixel_refiner_continued.pth')
print("Refiner model saved after additional training epochs!")

Continued Pixel Refiner Epoch [30/30], Loss: 862139.3560
Refiner model saved after additional training epochs!


In [66]:
# Rebuild the detector from the epoch-40 checkpoint and put it in eval mode.
first_model = DuckDetector().to(device)
first_model.load_state_dict(torch.load('duck_detector_epoch40.pth', map_location=device))
first_model.eval()

# Restore the refiner weights into the existing refiner_model instance.
# eval() is the last expression, so the cell displays the refiner module.
refiner_model.load_state_dict(torch.load('pixel_refiner_continued.pth', map_location=device))
refiner_model.eval()




PixelRefiner(
  (conv_layers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): AdaptiveAvgPool2d(output_size=(4, 4))
  )
  (fc_attention): Sequential(
    (0): Linear(in_features=513, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
    (3): ReLU()
  )
)

In [67]:
# Two-stage inference with first_model as the detector. Neither model is put
# into eval mode in this cell, so it relies on that having been done before.
# Also relies on test_files / test_images_path from an earlier cell.
results = []

for file_name in test_files:
    image_id = int(file_name.split('.')[0])
    img_path = os.path.join(test_images_path, file_name)

    image = cv2.imread(img_path)
    # cv2.imread returns None when the file cannot be decoded.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = transform(img_rgb).unsqueeze(0).to(device)

    # Detection runs on first_model.
    with torch.no_grad():
        pred_class, pred_bbox = first_model(img_tensor)

    pred_class = pred_class.item()
    pred_bbox = pred_bbox.cpu().numpy().flatten().astype(int)

    duck_present = 1 if pred_class > 0.5 else 0

    if duck_present:
        # The reported box is the raw integer prediction.
        x1, y1, x2, y2 = pred_bbox
        bbox = f"{x1} {y1} {x2} {y2}"

        # Lower bounds are clamped to 0 for cropping: a negative slice bound
        # would be taken as an offset from the far edge, so a box overhanging
        # the top/left border would otherwise lose its whole crop.
        bbox_img = image[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]
        if bbox_img.size == 0:
            # Black placeholder for degenerate boxes.
            bbox_img = np.zeros((32,32,3), dtype=np.uint8)

        bbox_img_tensor = transform_refiner(bbox_img).unsqueeze(0).to(device)

        # Initial prediction: count of pixels with gray value <= 10 in the crop.
        gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
        initial_pixel_count = cv2.countNonZero(thresh)
        initial_pixel_count_tensor = torch.tensor([[initial_pixel_count]], dtype=torch.float32).to(device)

        # Refined prediction
        with torch.no_grad():
            refined_pixel_count = refiner_model(bbox_img_tensor, initial_pixel_count_tensor).item()

        # int() truncates the refiner's float output.
        pixel_count = int(refined_pixel_count)

    else:
        bbox = "0 0 0 0"
        pixel_count = 0

    results.append([image_id, duck_present, pixel_count, bbox])

submission_df = pd.DataFrame(results, columns=['DatapointID', 'DuckOrNoDuck', 'PixelCount', 'BoundingBox'])
submission_df.to_csv('submission.csv', index=False)


In [68]:
# Continue training the detector for a few more epochs with a combined
# classification (BCE) + bounding-box regression (MSE) objective.
optimizer = torch.optim.Adam(first_model.parameters(), lr=1e-4)
criterion_class = nn.BCELoss()
criterion_bbox = nn.MSELoss()

first_model.train()
additional_epochs = 10

for epoch in range(additional_epochs):
    total_loss = 0
    for images, labels, bboxes in loader:
        images, labels, bboxes = images.to(device), labels.to(device), bboxes.to(device)

        optimizer.zero_grad()
        outputs_class, outputs_bbox = first_model(images)

        loss_class = criterion_class(outputs_class, labels)

        # Box regression is only supervised on samples labelled as duck.
        # squeeze(1) drops just the label column, so the mask stays 1-D even
        # when the final batch holds a single sample (a bare squeeze() would
        # collapse it to a 0-d mask).
        bbox_mask = labels.squeeze(1) == 1
        if bbox_mask.any():
            loss_bbox = criterion_bbox(outputs_bbox[bbox_mask], bboxes[bbox_mask])
        else:
            # No positive samples in this batch: contribute a zero box loss.
            loss_bbox = torch.tensor(0.0, device=device)

        # NOTE(review): the two terms are summed unweighted; judging by the
        # logged values the box MSE appears to dominate the total -- confirm
        # whether a weighting or coordinate normalisation is wanted.
        loss = loss_class + loss_bbox
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean loss per batch for this epoch.
    avg_loss = total_loss / len(loader)
    print(f"Continued Epoch [{epoch+1}/{additional_epochs}], Loss: {avg_loss:.4f}")


Continued Epoch [1/10], Loss: 1570.5534
Continued Epoch [2/10], Loss: 1524.5895
Continued Epoch [3/10], Loss: 1516.5384
Continued Epoch [4/10], Loss: 1511.1019
Continued Epoch [5/10], Loss: 1381.4297
Continued Epoch [6/10], Loss: 1363.5008
Continued Epoch [7/10], Loss: 1386.3165
Continued Epoch [8/10], Loss: 1304.4864
Continued Epoch [9/10], Loss: 1294.9442
Continued Epoch [10/10], Loss: 1266.0421


In [69]:
# Persist the detector's current weights, read the same checkpoint straight
# back in, and leave the model in evaluation mode.
# NOTE(review): this overwrites 'duck_detector_epoch40.pth' in place -- confirm
# the file name still describes the weights being stored.
torch.save(obj=first_model.state_dict(), f='duck_detector_epoch40.pth')
first_model.load_state_dict(
    torch.load('duck_detector_epoch40.pth', map_location=device)
)
first_model.eval()


DuckDetector(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, t

In [70]:
# Run the detector over every test image, estimate a pixel count for images
# classified as containing a duck, and write the rows to submission.csv.
results = []

for file_name in test_files:
    # The numeric file stem is used as the datapoint id.
    image_id = int(file_name.split('.')[0])
    img_path = os.path.join(test_images_path, file_name)

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail with the offending path instead of an obscure cvtColor error.
        raise FileNotFoundError(f"Could not read test image: {img_path}")
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = transform(img_rgb).unsqueeze(0).to(device)

    with torch.no_grad():
        pred_class, pred_bbox = first_model(img_tensor)

    pred_class = pred_class.item()
    pred_bbox = pred_bbox.cpu().numpy().flatten().astype(int)

    duck_present = 1 if pred_class > 0.5 else 0

    if duck_present:
        x1, y1, x2, y2 = pred_bbox
        # The submitted box is the raw (integer-truncated) prediction.
        bbox = f"{x1} {y1} {x2} {y2}"

        # Clamp only the crop window to the image: a negative predicted
        # coordinate would otherwise be read as a from-the-end index and
        # select the wrong region (or nothing at all).
        # NOTE(review): assumes the detector predicts boxes in original-image
        # pixel coordinates -- confirm against how it was trained.
        img_h, img_w = image.shape[:2]
        crop_x1, crop_x2 = (min(max(int(v), 0), img_w) for v in (x1, x2))
        crop_y1, crop_y2 = (min(max(int(v), 0), img_h) for v in (y1, y2))

        bbox_img = image[crop_y1:crop_y2, crop_x1:crop_x2]
        if bbox_img.size == 0:
            # Degenerate box: fall back to a black placeholder crop.
            bbox_img = np.zeros((32,32,3), dtype=np.uint8)

        # NOTE(review): bbox_img is in BGR channel order here (it is cut from
        # the cv2.imread result); confirm the refiner was trained on the same
        # channel order.
        bbox_img_tensor = transform_refiner(bbox_img).unsqueeze(0).to(device)

        # Initial estimate: number of crop pixels with gray value <= 10.
        gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
        initial_pixel_count = cv2.countNonZero(thresh)
        initial_pixel_count_tensor = torch.tensor(
            [[initial_pixel_count]], dtype=torch.float32, device=device
        )

        # Refined estimate from the refiner network.
        with torch.no_grad():
            refined_pixel_count = refiner_model(bbox_img_tensor, initial_pixel_count_tensor).item()

        pixel_count = int(refined_pixel_count)

    else:
        bbox = "0 0 0 0"
        pixel_count = 0

    results.append([image_id, duck_present, pixel_count, bbox])

submission_df = pd.DataFrame(results, columns=['DatapointID', 'DuckOrNoDuck', 'PixelCount', 'BoundingBox'])
submission_df.to_csv('submission.csv', index=False)

In [71]:
# Build refiner training triples (crop, initial count, target count) by
# running the current detector over the training loader and cropping each
# positive sample at the detector's predicted box.
first_model.eval()
updated_refiner_data = []

for images, labels, gt_bboxes in loader:
    images, labels = images.to(device), labels.to(device)

    with torch.no_grad():
        pred_class, pred_bbox = first_model(images)

    pred_bbox = pred_bbox.cpu().numpy().astype(int)
    images_np = images.cpu().numpy()

    for idx in range(len(images)):
        # Only samples labelled as containing a duck contribute crops.
        if labels[idx].item() == 1:
            x1, y1, x2, y2 = pred_bbox[idx]
            # Back to an HxWxC uint8 image from the CxHxW batch tensor.
            img = images_np[idx].transpose(1,2,0) * 255
            img = img.astype(np.uint8)

            # Clamp the crop window to the image so a negative predicted
            # coordinate is not read as a from-the-end index.
            # NOTE(review): the crop is taken from the resized batch image
            # while the box comes from the detector -- confirm both use the
            # same coordinate space.
            img_h, img_w = img.shape[:2]
            crop_x1, crop_x2 = (min(max(int(v), 0), img_w) for v in (x1, x2))
            crop_y1, crop_y2 = (min(max(int(v), 0), img_h) for v in (y1, y2))

            bbox_img = img[crop_y1:crop_y2, crop_x1:crop_x2]
            if bbox_img.size == 0:
                # Degenerate box: fall back to a black placeholder crop.
                bbox_img = np.zeros((32,32,3), dtype=np.uint8)

            # Target value: the area (width * height) of the ground-truth box.
            gt_bbox = gt_bboxes[idx].cpu().numpy().astype(int)
            gt_x1, gt_y1, gt_x2, gt_y2 = gt_bbox
            gt_pixel_count = (gt_x2 - gt_x1) * (gt_y2 - gt_y1)

            # Initial estimate: number of crop pixels with gray value <= 10.
            # The crop is RGB here (the dataset converts BGR -> RGB before
            # batching), so it must be converted with RGB2GRAY; BGR2GRAY
            # would swap the red and blue weights.
            gray_bbox_img = cv2.cvtColor(bbox_img, cv2.COLOR_RGB2GRAY)
            _, thresh = cv2.threshold(gray_bbox_img, 10, 255, cv2.THRESH_BINARY_INV)
            initial_pixel_count = cv2.countNonZero(thresh)

            updated_refiner_data.append((bbox_img, initial_pixel_count, gt_pixel_count))


In [72]:
class UpdatedRefinerDataset(Dataset):
    """Dataset over pre-built ``(image, initial_count, target_count)`` triples.

    Args:
        data: Indexable, sized collection of 3-tuples
            ``(image, initial prediction, ground-truth pixel count)``.
        transform: Optional callable applied to the image of each sample
            before it is returned.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.data = data

    def __len__(self):
        """Number of stored samples."""
        return len(self.data)

    @staticmethod
    def _as_float_tensor(value):
        """Wrap a scalar in a one-element float32 tensor."""
        return torch.tensor([value], dtype=torch.float32)

    def __getitem__(self, idx):
        """Return ``(image, initial_count_tensor, target_count_tensor)`` for ``idx``."""
        sample_img, first_guess, target = self.data[idx]
        if self.transform:
            sample_img = self.transform(sample_img)
        return (
            sample_img,
            self._as_float_tensor(first_guess),
            self._as_float_tensor(target),
        )

# Preprocessing applied to every refiner crop:
# array -> PIL image -> resize to 64x64 -> tensor.
transform_refiner = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(size=(64, 64)),
        transforms.ToTensor(),
    ]
)

# Wrap the collected triples in a dataset and serve them in shuffled
# mini-batches of 16.
updated_dataset = UpdatedRefinerDataset(
    data=updated_refiner_data,
    transform=transform_refiner,
)
updated_loader = DataLoader(dataset=updated_dataset, batch_size=16, shuffle=True)


In [77]:
# Warm-start the refiner from the continued-training checkpoint and switch it
# to training mode for fine-tuning on `updated_loader`.
refiner_model.load_state_dict(
    torch.load('pixel_refiner_continued.pth', map_location=device)
)
refiner_model.train()

# Mean-squared error against the target pixel count; Adam with a small
# learning rate for fine-tuning.
criterion_refiner = nn.MSELoss()
optimizer_refiner = torch.optim.Adam(params=refiner_model.parameters(), lr=1e-5)

fine_tune_epochs = 500
for epoch in range(fine_tune_epochs):
    total_loss = 0
    for imgs, initial_preds, gt_pixels in updated_loader:
        # Move the whole batch onto the training device.
        imgs, initial_preds, gt_pixels = (
            part.to(device) for part in (imgs, initial_preds, gt_pixels)
        )

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss += loss.item()

    # Mean loss per batch for this epoch.
    avg_loss = total_loss / len(updated_loader)
    print(f"Fine-tune Pixel Refiner Epoch [{epoch+1}/{fine_tune_epochs}], Loss: {avg_loss:.4f}")

# Store the fine-tuned weights under their own checkpoint name.
torch.save(refiner_model.state_dict(), 'pixel_refiner_finetuned.pth')
print("Pixel Refiner fine-tuned and saved successfully!")


Fine-tune Pixel Refiner Epoch [1/500], Loss: 178285390.2796
Fine-tune Pixel Refiner Epoch [2/500], Loss: 177023110.8387
Fine-tune Pixel Refiner Epoch [3/500], Loss: 175386419.4839
Fine-tune Pixel Refiner Epoch [4/500], Loss: 174279361.2043
Fine-tune Pixel Refiner Epoch [5/500], Loss: 172622228.8172
Fine-tune Pixel Refiner Epoch [6/500], Loss: 171037992.9032
Fine-tune Pixel Refiner Epoch [7/500], Loss: 169389656.0000
Fine-tune Pixel Refiner Epoch [8/500], Loss: 167824576.3441
Fine-tune Pixel Refiner Epoch [9/500], Loss: 166127732.3011
Fine-tune Pixel Refiner Epoch [10/500], Loss: 164909853.0753
Fine-tune Pixel Refiner Epoch [11/500], Loss: 162903632.2581
Fine-tune Pixel Refiner Epoch [12/500], Loss: 161154578.1935
Fine-tune Pixel Refiner Epoch [13/500], Loss: 159504335.0968
Fine-tune Pixel Refiner Epoch [14/500], Loss: 158024813.2473
Fine-tune Pixel Refiner Epoch [15/500], Loss: 156422699.7849
Fine-tune Pixel Refiner Epoch [16/500], Loss: 154515551.2688
Fine-tune Pixel Refiner Epoch [17

In [76]:



# Restore the fine-tuned refiner weights and switch to evaluation mode.
# The final expression is left bare so the notebook shows the module summary.
refiner_model.load_state_dict(
    torch.load('pixel_refiner_finetuned.pth', map_location=device)
)
refiner_model.eval()


PixelRefiner(
  (conv_layers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): AdaptiveAvgPool2d(output_size=(4, 4))
  )
  (fc_attention): Sequential(
    (0): Linear(in_features=513, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
    (3): ReLU()
  )
)

In [78]:
# Write the refiner's current weights to the fine-tuned checkpoint file.
torch.save(obj=refiner_model.state_dict(), f='pixel_refiner_finetuned.pth')
print("Pixel Refiner fine-tuned and saved successfully!")

Pixel Refiner fine-tuned and saved successfully!


In [79]:
# Resume from the fine-tuned checkpoint and run another round of fine-tuning
# on `updated_loader`.
refiner_model.load_state_dict(
    torch.load('pixel_refiner_finetuned.pth', map_location=device)
)
refiner_model.train()

# Mean-squared error against the target pixel count; Adam with a small
# learning rate for fine-tuning.
criterion_refiner = nn.MSELoss()
optimizer_refiner = torch.optim.Adam(params=refiner_model.parameters(), lr=1e-5)

fine_tune_epochs = 500
for epoch in range(fine_tune_epochs):
    total_loss = 0
    for imgs, initial_preds, gt_pixels in updated_loader:
        # Move the whole batch onto the training device.
        imgs, initial_preds, gt_pixels = (
            part.to(device) for part in (imgs, initial_preds, gt_pixels)
        )

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss += loss.item()

    # Mean loss per batch for this epoch.
    avg_loss = total_loss / len(updated_loader)
    print(f"Fine-tune Pixel Refiner Epoch [{epoch+1}/{fine_tune_epochs}], Loss: {avg_loss:.4f}")

# Overwrite the fine-tuned checkpoint with the newly trained weights.
torch.save(refiner_model.state_dict(), 'pixel_refiner_finetuned.pth')
print("Pixel Refiner fine-tuned and saved successfully!")


Fine-tune Pixel Refiner Epoch [1/500], Loss: 67394513.4839
Fine-tune Pixel Refiner Epoch [2/500], Loss: 67335424.3011
Fine-tune Pixel Refiner Epoch [3/500], Loss: 67237186.3656
Fine-tune Pixel Refiner Epoch [4/500], Loss: 67245135.1183
Fine-tune Pixel Refiner Epoch [5/500], Loss: 67308916.3656
Fine-tune Pixel Refiner Epoch [6/500], Loss: 67252974.8387
Fine-tune Pixel Refiner Epoch [7/500], Loss: 67178976.9032
Fine-tune Pixel Refiner Epoch [8/500], Loss: 67238008.5591
Fine-tune Pixel Refiner Epoch [9/500], Loss: 67201816.5914
Fine-tune Pixel Refiner Epoch [10/500], Loss: 67256056.1290
Fine-tune Pixel Refiner Epoch [11/500], Loss: 67155746.2151
Fine-tune Pixel Refiner Epoch [12/500], Loss: 67121633.0968
Fine-tune Pixel Refiner Epoch [13/500], Loss: 67202710.8602
Fine-tune Pixel Refiner Epoch [14/500], Loss: 67175102.8387
Fine-tune Pixel Refiner Epoch [15/500], Loss: 67177877.0753
Fine-tune Pixel Refiner Epoch [16/500], Loss: 67078740.3871
Fine-tune Pixel Refiner Epoch [17/500], Loss: 671

In [80]:
# Start again from the continued-training checkpoint and fine-tune briefly
# with a smaller learning rate.
# NOTE(review): on completion this writes to 'pixel_refiner_finetuned.pth',
# replacing whatever that file currently holds -- confirm that is intended.
refiner_model.load_state_dict(
    torch.load('pixel_refiner_continued.pth', map_location=device)
)
refiner_model.train()

criterion_refiner = nn.MSELoss()
optimizer_refiner = torch.optim.Adam(params=refiner_model.parameters(), lr=1e-6)

fine_tune_epochs = 10
for epoch in range(fine_tune_epochs):
    total_loss = 0
    for imgs, initial_preds, gt_pixels in updated_loader:
        # Move the whole batch onto the training device.
        imgs, initial_preds, gt_pixels = (
            part.to(device) for part in (imgs, initial_preds, gt_pixels)
        )

        optimizer_refiner.zero_grad()
        refined_output = refiner_model(imgs, initial_preds)
        loss = criterion_refiner(refined_output, gt_pixels)
        loss.backward()
        optimizer_refiner.step()

        total_loss += loss.item()

    # Mean loss per batch for this epoch.
    avg_loss = total_loss / len(updated_loader)
    print(f"Fine-tune Pixel Refiner Epoch [{epoch+1}/{fine_tune_epochs}], Loss: {avg_loss:.4f}")

torch.save(refiner_model.state_dict(), 'pixel_refiner_finetuned.pth')
print("Pixel Refiner fine-tuned and saved!")


Fine-tune Pixel Refiner Epoch [1/10], Loss: 178960180.2151
Fine-tune Pixel Refiner Epoch [2/10], Loss: 178873978.5376
Fine-tune Pixel Refiner Epoch [3/10], Loss: 178702388.3011
Fine-tune Pixel Refiner Epoch [4/10], Loss: 178503169.2043
Fine-tune Pixel Refiner Epoch [5/10], Loss: 178424672.1290


KeyboardInterrupt: 

In [83]:
yolo detect predict model=runs/detect/train/weights/best.pt source=./test_dataset/test_dataset/ save_txt save_conf


SyntaxError: invalid syntax (3995509010.py, line 1)