# 1) Geliştirme: CoordConv’u sadece kritik yerlere koyalım

CoordConv’u her katmana koymak çoğu zaman gereksiz. En iyi kullanım:

* Stem (erken konum bilgisi)

* veya Head (koordinat regresyon/heatmap üretimi)

Aşağıdaki model: stem’de CoordConv, sonra residual bloklarla devam.

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def make_coord_channels(B, H, W, device, dtype, with_r=False):
    y = torch.linspace(-1.0, 1.0, steps=H, device=device, dtype=dtype)
    x = torch.linspace(-1.0, 1.0, steps=W, device=device, dtype=dtype)
    yy, xx = torch.meshgrid(y, x, indexing='ij')
    xx = xx[None, None].repeat(B, 1, 1, 1)
    yy = yy[None, None].repeat(B, 1, 1, 1)
    if with_r:
        rr = torch.sqrt(xx**2 + yy**2)
        return torch.cat([xx, yy, rr], dim=1)
    return torch.cat([xx, yy], dim=1)

class CoordConv2d(nn.Module):
    def __init__(self, cin, cout, k=3, stride=1, padding=1, with_r=False, bias=False):
        super().__init__()
        self.with_r = with_r
        extra = 3 if with_r else 2
        self.conv = nn.Conv2d(cin + extra, cout, k, stride=stride, padding=padding, bias=bias)

    def forward(self, x):
        B, C, H, W = x.shape
        coords = make_coord_channels(B, H, W, device=x.device, dtype=x.dtype, with_r=self.with_r)
        return self.conv(torch.cat([x, coords], dim=1))

class ConvBNAct(nn.Module):
    def __init__(self, cin, cout, k=3, stride=1, padding=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(cin, cout, k, stride=stride, padding=padding, bias=False),
            nn.BatchNorm2d(cout),
            nn.ReLU(inplace=True),
        )
    def forward(self, x): return self.net(x)

class BasicResBlock(nn.Module):
    def __init__(self, cin, cout, stride=1):
        super().__init__()
        self.conv1 = ConvBNAct(cin, cout, 3, stride=stride, padding=1)
        self.conv2 = nn.Sequential(
            nn.Conv2d(cout, cout, 3, padding=1, bias=False),
            nn.BatchNorm2d(cout),
        )
        self.skip = nn.Identity() if (cin == cout and stride == 1) else nn.Sequential(
            nn.Conv2d(cin, cout, 1, stride=stride, bias=False),
            nn.BatchNorm2d(cout),
        )
        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        y = self.conv1(x)
        y = self.conv2(y)
        y = y + self.skip(x)
        return self.act(y)

class CoordResNetTiny(nn.Module):
    def __init__(self, in_channels=3, num_classes=10, with_r=True):
        super().__init__()
        self.stem = nn.Sequential(
            CoordConv2d(in_channels, 32, k=3, stride=1, padding=1, with_r=with_r, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        self.stage1 = BasicResBlock(32, 64, stride=2)
        self.stage2 = BasicResBlock(64, 128, stride=2)
        self.stage3 = BasicResBlock(128, 256, stride=2)
        self.pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x, verbose=False):
        if verbose:
            print("in  :", x.shape)

        x = self.stem(x)
        if verbose:
            print("stem:", x.shape)

        x = self.stage1(x)
        if verbose:
            print("s1  :", x.shape)

        x = self.stage2(x)
        if verbose:
            print("s2  :", x.shape)

        x = self.stage3(x)
        if verbose:
            print("s3  :", x.shape)

        x = self.pool(x)
        x = torch.flatten(x, 1)
        out = self.fc(x)

        if verbose:
            print("out :", out.shape)

        return out

# test
if __name__ == "__main__":
    m = CoordResNetTiny(with_r=True)
    y = m(torch.randn(2,3,64,64), verbose=True)


in  : torch.Size([2, 3, 64, 64])
stem: torch.Size([2, 32, 32, 32])
s1  : torch.Size([2, 64, 16, 16])
s2  : torch.Size([2, 128, 8, 8])
s3  : torch.Size([2, 256, 4, 4])
out : torch.Size([2, 10])


# 2) Geliştirme: CoordConv’u “head”e koyup koordinat regresyonu yaptıralım (CoordConv’un asıl olayı)

Eğer hedefimiz “konum çıkarmak” ise (x,y), sınıflandırma yerine bu daha doğru:

* Backbone feature çıkarır

* Head CoordConv ile (x,y) üretir

### Dikkat edilmesi gerekenler

* Çıkış: xy ∈ [-1, 1] aralığında 2 değer (x, y)

* Eğitimde GT (ground-truth) da aynı aralıkta olmalı.

* Loss: SmoothL1Loss (regresyon için sağlam

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def create_coord_maps(batch_size, height, width, device, dtype, add_radius=False):
    y_lin = torch.linspace(-1.0, 1.0, steps=height, device=device, dtype=dtype)
    x_lin = torch.linspace(-1.0, 1.0, steps=width,  device=device, dtype=dtype)

    yy, xx = torch.meshgrid(y_lin, x_lin, indexing='ij')  # (H,W)

    xx = xx[None, None].repeat(batch_size, 1, 1, 1)  # (B,1,H,W)
    yy = yy[None, None].repeat(batch_size, 1, 1, 1)  # (B,1,H,W)

    if add_radius:
        rr = torch.sqrt(xx**2 + yy**2)
        return torch.cat([xx, yy, rr], dim=1)  # (B,3,H,W)

    return torch.cat([xx, yy], dim=1)          # (B,2,H,W)


class CoordConvLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, add_radius=False, bias=False):
        super().__init__()
        self.add_radius = add_radius
        extra = 3 if add_radius else 2
        self.conv = nn.Conv2d(
            in_channels + extra,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias
        )

    def forward(self, x):
        b, c, h, w = x.shape
        coords = create_coord_maps(
            batch_size=b,
            height=h,
            width=w,
            device=x.device,
            dtype=x.dtype,
            add_radius=self.add_radius
        )
        x = torch.cat([x, coords], dim=1)
        return self.conv(x)


class ConvBNReLU(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.net(x)


class CoordXYRegressor(nn.Module):
    def __init__(self, in_channels=3, add_radius=True):
        super().__init__()

        self.backbone = nn.Sequential(
            ConvBNReLU(in_channels, 32, 3, 1, 1),
            nn.MaxPool2d(2),
            ConvBNReLU(32, 64, 3, 2, 1),
            ConvBNReLU(64, 128, 3, 2, 1),
        )

        self.head = nn.Sequential(
            CoordConvLayer(128, 128, kernel_size=3, stride=1, padding=1, add_radius=add_radius, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
        )

        self.fc = nn.Linear(128, 2)

    def forward(self, x, verbose=False):
        if verbose: print("in      :", x.shape)

        feats = self.backbone(x)
        if verbose: print("features :", feats.shape)

        h = self.head(feats)
        if verbose: print("head     :", h.shape)

        h = torch.flatten(h, 1)
        xy = torch.tanh(self.fc(h))  # (B,2) in [-1,1]

        if verbose: print("xy       :", xy.shape)
        return xy


In [29]:
model = CoordXYRegressor(in_channels=3, add_radius=True)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.SmoothL1Loss()

x = torch.randn(8, 3, 64, 64)
gt_xy = torch.empty(8, 2).uniform_(-1, 1)  # sahte ground-truth

pred_xy = model(x, verbose=True)
loss = loss_fn(pred_xy, gt_xy)

opt.zero_grad()
loss.backward()
opt.step()

print("loss:", float(loss))


in      : torch.Size([8, 3, 64, 64])
features : torch.Size([8, 128, 8, 8])
head     : torch.Size([8, 128, 1, 1])
xy       : torch.Size([8, 2])
loss: 0.23944737017154694


Consider using tensor.detach() first. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\torch\csrc\autograd\generated\python_variable_methods.cpp:837.)
  print("loss:", float(loss))


# 3) Geliştirme

* CoordConv + Attention (SE/CBAM) kombinasyonu.

Mantık çok basit ve net:
 
* CoordConv modele mutlak konumu verir (x/y ve opsiyonel r kanalları).

* Attention (SE veya CBAM) ise bu yeni gelen bilgiyle birlikte hangi kanal / hangi özellik önemliyse onu yükseltir, gereksizleri bastırır.

Yani ikisi beraber:

* “Konumu gör” + “doğru özelliğe odaklan”

* Özellikle localization / regression işlerinde head kısmında faydalı olur.

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SeBlock(nn.Module):
    def __init__(self, channels, reductions=8):
        super().__init__()
        hidden = max(1, channels // reductions)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Conv2d(channels, hidden, kernel_size=1)
        self.fc2 = nn.Conv2d(hidden, channels, kernel_size=1)

    def forward(self, x):
        s = self.pool(x)
        s = F.relu(self.fc1(s), inplace=True)
        s = torch.sigmoid(self.fc2(s))
        return x * s

def cords(B, H, W, device, dtype, add_rad=False):
    y = torch.linspace(-1.0, 1.0, steps=H, device=device, dtype=dtype)
    x = torch.linspace(-1.0, 1.0, steps=W, device=device, dtype=dtype)

    yy, xx = torch.meshgrid(y, x, indexing="ij")  # (H,W)

    xx = xx[None, None].repeat(B, 1, 1, 1)  # (B,1,H,W)
    yy = yy[None, None].repeat(B, 1, 1, 1)  # (B,1,H,W)

    if add_rad:
        rr = torch.sqrt(xx**2 + yy**2)
        return torch.cat([xx, yy, rr], dim=1)  # (B,3,H,W)
    return torch.cat([xx, yy], dim=1)          # (B,2,H,W)

class CordConv(nn.Module):
    def __init__(self, cin, cout, k=3, stride=1, padding=1, add_rad=False, bias=False):
        super().__init__()
        self.add_rad = add_rad
        extras = 3 if add_rad else 2
        self.conv_1 = nn.Conv2d(cin + extras, cout, kernel_size=k, stride=stride, padding=padding, bias=bias)

    def forward(self, x):
        b, c, h, w = x.shape
        coords = cords(b, h, w, device=x.device, dtype=x.dtype, add_rad=self.add_rad)
        x = torch.cat([x, coords], dim=1)
        return self.conv_1(x)

class ConvBNReLU(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.net(x)

class CoordXYRegr(nn.Module):
    def __init__(self, in_channels=3, add_rad=True, se_reduction=10):
        super().__init__()
        self.backbone = nn.Sequential(
            ConvBNReLU(in_channels, 32, 3, 1, 1),
            nn.MaxPool2d(2),
            ConvBNReLU(32, 64, 3, 2, 1),
            ConvBNReLU(64, 128, 3, 2, 1),
        )
        self.head = nn.Sequential(
            CordConv(128, 128, k=3, stride=1, padding=1, add_rad=add_rad, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            SeBlock(128, reductions=se_reduction),
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        self.fc = nn.Linear(128, 2)

    def forward(self, x, verbose=False):
        if verbose: print("in      :", x.shape)
        feats = self.backbone(x)
        if verbose: print("features :", feats.shape)
        h = self.head(feats)
        if verbose: print("head     :", h.shape)
        h = torch.flatten(h, 1)
        xy = torch.tanh(self.fc(h))
        if verbose: print("xy       :", xy.shape)
        return xy

In [None]:
m = CoordXYRegr(in_channels=3, add_rad=True)
x = torch.randn(8, 3, 64, 64)
y = m(x, verbose=True)
print(y.shape)

in      : torch.Size([8, 3, 64, 64])
features : torch.Size([8, 128, 8, 8])
head     : torch.Size([8, 128, 1, 1])
xy       : torch.Size([8, 2])
torch.Size([8, 2])
