In [1]:
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
import pickle
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

In [2]:
class CustomDataset(Dataset):
    """Dataset over a sequence of sample dicts with "image" and "embedding" entries.

    Args:
        data: Indexable, sized collection of dicts. Each dict must provide an
            "image" entry and an "embedding" entry exposing ``.flatten()``.
        transform: Optional callable applied to the image before it is
            returned. When ``None`` the image is returned unchanged.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, flattened_embedding)`` for the sample at ``idx``."""
        sample = self.data[idx]
        # Start from the raw image so that `image` is always bound; the
        # previous version raised UnboundLocalError when no transform was set.
        image = sample["image"]
        if self.transform is not None:
            image = self.transform(image)
        return image, sample["embedding"].flatten()

In [None]:
# Load the three pickled datasets from the current working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open("train_dataset_node_3.pkl", "rb") as f:
    train_dataset = pickle.load(f)

with open("test_dataset_node_4.pkl", "rb") as f:
    test_dataset = pickle.load(f)

with open("merged_dataset.pkl", "rb") as f:
    merged_dataset = pickle.load(f)

# Index of the sample inspected below in both the train and the test set.
idx =100
print("Length of train dataset: ", len(train_dataset))
print("Length of test dataset: ", len(test_dataset))

# Each sample is accessed as a dict with "image", "image_name" and "embedding" entries.
print("Train dataset keys: ", train_dataset[idx].keys())
print("Train dataset image shape: ", train_dataset[idx]["image"].shape)
print("Train dataset image name: ", train_dataset[idx]["image_name"])
print("Train dataset embedding shape: ", train_dataset[idx]["embedding"].shape)

print("Test dataset keys: ", test_dataset[idx].keys())
print("Test dataset image shape: ", test_dataset[idx]["image"].shape)
print("Test dataset image name: ", test_dataset[idx]["image_name"])
print("Test dataset embedding shape: ", test_dataset[idx]["embedding"].shape)


# Convert the stored image arrays to PIL images for display.
train_image = Image.fromarray(train_dataset[idx]["image"])
test_image = Image.fromarray(test_dataset[idx]["image"])

# Show the selected train and test images side by side.
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.imshow(train_image)
plt.axis("off")
plt.title("Train Image")

plt.subplot(1, 2, 2)
plt.imshow(test_image)
plt.axis("off")
plt.title("Test Image")

# Preprocessing for training images: ToTensor followed by a per-channel
# Normalize with mean 0.5 and std 0.5 (maps [0, 1] inputs to [-1, 1]).
# NOTE(review): assumes ToTensor yields values in [0, 1] for these arrays — confirm dtype.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# The training set is built from the merged dataset, not from train_dataset.
train_data = CustomDataset(merged_dataset, transform=transform)

In [4]:
class EmbeddingToImageDecoder(nn.Module):
    """Decode an embedding vector into a 3-channel image with values in [-1, 1].

    A stack of Linear+ReLU layers expands the embedding to a flattened
    256x7x7 feature map; five stride-2 transposed convolutions then double the
    spatial size each time (7 -> 14 -> 28 -> 56 -> 112 -> 224) and a final
    Tanh bounds the output.

    Args:
        embedding_size: Number of features in the input embedding.
    """

    def __init__(self, embedding_size=384):
        super().__init__()

        # Widths of the fully connected stack; the last one is the flattened
        # 256-channel 7x7 seed feature map.
        widths = [embedding_size, 1024, 2048, 4096, 8192, 7 * 7 * 256]
        dense_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            dense_layers.extend([nn.Linear(n_in, n_out), nn.ReLU()])
        self.mlp = nn.Sequential(*dense_layers)

        # Channel plan of the normalised upsampling stages.
        channels = [256, 256, 128, 64, 32]
        conv_layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            conv_layers.extend([
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2),
            ])
        # Final stage projects to RGB; no normalisation, Tanh output.
        conv_layers.extend([
            nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ])
        self.upconv = nn.Sequential(*conv_layers)

    def forward(self, x):
        """Map embeddings to images; the result has 3 channels and 224x224 pixels."""
        hidden = self.mlp(x)
        # (Batch, Channels, Height, Width)
        feature_map = hidden.view(hidden.size(0), 256, 7, 7)
        return self.upconv(feature_map)


In [5]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by a skip connection.

    The spatial size is preserved (stride 1, padding 1). When the channel
    count changes, the skip path uses a 1x1 convolution followed by batch
    norm; otherwise it is an empty ``nn.Sequential`` (identity).

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels of the output feature map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        conv3x3 = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_channels)
        if in_channels == out_channels:
            # Matching channels: the input is added back unchanged.
            self.shortcut = nn.Sequential()
        else:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        skip = self.shortcut(x)
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        main += skip
        return self.relu(main)

class ViTDecoderWithResiduals(nn.Module):
    """Decode an embedding into a 3-channel image in [-1, 1] using residual refinement.

    A single Linear+ReLU layer produces a 256x7x7 feature map. Four
    upsampling stages (stride-2 transposed conv, batch norm, LeakyReLU), each
    followed by a ResidualBlock, and a final transposed conv double the
    spatial size five times: 7 -> 14 -> 28 -> 56 -> 112 -> 224.

    Args:
        embedding_dim: Number of features in the input embedding.
    """

    def __init__(self, embedding_dim=384):
        super().__init__()

        def upsample(c_in, c_out):
            # One stage that doubles height and width.
            return nn.Sequential(
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2),
            )

        # MLP: embedding -> flattened feature map
        self.mlp = nn.Sequential(nn.Linear(embedding_dim, 7 * 7 * 256), nn.ReLU())

        # Upsampling stages, each refined by a residual block
        self.upconv1 = upsample(256, 128)  # 7x7 -> 14x14
        self.residual1 = ResidualBlock(128, 128)

        self.upconv2 = upsample(128, 64)  # 14x14 -> 28x28
        self.residual2 = ResidualBlock(64, 64)

        self.upconv3 = upsample(64, 32)  # 28x28 -> 56x56
        self.residual3 = ResidualBlock(32, 32)

        self.upconv4 = upsample(32, 16)  # 56x56 -> 112x112
        self.residual4 = ResidualBlock(16, 16)

        # Final layer: project to RGB, 112x112 -> 224x224
        self.upconv5 = nn.Sequential(
            nn.ConvTranspose2d(16, 3, kernel_size=4, stride=2, padding=1),
        )

    def forward(self, z):
        """Map embeddings ``z`` to images with 3 channels and 224x224 pixels."""
        feat = self.mlp(z)
        # Reshape to image layout (batch, channels, height, width)
        feat = feat.view(feat.size(0), 256, 7, 7)

        stages = (
            (self.upconv1, self.residual1),
            (self.upconv2, self.residual2),
            (self.upconv3, self.residual3),
            (self.upconv4, self.residual4),
        )
        for upsample, refine in stages:
            feat = refine(upsample(feat))

        # tanh bounds the output to [-1, 1]
        return torch.tanh(self.upconv5(feat))


In [6]:
def train_model(train_loader, model, optimizer, criterion, device, num_epochs=10):
    """Train ``model`` to reconstruct images from embeddings.

    Args:
        train_loader: Iterable yielding ``(image, embedding)`` batches.
        model: Module called with the embedding batch; its output is compared
            against ``image`` by ``criterion``. It is expected to already be
            on ``device``.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss callable ``criterion(output, image)``.
        device: Device each batch is moved to.
        num_epochs: Number of passes over ``train_loader``.

    Prints one line per epoch with the mean batch loss of that epoch, or
    ``nan`` when the loader produced no batches.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for image, embedding in train_loader:
            # as_tensor reuses an existing tensor instead of copying it (and
            # avoids the warning torch.tensor() emits when given a tensor).
            # NOTE(review): the two unsqueeze calls turn (B, D) into
            # (B, 1, 1, D); kept for compatibility — confirm they are needed.
            embedding = torch.as_tensor(embedding).unsqueeze(1).unsqueeze(1)
            embedding = embedding.to(device)
            image = image.to(device)
            optimizer.zero_grad()
            output = model(embedding)
            loss = criterion(output, image)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Report the epoch mean instead of only the last batch's loss, and do
        # not crash on an empty loader (previously `loss` was unbound here).
        epoch_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch + 1}, Train Loss: {epoch_loss}")

In [7]:
def get_batch_loader(train_data, batch_size=32, shuffle=True):
    """Wrap ``train_data`` in a ``DataLoader``.

    Args:
        train_data: Dataset to iterate over.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the data.

    Returns:
        A ``DataLoader`` over ``train_data`` with the given options.
    """
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return DataLoader(train_data, **loader_options)

In [None]:
# Instantiate the decoder and choose the compute device.
model = ViTDecoderWithResiduals()
model.train()
# torch.backends.mps.is_available() is the documented MPS availability check;
# torch.mps.is_available() is not present on older PyTorch releases.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)
# The ONNX export below is traced on the CPU with a single 384-dim dummy input.
# The model is left on the CPU afterwards.
model.to('cpu')
x = torch.randn(1, 384)
torch.onnx.export(model, x, "model.onnx")

In [None]:
# train_model moves every batch to `device`, but the model was last placed on
# the CPU for the ONNX export. Move it to `device` before the optimizer is
# built so parameters and inputs live on the same device.
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
criterion = nn.L1Loss()
batch_size = 32
train_loader = get_batch_loader(train_data, batch_size=batch_size)
train_model(train_loader, model, optimizer, criterion, device, num_epochs=100)

In [None]:
# Load the saved decoder weights. map_location remaps the stored tensors onto
# `device`, so a checkpoint written on another device (e.g. MPS or CUDA) still
# loads on this machine.
model.load_state_dict(torch.load("vit_decoder_with_residuals.pth", map_location=device))
model.to(device)

In [None]:
# Show the reference test image.
img = test_dataset[100]["image"]
plt.imshow(np.array(img) / 255.0)
plt.show()

# Compute the image's embedding with the locally cached Path Foundation model.
# NOTE(review): this is an absolute, user-specific path; it only resolves on
# the original author's machine. Consider making it configurable.
loaded_model = tf.keras.layers.TFSMLayer("/Users/cilem/.cache/huggingface/hub/models--google--path-foundation/snapshots/fd6a835ceaae15be80db6abd8dcfeb86a9287e72", call_endpoint='serving_default')
infer = loaded_model.call
# `tensor` was never defined (NameError). Build a float32 batch of one image
# scaled to [0, 1].
# NOTE(review): assumes `img` is an HxWx3 uint8 array and that the model
# expects [0, 1] inputs, as in the Path Foundation model card — confirm.
tensor = tf.cast(tf.expand_dims(np.array(img), axis=0), tf.float32) / 255.0
embeddings = infer(tf.constant(tensor))
embedding_vector = embeddings['output_0'].numpy().flatten()

# NOTE(review): `embedding_vector` is computed above but the decoder is fed a
# random vector; kept as in the original — confirm this is intentional.
random_embedding = torch.randn(384)
# Use the same device as the model instead of a hard-coded "mps".
embedding = random_embedding.unsqueeze(0).to(device)
# Inference: use BatchNorm running statistics and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
    output = model(embedding)
output = output.squeeze(0).cpu().numpy()
# The decoder ends in tanh and the training images were normalized with
# mean 0.5 / std 0.5, so map [-1, 1] back to [0, 1] before clipping; clipping
# the raw output would discard every negative value.
output = np.clip(output * 0.5 + 0.5, 0, 1)
output = np.moveaxis(output, 0, -1)
plt.imshow(output)
plt.show()

In [None]:
import torch
from torch import nn
from torchsummary import summary

# Define a simple PyTorch model
class SimpleModel(nn.Module):
    """Two-layer perceptron: Linear(32, 64) -> ReLU -> Linear(64, 10)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(32, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the output layer."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Create the model
# NOTE(review): this rebinds `model` (and `x` below), names that earlier cells
# use for the decoder and its dummy input.
model = SimpleModel()

# An example input
x = torch.randn(1, 32)


from torchviz import make_dot


y = model(x)

# Visualization: build the autograd graph of `y`, labelled with the model's named parameters
dot = make_dot(y, params=dict(model.named_parameters()))
dot.render("model_architecture", format="png")  # Saves it as a PNG


In [2]:
import torch.onnx

# Save the model in .onnx format
# NOTE(review): this writes "model.onnx", the same file name the decoder export
# cell uses, so whichever cell runs last overwrites the other's file.
torch.onnx.export(model, x, "model.onnx")
