In [None]:

import numpy as np
import torch
import torch.nn as nn


def get_emb(sin_inp):
    """
    Build the base embedding for a single axis.

    The sine and cosine of every entry of ``sin_inp`` are paired along a new
    trailing axis and then merged into the previous one, so the last dimension
    doubles and reads ``sin, cos, sin, cos, ...``.
    """
    sines = sin_inp.sin()
    cosines = sin_inp.cos()
    interleaved = torch.stack([sines, cosines], dim=-1)
    return interleaved.flatten(start_dim=-2, end_dim=-1)


class PositionalEncoding1D(nn.Module):
    """Sinusoidal positional encoding for channel-last sequences ``(batch, x, ch)``."""

    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding1D, self).__init__()
        self.org_channels = channels
        # Round up to an even count so every frequency gets a (sin, cos) pair.
        channels = int(np.ceil(channels / 2) * 2)
        self.channels = channels
        inv_freq = 1.0 / (10000 ** (torch.arange(0, channels, 2).float() / channels))
        self.register_buffer("inv_freq", inv_freq)
        # Last encoding produced; non-persistent so it never lands in state_dict.
        self.register_buffer("cached_penc", None, persistent=False)

    def forward(self, tensor):
        """
        :param tensor: A 3d tensor of size (batch_size, x, ch)
        :return: Positional Encoding Matrix of size (batch_size, x, ch)
        :raises RuntimeError: if ``tensor`` is not 3-dimensional.
        """
        if len(tensor.shape) != 3:
            raise RuntimeError("The input tensor has to be 3d!")

        # Reuse the cache only when it matches the input in shape AND dtype AND
        # device; matching on shape alone would hand back a stale encoding after
        # the caller switches precision or device.
        cached = self.cached_penc
        if (
            cached is not None
            and cached.shape == tensor.shape
            and cached.dtype == tensor.dtype
            and cached.device == tensor.device
        ):
            return cached

        self.cached_penc = None
        batch_size, x, orig_ch = tensor.shape
        pos_x = torch.arange(x, device=tensor.device, dtype=self.inv_freq.dtype)
        # Outer product position x frequency -> (x, channels / 2)
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        emb_x = get_emb(sin_inp_x)
        emb = torch.zeros((x, self.channels), device=tensor.device, dtype=tensor.dtype)
        emb[:, : self.channels] = emb_x

        # Trim the padding channel (if any) and replicate across the batch.
        self.cached_penc = emb[None, :, :orig_ch].repeat(batch_size, 1, 1)
        return self.cached_penc


class PositionalEncodingPermute1D(nn.Module):
    """Channel-first adapter around ``PositionalEncoding1D``."""

    def __init__(self, channels):
        """
        Accepts (batchsize, ch, x) instead of (batchsize, x, ch)
        """
        super().__init__()
        self.penc = PositionalEncoding1D(channels)

    def forward(self, tensor):
        # Move channels last, encode, then restore the channel-first layout.
        channels_last = tensor.permute(0, 2, 1)
        encoding = self.penc(channels_last)
        return encoding.permute(0, 2, 1)

    @property
    def org_channels(self):
        """Channel count the wrapped encoder was constructed with."""
        return self.penc.org_channels


class PositionalEncoding2D(nn.Module):
    """Sinusoidal positional encoding for channel-last grids ``(batch, x, y, ch)``."""

    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding2D, self).__init__()
        self.org_channels = channels
        # Each of the two axes gets half of the channels, rounded up to an even count.
        channels = int(np.ceil(channels / 4) * 2)
        self.channels = channels
        inv_freq = 1.0 / (10000 ** (torch.arange(0, channels, 2).float() / channels))
        self.register_buffer("inv_freq", inv_freq)
        # Last encoding produced; non-persistent so it never lands in state_dict.
        self.register_buffer("cached_penc", None, persistent=False)

    def forward(self, tensor):
        """
        :param tensor: A 4d tensor of size (batch_size, x, y, ch)
        :return: Positional Encoding Matrix of size (batch_size, x, y, ch)
        :raises RuntimeError: if ``tensor`` is not 4-dimensional.
        """
        if len(tensor.shape) != 4:
            raise RuntimeError("The input tensor has to be 4d!")

        # Reuse the cache only when it matches the input in shape AND dtype AND
        # device; matching on shape alone would hand back a stale encoding after
        # the caller switches precision or device.
        cached = self.cached_penc
        if (
            cached is not None
            and cached.shape == tensor.shape
            and cached.dtype == tensor.dtype
            and cached.device == tensor.device
        ):
            return cached

        self.cached_penc = None
        batch_size, x, y, orig_ch = tensor.shape
        pos_x = torch.arange(x, device=tensor.device, dtype=self.inv_freq.dtype)
        pos_y = torch.arange(y, device=tensor.device, dtype=self.inv_freq.dtype)
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        sin_inp_y = torch.einsum("i,j->ij", pos_y, self.inv_freq)
        # (x, 1, channels) broadcasts over y; (y, channels) broadcasts over x.
        emb_x = get_emb(sin_inp_x).unsqueeze(1)
        emb_y = get_emb(sin_inp_y)
        emb = torch.zeros(
            (x, y, self.channels * 2),
            device=tensor.device,
            dtype=tensor.dtype,
        )
        emb[:, :, : self.channels] = emb_x
        emb[:, :, self.channels : 2 * self.channels] = emb_y

        # Trim padding channels (if any) and replicate across the batch.
        self.cached_penc = emb[None, :, :, :orig_ch].repeat(batch_size, 1, 1, 1)
        return self.cached_penc


class PositionalEncodingPermute2D(nn.Module):
    """Channel-first adapter around ``PositionalEncoding2D``."""

    def __init__(self, channels):
        """
        Accepts (batchsize, ch, x, y) instead of (batchsize, x, y, ch)
        """
        super().__init__()
        self.penc = PositionalEncoding2D(channels)

    def forward(self, tensor):
        # Move channels last, encode, then restore the channel-first layout.
        channels_last = tensor.permute(0, 2, 3, 1)
        encoding = self.penc(channels_last)
        return encoding.permute(0, 3, 1, 2)

    @property
    def org_channels(self):
        """Channel count the wrapped encoder was constructed with."""
        return self.penc.org_channels


class PositionalEncoding3D(nn.Module):
    """Sinusoidal positional encoding for channel-last volumes ``(batch, x, y, z, ch)``."""

    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding3D, self).__init__()
        self.org_channels = channels
        # Each of the three axes gets a third of the channels. The result is
        # 2 * ceil(...) and therefore always even, so no extra parity fix-up
        # is needed.
        channels = int(np.ceil(channels / 6) * 2)
        self.channels = channels
        inv_freq = 1.0 / (10000 ** (torch.arange(0, channels, 2).float() / channels))
        self.register_buffer("inv_freq", inv_freq)
        # Last encoding produced; non-persistent so it never lands in state_dict.
        self.register_buffer("cached_penc", None, persistent=False)

    def forward(self, tensor):
        """
        :param tensor: A 5d tensor of size (batch_size, x, y, z, ch)
        :return: Positional Encoding Matrix of size (batch_size, x, y, z, ch)
        :raises RuntimeError: if ``tensor`` is not 5-dimensional.
        """
        if len(tensor.shape) != 5:
            raise RuntimeError("The input tensor has to be 5d!")

        # Reuse the cache only when it matches the input in shape AND dtype AND
        # device; matching on shape alone would hand back a stale encoding after
        # the caller switches precision or device.
        cached = self.cached_penc
        if (
            cached is not None
            and cached.shape == tensor.shape
            and cached.dtype == tensor.dtype
            and cached.device == tensor.device
        ):
            return cached

        self.cached_penc = None
        batch_size, x, y, z, orig_ch = tensor.shape
        pos_x = torch.arange(x, device=tensor.device, dtype=self.inv_freq.dtype)
        pos_y = torch.arange(y, device=tensor.device, dtype=self.inv_freq.dtype)
        pos_z = torch.arange(z, device=tensor.device, dtype=self.inv_freq.dtype)
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        sin_inp_y = torch.einsum("i,j->ij", pos_y, self.inv_freq)
        sin_inp_z = torch.einsum("i,j->ij", pos_z, self.inv_freq)
        # Insert singleton axes so each per-axis embedding broadcasts over the
        # other two spatial axes when written into ``emb``.
        emb_x = get_emb(sin_inp_x).unsqueeze(1).unsqueeze(1)
        emb_y = get_emb(sin_inp_y).unsqueeze(1)
        emb_z = get_emb(sin_inp_z)
        emb = torch.zeros(
            (x, y, z, self.channels * 3),
            device=tensor.device,
            dtype=tensor.dtype,
        )
        emb[:, :, :, : self.channels] = emb_x
        emb[:, :, :, self.channels : 2 * self.channels] = emb_y
        emb[:, :, :, 2 * self.channels :] = emb_z

        # Trim padding channels (if any) and replicate across the batch.
        self.cached_penc = emb[None, :, :, :, :orig_ch].repeat(batch_size, 1, 1, 1, 1)
        return self.cached_penc


class PositionalEncodingPermute3D(nn.Module):
    """Channel-first adapter around ``PositionalEncoding3D``."""

    def __init__(self, channels):
        """
        Accepts (batchsize, ch, x, y, z) instead of (batchsize, x, y, z, ch)
        """
        super().__init__()
        self.penc = PositionalEncoding3D(channels)

    def forward(self, tensor):
        # Move channels last, encode, then restore the channel-first layout.
        channels_last = tensor.permute(0, 2, 3, 4, 1)
        encoding = self.penc(channels_last)
        return encoding.permute(0, 4, 1, 2, 3)

    @property
    def org_channels(self):
        """Channel count the wrapped encoder was constructed with."""
        return self.penc.org_channels


class Summer(nn.Module):
    """Adds the output of a positional-encoding module to its input."""

    def __init__(self, penc):
        """
        :param penc: The positional encoding module to run the summer on.
        """
        super().__init__()
        self.penc = penc

    def forward(self, tensor):
        """
        :param tensor: A 3, 4 or 5d tensor that matches the model output size
        :return: Positional Encoding Matrix summed to the original tensor
        """
        encoding = self.penc(tensor)
        # The encoder must hand back exactly the input's shape, otherwise the
        # sum below would silently broadcast.
        assert tensor.size() == encoding.size(), (
            "The original tensor size {} and the positional encoding tensor size {} must match!".format(
                tensor.size(), encoding.size()
            )
        )
        return tensor + encoding


In [29]:
# Random 4-D tensor, 10 entries along every axis.
input_tensor = torch.rand(10, 10, 10, 10)
# Size of the leading axis.
k = input_tensor.size(0)
k


10

In [None]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Optional, Tuple

class SelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The embedding is split into ``heads`` chunks of ``head_dim`` features.
    The same ``head_dim x head_dim`` bias-free projections are applied to every
    head, and the concatenated head outputs pass through a final linear layer.
    """

    def __init__(self, embed_size: int, heads: int):
        """
        :param embed_size: Size of the last dimension of the inputs.
        :param heads: Number of attention heads; must divide ``embed_size``.
        """
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert self.head_dim * heads == embed_size, "Embed size needs to be divisible by heads"

        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(
        self,
        values: torch.Tensor,
        keys: torch.Tensor,
        queries: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        :param values: Tensor of size (N, value_len, embed_size)
        :param keys: Tensor of size (N, key_len, embed_size)
        :param queries: Tensor of size (N, query_len, embed_size)
        :param mask: Optional tensor broadcastable to (N, heads, query_len, key_len);
            positions where ``mask == 0`` are excluded from attention.
        :return: Tensor of size (N, query_len, embed_size)
        """
        N = queries.shape[0]
        value_len, key_len, query_len = values.shape[1], keys.shape[1], queries.shape[1]

        # Split the embedding into self.heads different pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = queries.reshape(N, query_len, self.heads, self.head_dim)

        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(queries)

        # Attention scores per head: (N, heads, query_len, key_len)
        energy = torch.einsum("nqhd,nkhd->nhqk", queries, keys)
        if mask is not None:
            # Use the most negative value representable in the score dtype; a
            # hard-coded -1e20 overflows in float16/bfloat16.
            energy = energy.masked_fill(mask == 0, torch.finfo(energy.dtype).min)

        # Scale by sqrt(d_k), the per-head key dimension over which the dot
        # product is taken. Scaling by sqrt(embed_size) over-flattens the
        # softmax by a factor of sqrt(heads).
        attention = torch.softmax(energy / math.sqrt(self.head_dim), dim=3)

        # Weighted sum of values, then merge the heads back together.
        out = torch.einsum("nhql,nlhd->nqhd", attention, values).reshape(
            N, query_len, self.heads * self.head_dim
        )
        return self.fc_out(out)
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a batch of embeddings."""

    def __init__(self, d_model: int, max_len: int = 5000):
        """
        :param d_model: Size of the last dimension of the inputs.
        :param max_len: Longest sequence length this module accepts.
        """
        super().__init__()
        self.d_model = d_model
        self.max_len = max_len

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        :param x: Tensor of size (batch_size, seq_len, d_model)
        :return: ``x`` plus the positional encoding, same size as ``x``
        :raises RuntimeError: if ``seq_len`` exceeds ``max_len``.
        """
        seq_len = x.size(1)
        if seq_len > self.max_len:
            raise RuntimeError(
                f"Sequence length {seq_len} exceeds the configured max_len {self.max_len}"
            )

        # Only the first seq_len positions are ever used, so build just those
        # rows instead of the full max_len table on every call.
        position = torch.arange(0, seq_len, dtype=torch.float, device=x.device).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, self.d_model, 2, device=x.device).float()
            * (-math.log(10000.0) / self.d_model)
        )
        angles = position * div_term  # (seq_len, ceil(d_model / 2))

        pe = torch.zeros(seq_len, self.d_model, device=x.device)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so
        # drop the unpaired last frequency from the cosine half.
        pe[:, 1::2] = torch.cos(angles[:, : self.d_model // 2])

        pe = pe.unsqueeze(0).expand(x.size(0), -1, -1)
        return x + pe


# class PositionalEncoding(nn.Module):
#     def __init__(self, d_model: int, max_len: int = 5000):
#         super().__init__()
#         self.d_model = d_model
#         self.max_len = max_len

#     def forward(self, x: torch.Tensor) -> torch.Tensor:
#         seq_len = x.size(1)
#         pe = torch.zeros(seq_len, self.d_model, device=x.device)
#         position = torch.arange(0, seq_len, dtype=torch.float, device=x.device).unsqueeze(1)
#         div_term = torch.exp(torch.arange(0, self.d_model, 2, device=x.device).float() * (-math.log(10000.0) / self.d_model))
#         pe[:, 0::2] = torch.sin(position * div_term)
#         pe[:, 1::2] = torch.cos(position * div_term)
#         pe = pe.unsqueeze(0).expand(x.size(0), -1, -1)
#         x = x + pe
#         return x

class TransformerBlock(nn.Module):
    """Attention sub-layer followed by a position-wise feed-forward sub-layer.

    Each sub-layer is wrapped as ``dropout(layer_norm(sublayer(x) + x))``.
    """

    def __init__(self, embed_size: int, heads: int, dropout: float, forward_expansion: int):
        """
        :param embed_size: Size of the last dimension of the inputs.
        :param heads: Number of attention heads.
        :param dropout: Dropout probability applied after each layer norm.
        :param forward_expansion: Width multiplier of the feed-forward hidden layer.
        """
        super().__init__()
        hidden_size = forward_expansion * embed_size

        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, embed_size),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value: torch.Tensor, key: torch.Tensor, query: torch.Tensor, mask: Optional[torch.Tensor]) -> torch.Tensor:
        # Attention sub-layer with a residual connection to the query.
        attended = self.attention(value, key, query, mask)
        residual = self.dropout(self.norm1(attended + query))
        # Feed-forward sub-layer with a residual connection to its own input.
        transformed = self.feed_forward(residual)
        return self.dropout(self.norm2(transformed + residual))

class Encoder(nn.Module):
    """Token embedding + positional encoding followed by a stack of TransformerBlocks."""

    def __init__(self, src_vocab_size: int, embed_size: int, num_layers: int, heads: int, device: torch.device, forward_expansion: int, dropout: float, max_length: int):
        """
        :param src_vocab_size: Number of rows in the token embedding table.
        :param embed_size: Embedding width used throughout the stack.
        :param num_layers: Number of TransformerBlocks.
        :param heads: Number of attention heads per block.
        :param device: Stored on the instance as ``self.device``.
        :param forward_expansion: Feed-forward width multiplier per block.
        :param dropout: Dropout probability.
        :param max_length: Maximum sequence length passed to the positional encoding.
        """
        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.positional_encoding = PositionalEncoding(embed_size, max_length)

        self.layers = nn.ModuleList(
            [
                TransformerBlock(
                    embed_size,
                    heads,
                    dropout=dropout,
                    forward_expansion=forward_expansion
                )
                for _ in range(num_layers)
            ]
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """
        :param x: Integer token ids of size (batch_size, seq_len)
        :param mask: Optional attention mask forwarded to every block
        :return: Encoded sequence of size (batch_size, seq_len, embed_size)
        """
        # PositionalEncoding.forward returns ``input + pe``, so it has to be fed
        # the embeddings. Feeding it the raw (batch, seq_len) token ids tries to
        # add integer ids to a (batch, seq_len, embed_size) table and fails to
        # broadcast.
        embedded = self.word_embedding(x)
        out = self.dropout(self.positional_encoding(embedded))
        for layer in self.layers:
            # Self-attention: the same tensor serves as value, key and query.
            out = layer(out, out, out, mask)

        return out

# This is a simplified example and the Decoder and full Transformer model would follow similar patterns to the Encoder.
# You would also need to implement a mask for the attention mechanism in the Decoder to prevent it from looking at future tokens.

# Example usage:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Example tensor input (batch_size, sequence_length)
input_tensor = torch.randint(0, 1000, (32, 512)).to(device)

# Assume some configuration parameters for the transformer model
src_vocab_size = 10000
embed_size = 512
num_layers = 6
heads = 8
forward_expansion = 4
dropout = 0.1
max_length = 512

# Create the Encoder
encoder = Encoder(
    src_vocab_size,
    embed_size,
    num_layers,
    heads,
    device,
    forward_expansion,
    dropout,
    max_length
).to(device)

# Example mask (batch_size, 1, sequence_length, sequence_length).
# SelfAttention hides every position where mask == 0, so an all-zeros mask
# would hide the whole sequence; ones keep every position visible.
mask = torch.ones((32, 1, 512, 512)).to(device)

# Forward pass through the encoder
encoder_output = encoder(input_tensor, mask)
# Report the shape rather than dumping all 32 * 512 * 512 values.
print(encoder_output.shape)


In [2]:
from PIL import Image

# alpha_composite needs two RGBA images of identical size. Both operands come
# from the same file here, and convert("RGBA") guarantees the mode.
image_path = "C:/Users/heman/Pictures/observation.png"

# The context managers close both file handles once the pixels are composited.
with Image.open(image_path) as image1, Image.open(image_path) as image2:
    # Alpha composite image2 over image1
    composite = Image.alpha_composite(image1.convert("RGBA"), image2.convert("RGBA"))

composite.show()


In [None]:
from PIL import Image

# Open an image; the context manager closes the file even if rotate() or
# show() raises, which a trailing close() call would not.
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Rotate by 45 degrees and display the result
    image.rotate(45).show()


In [7]:
from PIL import Image

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Convert the image to grayscale (single-band "L" mode)
    gray_image = image.convert("L")

gray_image.show()


In [8]:
from PIL import Image

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Make a copy of the image while the source file is still open
    copy_image = image.copy()

copy_image.show()


In [9]:
from PIL import Image

# Define the box to crop (left, upper, right, lower)
box = (100, 100, 300, 300)

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Crop and display while the source file is still open
    cropped_image = image.crop(box)
    cropped_image.show()


In [10]:
from PIL import Image

# Open an image file; the context manager closes it afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Ask the loader for a reduced-size RGB version before any pixels are read.
    # NOTE: per the Pillow docs draft() is only implemented for JPEG and MPO
    # files; for a PNG like this one it returns None and changes nothing.
    image.draft("RGB", (100, 100))

    # Load the image (at the drafted size when the format supports it)
    image.load()
    image.show()


In [14]:
from PIL import Image, ImageFilter

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Apply the spread effect with a distance of 1
    spread_image = image.effect_spread(1)

spread_image.show()


In [17]:
from PIL import Image

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Calculate the entropy of the image
    image_entropy = image.entropy()

print(f"Entropy: {image_entropy}")


Entropy: 2.2786906645979075


In [18]:
from PIL import Image, ImageFilter

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Apply a built-in filter, e.g., BLUR
    blurred_image = image.filter(ImageFilter.BLUR)

blurred_image.show()


In [19]:
from PIL import Image
# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Print the format of the image
    print(f"Image format: {image.format}")


Image format: PNG


In [20]:
from PIL import Image
# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Print a brief description of the format
    print(f"Image format description: {image.format_description}")


Image format description: Portable network graphics


In [21]:
from PIL import Image

# Raw pixel buffer for a 100x100 image in "L" mode: one byte per pixel,
# every byte 0xFF (white).
size = (100, 100)
raw_data = b'\xff' * (size[0] * size[1])

# Wrap the buffer in an Image and display it
image = Image.frombytes('L', size, raw_data)
image.show()


In [22]:
from PIL import Image

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Get the bands of the image
    bands = image.getbands()

print(f"Image bands: {bands}")


Image bands: ('R', 'G', 'B', 'A')


In [23]:
from PIL import Image

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Get the bounding box of non-zero regions
    bbox = image.getbbox()

print(f"Bounding box: {bbox}")


Bounding box: (0, 0, 766, 403)


In [25]:
from PIL import Image


# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Get a single channel of the image, e.g., the red channel.
    # The band is 'R' so that it agrees with the comment and the variable name
    # (the cell previously fetched 'B', i.e. blue).
    red_channel = image.getchannel('R')

red_channel.show()


In [26]:
from PIL import Image

# Open an image; the context manager closes the underlying file afterwards
with Image.open("C:/Users/heman/Pictures/observation.png") as image:
    # Get a list of (count, color) pairs used in the image
    # (returns None if more than 256 colors)
    colors = image.getcolors()

print(f"Colors: {colors}")


Colors: [(294227, (255, 255, 255, 255)), (739, (221, 221, 221, 255)), (890, (195, 195, 195, 255)), (1, (169, 169, 169, 255)), (812, (167, 167, 167, 255)), (934, (137, 137, 137, 255)), (817, (105, 105, 105, 255)), (759, (71, 71, 71, 255)), (882, (35, 35, 35, 255)), (8637, (0, 0, 0, 255))]
