In [1]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import io
from PIL import Image
from skimage.metrics import structural_similarity as ssim
import os
import logging
import cv2
MAX_INT = 2**31 - 1
logger = logging.getLogger("ENOFT_LOGER")
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler("model.json")
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)


class ImageCompressionEnv(gym.Env):
    def __init__(self, images):
        super(ImageCompressionEnv, self).__init__()
        self.images = images
        self.current_index = 0
        self.current_image = self.images[self.current_index]
        self.original_size = 0
        self.compression_ratio = 1.0  # Initialize with a default value
        
        # Define action and observation space
        # quality, rst_interval, luma quality, chroma, sampling factor
        self.action_space = spaces.MultiDiscrete([101, 65536, 101, 101, 5])
        size = self.current_image.size + 1  # Include compression ratio in the state
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(size,), dtype=np.uint8
        )
        

    def step(self, action):
        compressed_image, compressed_size = self.compress_image(self.current_image, action)
        max_size = self.original_size * self.compression_ratio / 100
        
        got_ssim = 0
        if compressed_size > max_size:
            reward = max_size - compressed_size
        else:
            grey_original = np.dot(self.current_image[...,:3], [0.2989, 0.5870, 0.1140]).astype(np.uint8)
            grey_compressed = np.dot(compressed_image[...,:3], [0.2989, 0.5870, 0.1140]).astype(np.uint8)
            got_ssim = ssim(grey_original, grey_compressed, multichannel=True)
            reward = got_ssim
        
        self.current_index = (self.current_index + 1) % len(self.images)
        self.current_image = self.images[self.current_index]
        done = self.current_index == 0
        
        # Include compression ratio in the state
        state = np.append(self.current_image.flatten(), self.compression_ratio) 
        log = {
            "original_size": self.original_size,
            "compressed_size": compressed_size,
            "max_size": max_size,
            "got_ssim": got_ssim,
            "reward": reward,
            "action": action
        }
        logger.info(log)
        return state, float(reward), done, {}

    def reset(self):
        self.current_index = 0
        self.current_image = self.images[self.current_index]
        success, buffer = cv2.imencode('.jpg', self.current_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
        buffer = io.BytesIO(buffer)
        self.original_size = len(buffer.getvalue())
        self.compression_ratio = int(np.random.uniform(20, 90))  # Random compression ratio between 30% and 100%
        
        # Include compression ratio in the state
        state = np.append(self.current_image.flatten(), self.compression_ratio)
        return state

    def compress_image(self, image: np.ndarray, action):
        compression_level, rst_interval, luma_quality, chroma_quality, sampling_factor = action

        # Map sampling_factor to OpenCV equivalent
        sampling_map = {
            0: cv2.IMWRITE_JPEG_SAMPLING_FACTOR_411,
            1: cv2.IMWRITE_JPEG_SAMPLING_FACTOR_420,
            2: cv2.IMWRITE_JPEG_SAMPLING_FACTOR_422,
            3: cv2.IMWRITE_JPEG_SAMPLING_FACTOR_440,
            4: cv2.IMWRITE_JPEG_SAMPLING_FACTOR_444
        }
        sampling_factor = np.clip(sampling_factor, 0, 4)
        sampling = sampling_map[sampling_factor]

        # Encode the image using OpenCV with additional JPEG parameters
        encode_params = [
            int(cv2.IMWRITE_JPEG_QUALITY), compression_level,
            int(cv2.IMWRITE_JPEG_RST_INTERVAL), rst_interval,
            int(cv2.IMWRITE_JPEG_LUMA_QUALITY), luma_quality,
            int(cv2.IMWRITE_JPEG_CHROMA_QUALITY), chroma_quality,
            int(cv2.IMWRITE_JPEG_SAMPLING_FACTOR), sampling
        ]
        success, buffer = cv2.imencode('.jpg', image, encode_params)
        
        if not success:
            raise ValueError("Failed to compress image")
        
        # Convert buffer to BytesIO
        buffer = io.BytesIO(buffer)
        compressed_size = len(buffer.getvalue())  # Get the size of the buffer

        # Rewind buffer to the beginning
        buffer.seek(0)

        # Read the compressed image using Pillow
        compressed_image = Image.open(buffer)
        compressed_image = np.array(compressed_image)
        
        return compressed_image, compressed_size
    


png_folder = "datasets/kaggle_Kodak"
images = []
for i in os.listdir(png_folder):
    img_path = os.path.join(png_folder, i)
    img = cv2.imread(img_path)
    h, w, c = img.shape
    # Calculate padding amounts
    pad_height = 1000 - h
    pad_width = 1000 - w
    # Ensure padding values are non-negative
    pad_height_top = pad_height // 2
    pad_height_bottom = pad_height - pad_height_top
    pad_width_left = pad_width // 2
    pad_width_right = pad_width - pad_width_left

    # Pad the image
    img_padded = cv2.copyMakeBorder(img, 
                                    top=pad_height_top, bottom=pad_height_bottom, 
                                    left=pad_width_left, right=pad_width_right, 
                                    borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
    
    images.append(img_padded)
# images = [np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8) for _ in range(100)]
env = ImageCompressionEnv(images)


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
#  Proximal Policy Optimization
class PPO(nn.Module):
    def __init__(self, input_dim, output_dims):
        super(PPO, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.policy_layers = nn.ModuleList([
            nn.Linear(128, 101),    # For first action dimension (101 values)
            nn.Linear(128, 65536),  # For second action dimension (65536 values)
            nn.Linear(128, 101),    # For third action dimension (101 values)
            nn.Linear(128, 101),    # For fourth action dimension (101 values)
            nn.Linear(128, 5)       # For fifth action dimension (5 values)
        ])
        self.value_layer = nn.Linear(128, 1)
        self.output_dims = output_dims  # List of number of action values per dimension
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        policy_logits = [layer(x) for layer in self.policy_layers]
        value = self.value_layer(x)
        return policy_logits, value

    def get_action(self, state):
        policy_logits, _ = self.forward(state)
        actions = []
        log_probs = []
        entropies = []
        for i, logits in enumerate(policy_logits):
            policy_dist = Categorical(logits=logits)
            action = policy_dist.sample()
            actions.append(action.item())
            log_probs.append(policy_dist.log_prob(action))
            entropies.append(policy_dist.entropy())
        return actions, torch.stack(log_probs), torch.stack(entropies)

    def evaluate_action(self, state, action):
        policy_logits, value = self.forward(state)
        log_probs = []
        entropies = []
        for i, logits in enumerate(policy_logits):
            policy_dist = Categorical(logits=logits)
            log_prob = policy_dist.log_prob(action[:, i])
            log_probs.append(log_prob)
            entropies.append(policy_dist.entropy())
        return torch.stack(log_probs, dim=1), torch.squeeze(value), torch.stack(entropies, dim=1)

def compute_gae(rewards, masks, values, gamma=0.99, tau=0.95):
    returns = []
    gae = 0
    for step in reversed(range(len(rewards))):
        delta = rewards[step] + gamma * values[step + 1] * masks[step] - values[step]
        gae = delta + gamma * tau * masks[step] * gae
        returns.insert(0, gae + values[step])
    return returns

def ppo_update(agent, optimizer, trajectories, clip_param=0.2):
    states = torch.stack([trajectory[0] for trajectory in trajectories]).detach()
    actions = torch.stack([torch.tensor(trajectory[1]) for trajectory in trajectories]).detach()
    log_probs_old = torch.stack([trajectory[2] for trajectory in trajectories]).detach()
    returns = torch.tensor([trajectory[3] for trajectory in trajectories])
    advantages = returns - torch.tensor([trajectory[4] for trajectory in trajectories])
    
    for _ in range(4):
        log_probs, state_values, dist_entropy = agent.evaluate_action(states, actions)
        ratio = torch.exp(log_probs - log_probs_old)
        surr1 = ratio * advantages
        surr2 = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantages
        policy_loss = -torch.min(surr1, surr2).mean()
        value_loss = (returns - state_values).pow(2).mean()
        loss = policy_loss + 0.5 * value_loss - 0.01 * dist_entropy.mean()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()



In [3]:
def train(env, agent, optimizer, num_episodes=100, gamma=0.99, clip_param=0.2):
    all_rewards = []
    for episode in range(num_episodes):
        state = env.reset()
        state = torch.FloatTensor(state).unsqueeze(0)
        episode_reward = 0
        done = False
        trajectories = []

        while not done:
            action, log_prob, _ = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            next_state = torch.FloatTensor(next_state).unsqueeze(0)

            mask = 1 if not done else 0
            trajectories.append((state, action, log_prob, reward, mask))

            state = next_state
            episode_reward += reward

        all_rewards.append(episode_reward)

        states = torch.stack([trajectory[0] for trajectory in trajectories])
        actions = torch.stack([torch.tensor(trajectory[1]) for trajectory in trajectories])
        log_probs = torch.stack([trajectory[2] for trajectory in trajectories])
        rewards = torch.tensor([trajectory[3] for trajectory in trajectories])
        masks = torch.tensor([trajectory[4] for trajectory in trajectories])

        with torch.no_grad():
            _, next_value = agent(states[-1])
            values = torch.cat([agent(states)[1], next_value.unsqueeze(0)])

        returns = compute_gae(rewards, masks, values, gamma)
        ppo_update(agent, optimizer, trajectories, clip_param)

        if episode % 10 == 0:
            print(f"Episode {episode}, Reward: {episode_reward}")

    return all_rewards

input_dim = 1000 * 1000 * 3 + 1  # Example image size plus the compression ratio
output_dims = [101, 65536, 101, 101, 5]  # Matching the action space dimensions
agent = PPO(input_dim, len(output_dims))
optimizer = optim.Adam(agent.parameters(), lr=3e-4)
train(env, agent, optimizer)

Episode 0, Reward: 22.036033629898583
Episode 10, Reward: 23.123209684466243
Episode 20, Reward: 23.123209684466243
Episode 30, Reward: 23.123209684466243
Episode 40, Reward: 23.123209684466243
Episode 50, Reward: 23.123209684466243
Episode 60, Reward: 23.123209684466243
Episode 70, Reward: 23.123209684466243
Episode 80, Reward: 23.123209684466243
Episode 90, Reward: 23.123209684466243


[22.036033629898583,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 -3752.4312472607116,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.123209684466243,
 23.12320968

In [4]:
def evaluate(env, agent, num_episodes=10):
    all_rewards = []
    for _ in range(num_episodes):
        state = env.reset()
        state = torch.FloatTensor(state).unsqueeze(0)
        episode_reward = 0
        done = False

        while not done:
            action, _, _ = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            next_state = torch.FloatTensor(next_state).unsqueeze(0)
            state = next_state
            episode_reward += reward

        all_rewards.append( episode_reward )
    return all_rewards

# Example usage
evaluation_rewards = evaluate(env, agent)
print(f"Average evaluation reward: {np.mean(evaluation_rewards)}")


KeyboardInterrupt: 

In [4]:
def save_model(agent, path):
    torch.save(agent.state_dict(), path)
    
def load_model(agent, path):
    agent.load_state_dict(torch.load(path))
    return agent

# Example usage
save_model(agent, "model.pth")
# loaded_agent = PPO(input_dim, output_dim)
# loaded_agent = load_model(loaded_agent, "model.pth")
# evaluation_rewards = evaluate(env, loaded_agent)
