In [None]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import cv2
import heapq
import sys
from collections import defaultdict

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
# Preprocessing shared by the training and validation splits:
# resize to 224x224, convert to a tensor, then normalise every channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# ImageFolder derives one class per sub-directory of the given root.
train_data = ImageFolder(root="/content/drive/MyDrive/dataset/train", transform=transform)
val_data = ImageFolder(root="/content/drive/MyDrive/dataset/val", transform=transform)

# Shuffle only the training batches; validation order stays fixed.
train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=16, shuffle=False)

# Class names, indexed by the integer label the datasets produce.
classes = train_data.classes


In [None]:
# Load MobileNetV2 with pretrained weights and freeze every existing
# parameter so the backbone acts as a fixed feature extractor.
model = models.mobilenet_v2(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# Replace the final linear layer with a fresh (trainable) one. Its output
# width follows the dataset's class list instead of a hard-coded constant,
# so the head always agrees with the labels ImageFolder produces and with
# the later `classes[predicted.item()]` lookup.
num_classes = len(classes)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
# Only the classifier head's parameters are handed to the optimizer.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)




In [None]:
# Number of full passes over the training loader.
num_epochs = 10

for epoch in range(num_epochs):
    # Put the model in training mode before each epoch.
    model.train()
    # Running sum of the per-batch loss values for this epoch.
    total_loss = 0

    for images, labels in train_loader:
        # Move the batch to the device the model lives on.
        images, labels = images.to(device), labels.to(device)

        # One optimisation step: clear stale gradients, forward pass,
        # compute the loss, back-propagate, then update the parameters.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python number from the loss tensor.
        total_loss += loss.item()

    # Report the mean per-batch loss (sum divided by the number of batches).
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}")


Epoch [1/10], Loss: 0.9939
Epoch [2/10], Loss: 0.5389
Epoch [3/10], Loss: 0.3539
Epoch [4/10], Loss: 0.3058
Epoch [5/10], Loss: 0.2582
Epoch [6/10], Loss: 0.2410
Epoch [7/10], Loss: 0.2441
Epoch [8/10], Loss: 0.1992
Epoch [9/10], Loss: 0.1801
Epoch [10/10], Loss: 0.1720


In [None]:
# RLE
def rle_encode(img):
    """Run-length encode an image in flattened (row-major) order.

    Args:
        img: array-like of pixel values (any shape); it is flattened first.

    Returns:
        A list of ``(pixel_value, run_length)`` tuples, one per run of equal
        consecutive values. ``run_length`` is a Python ``int``. An empty
        input yields an empty list.
    """
    pixels = np.asarray(img).ravel()
    if pixels.size == 0:
        # Nothing to encode; indexing pixels[0] would raise IndexError.
        return []

    # A new run starts at index 0 and wherever a value differs from its
    # predecessor.
    boundaries = np.flatnonzero(pixels[1:] != pixels[:-1]) + 1
    starts = np.concatenate(([0], boundaries))
    # Run length = distance to the next run start (or to the end of data).
    lengths = np.diff(np.append(starts, pixels.size))

    return list(zip(pixels[starts], lengths.tolist()))

# Huffman
class Node:
    """A node of the Huffman tree.

    Leaves carry a ``symbol``; merged (internal) nodes are created with
    ``symbol=None`` and two children. Nodes compare by ``freq`` only, which
    is all ``heapq`` needs to order them.
    """

    def __init__(self, freq, symbol, left=None, right=None):
        self.freq = freq
        self.symbol = symbol
        self.left = left
        self.right = right

    def __lt__(self, nxt):
        return self.freq < nxt.freq

def huffman_encoding(img):
    """Huffman-encode the pixel values of an image.

    Args:
        img: array-like of pixel values (any shape); it is flattened first.

    Returns:
        ``(encoded, huff_code)`` where ``encoded`` is a string of ``'0'`` /
        ``'1'`` characters and ``huff_code`` maps each distinct pixel value
        to its bit string. An empty input yields ``('', {})``. An input with
        a single distinct value gets the one-bit code ``'0'`` so the encoded
        stream still has one bit per pixel and can be decoded.
    """
    pixels = np.asarray(img).ravel()
    if pixels.size == 0:
        # No symbols means no tree; heap[0] below would raise IndexError.
        return '', {}

    # Count occurrences; dict order (first appearance) fixes heap tie-breaks.
    freq = defaultdict(int)
    for val in pixels:
        freq[val] += 1

    heap = [Node(count, symbol) for symbol, count in freq.items()]
    heapq.heapify(heap)

    # Repeatedly merge the two least frequent nodes until one root remains.
    while len(heap) > 1:
        n1 = heapq.heappop(heap)
        n2 = heapq.heappop(heap)
        heapq.heappush(heap, Node(n1.freq + n2.freq, None, n1, n2))

    root = heap[0]
    huff_code = {}

    if root.left is None and root.right is None:
        # Only one distinct symbol: the root is a leaf. Give it a real
        # one-bit code instead of the empty string.
        huff_code[root.symbol] = '0'
    else:
        # Iterative pre-order walk (left = '0', right = '1'); avoids hitting
        # the recursion limit on deep, degenerate trees.
        stack = [(root, '')]
        while stack:
            node, code = stack.pop()
            if node.symbol is not None:
                huff_code[node.symbol] = code
                continue
            # Push right first so the left subtree is visited first.
            stack.append((node.right, code + '1'))
            stack.append((node.left, code + '0'))

    encoded = ''.join(huff_code[p] for p in pixels)
    return encoded, huff_code

# DCT
def apply_dct(img):
    """Return the DCT (via ``cv2.dct``) of ``img`` after scaling it by 1/255.

    The input is converted to float32 and divided by 255.0 before the
    transform is applied.
    """
    scaled = np.float32(img) / 255.0
    return cv2.dct(scaled)

def inverse_dct(dct_img):
    """Return the inverse DCT (via ``cv2.idct``) of ``dct_img``."""
    reconstructed = cv2.idct(dct_img)
    return reconstructed

def get_size(obj):
    """Return the in-memory size of ``obj`` in bytes, including its contents.

    ``sys.getsizeof`` alone is shallow: for a list it counts the list object
    itself but not the elements it refers to. This walks lists, tuples, sets,
    frozensets and dicts and adds the size of every distinct object reached,
    counting shared objects once. For any other object the result is exactly
    ``sys.getsizeof(obj)``.
    """
    seen_ids = set()
    pending = [obj]
    total = 0
    while pending:
        current = pending.pop()
        if id(current) in seen_ids:
            # Already counted (shared reference or a cycle).
            continue
        seen_ids.add(id(current))
        total += sys.getsizeof(current)
        if isinstance(current, dict):
            pending.extend(current.keys())
            pending.extend(current.values())
        elif isinstance(current, (list, tuple, set, frozenset)):
            pending.extend(current)
    return total


In [None]:
# Evaluate classification accuracy on the validation loader.
model.eval()
correct = 0
total = 0

# Gradients are not needed for evaluation; disabling them avoids building
# the autograd graph for every batch.
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit per sample is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Validation Accuracy: {(100 * correct / total):.2f}%")




Validation Accuracy: 93.75%


In [None]:
from PIL import Image

# Preprocessing applied to a single PIL image before it is fed to the model:
# resize to 224x224, convert to a tensor, normalise each channel with
# mean 0.5 and std 0.5.
transform_single = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

import requests
from io import BytesIO

def predict_from_url(img_url, model):
    """Download an image, print compression statistics, and classify it.

    The image is converted to 224x224 grayscale, run-length and Huffman
    encoded to report their sizes, passed through a DCT / inverse-DCT round
    trip, and the reconstructed image is classified by ``model``.

    Args:
        img_url: URL of the image to download.
        model: classifier whose output indices correspond to ``classes``.

    Returns:
        The predicted class name, looked up in the module-level ``classes``.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Step 1: Download image from URL
    # A timeout keeps the cell from hanging forever, and raise_for_status()
    # surfaces HTTP errors directly instead of as an image decoding failure.
    response = requests.get(img_url, timeout=10)
    response.raise_for_status()
    img_pil = Image.open(BytesIO(response.content)).convert("RGB")

    # Step 2: Convert to grayscale for compression
    img_gray = img_pil.convert("L").resize((224, 224))
    img_np = np.array(img_gray)

    # Step 3: Compression (all sizes are reported in bytes)
    original_size = img_np.nbytes

    rle_encoded = rle_encode(img_np)
    # Size of the runs as packed records, not the memory footprint of the
    # Python list that happens to hold them.
    rle_size = _rle_packed_nbytes(rle_encoded, img_np.itemsize)

    huff_encoded, _ = huffman_encoding(img_np)
    # huff_encoded has one character per bit; round up to whole bytes.
    huff_size = (len(huff_encoded) + 7) // 8

    # DCT round trip; apply_dct scales by 1/255, so scale back before casting.
    dct_img = apply_dct(img_np)
    recon_img = inverse_dct(dct_img)
    recon_img = np.clip(recon_img * 255, 0, 255).astype(np.uint8)

    # Step 4: Convert DCT output to RGB format for model
    recon_rgb = cv2.cvtColor(recon_img, cv2.COLOR_GRAY2RGB)
    recon_pil = Image.fromarray(recon_rgb)

    input_tensor = transform_single(recon_pil).unsqueeze(0).to(device)

    # Step 5: Predict
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        _, predicted = torch.max(output, 1)

    print("📦 Compression Stats:")
    print(f"Original size: {original_size}")
    print(f"RLE size: {rle_size}")
    print(f"Huffman size: {huff_size}")
    print("📸 Reconstructed image (from DCT) sent to model.")

    return classes[predicted.item()]


def _rle_packed_nbytes(runs, value_bytes=1):
    """Bytes needed to store ``(value, count)`` runs as fixed-width records.

    Each record uses ``value_bytes`` for the value plus the smallest whole
    number of bytes that can hold the longest run length.
    """
    if not runs:
        return 0
    longest_run = max(int(count) for _, count in runs)
    count_bytes = max(1, (longest_run.bit_length() + 7) // 8)
    return len(runs) * (value_bytes + count_bytes)


In [None]:
# Run the download -> compress -> classify pipeline on one sample image.
img_url = "https://th.bing.com/th/id/OIP.OmszxJcT8NO06xdukAihmwHaE7?w=266&h=180&c=7&r=0&o=5&pid=1.7"
predicted_class = predict_from_url(img_url=img_url, model=model)
print(f"🚗 Predicted class: {predicted_class}")


📦 Compression Stats:
Original size: 50176
RLE size: 351064
Huffman size: 49131
📸 Reconstructed image (from DCT) sent to model.
🚗 Predicted class: Car
