diff --git a/drivers/Ide.c b/drivers/Ide.c index bd7ad22..a100554 100644 Binary files a/drivers/Ide.c and b/drivers/Ide.c differ diff --git a/drivers/Ide.h b/drivers/Ide.h index 5b5980e..531ce24 100644 --- a/drivers/Ide.h +++ b/drivers/Ide.h @@ -33,6 +33,7 @@ #define IDE_CMD_WRITE_SECTORS 0x30 #define IDE_CMD_IDENTIFY 0xEC #define IDE_CMD_PACKET 0xA0 +#define IDE_CMD_IDENTIFY_PACKET 0xA1 #define ATAPI_CMD_READ_10 0x28 @@ -42,10 +43,10 @@ // Error Codes #define IDE_OK 0 -#define IDE_ERROR_TIMEOUT -1 -#define IDE_ERROR_NOT_READY -2 -#define IDE_ERROR_NO_DRIVE -3 -#define IDE_ERROR_IO -4 +#define IDE_ERROR_TIMEOUT (-1) +#define IDE_ERROR_NOT_READY (-2) +#define IDE_ERROR_NO_DRIVE (-3) +#define IDE_ERROR_IO (-4) typedef struct { uint16_t base_port; @@ -57,6 +58,7 @@ typedef struct { // Core Functions int IdeInit(void); +int IdeIsInitialized(void); int IdeReadSector(uint8_t drive, uint32_t lba, void* buffer); int IdeWriteSector(uint8_t drive, uint32_t lba, const uint8_t* buffer); int IdeGetDriveInfo(uint8_t drive, char* model_out); diff --git a/fs/Iso9660.c b/fs/Iso9660.c index 001a724..61f86d9 100644 --- a/fs/Iso9660.c +++ b/fs/Iso9660.c @@ -12,6 +12,30 @@ static uint8_t cdrom_drive = 0xFF; +static inline char toupper_iso(char c) { + if (c >= 'a' && c <= 'z') return (char)(c - 'a' + 'A'); + return c; +} + +// Compare ISO names case-insensitively and ignore version suffix ";n" +static int IsoNameEquals(const char* a, const char* b) { + // Skip leading spaces in ISO entries if any + while (*a == ' ') a++; + while (*b == ' ') b++; + + for (;;) { + char ca = *a; + char cb = *b; + if (ca == ';') ca = '\0'; + if (cb == ';') cb = '\0'; + ca = toupper_iso(ca); + cb = toupper_iso(cb); + if (ca != cb) return 0; + if (ca == '\0') return 1; + a++; b++; + } +} + static int ReadSector(uint32_t lba, void* buffer) { if (cdrom_drive == 0xFF) { PrintKernel("[ISO] Auto-detecting CD-ROM drive...\n"); @@ -78,7 +102,7 @@ static Iso9660DirEntry* FindFileInDir(uint32_t dir_lba, uint32_t dir_size, const *semicolon = 0; } - if (FastStrCmp(entry_filename, filename) == 0) { + if (IsoNameEquals(entry_filename, filename)) { // Found it! We need to copy the entry to a new buffer, as the sector_buffer will be freed. Iso9660DirEntry* result = KernelMemoryAlloc(entry->length); if (result) { @@ -241,15 +265,14 @@ int Iso9660Read(const char* path, void* buffer, uint32_t max_size) { const uint32_t to_read = (file_size < max_size) ? 
file_size : max_size; uint8_t* read_buffer = (uint8_t*)buffer; uint32_t bytes_read = 0; + uint8_t* temp_sector = KernelMemoryAlloc(ISO9660_SECTOR_SIZE); + if (!temp_sector) { + if (current_entry != root_entry) KernelFree(current_entry); + KernelFree(pvd); + return -1; + } while (bytes_read < to_read) { uint32_t sector_to_read = file_lba + (bytes_read / ISO9660_SECTOR_SIZE); - uint8_t* temp_sector = KernelMemoryAlloc(ISO9660_SECTOR_SIZE); - if (!temp_sector) { - if (current_entry != root_entry) KernelFree(current_entry); - KernelFree(pvd); - return -1; - } - if (ReadSector(sector_to_read, temp_sector) != 0) { KernelFree(temp_sector); if (current_entry != root_entry) KernelFree(current_entry); @@ -264,8 +287,8 @@ int Iso9660Read(const char* path, void* buffer, uint32_t max_size) { FastMemcpy(read_buffer + bytes_read, temp_sector + offset_in_sector, chunk_size); bytes_read += chunk_size; - KernelFree(temp_sector); } + KernelFree(temp_sector); if (current_entry != root_entry) { KernelFree(current_entry); @@ -447,8 +470,6 @@ int Iso9660CopyFile(const char* iso_path, const char* vfs_path) { return 0; // Success } - - int Iso9660Copy(const char* iso_path, const char* vfs_path) { Iso9660DirEntry** entries = Iso9660ListDir(iso_path); if (!entries) { diff --git a/fs/VFRFS.c b/fs/VFRFS.c index 5f63158..2f6eede 100644 --- a/fs/VFRFS.c +++ b/fs/VFRFS.c @@ -433,7 +433,7 @@ int FsListDir(const char* path) { FsNode* child = dir_node->children; if (!child) { - PrintKernel("(empty directory)\n"); + PrintKernel("\n"); return 0; } diff --git a/kernel/etc/POST.c b/kernel/etc/POST.c index 82b3754..4588808 100644 --- a/kernel/etc/POST.c +++ b/kernel/etc/POST.c @@ -27,7 +27,7 @@ bool MemoryTest() { if (!ptr) return false; KernelFree(ptr); } - // + for (int i = 0; i < N; i++) ptrs[i] = KernelMemoryAlloc(128); // free every other block @@ -38,25 +38,6 @@ bool MemoryTest() { ptrs[i] = KernelMemoryAlloc((i % 2) ? 
64 : 256); } - for (int iter = 0; iter < 100000; iter++) { - int idx = rnd() % N; - if (ptrs[idx]) { - KernelFree(ptrs[idx]); - ptrs[idx] = NULL; - } else { - size_t sz = (rnd() % 8192) + 1; // 1–8K - ptrs[idx] = KernelMemoryAlloc(sz); - if (!ptrs[idx]) PANIC("OOM during fuzz"); - } - } - - for (uintptr_t addr = 0x400000; addr < 0x800000; addr += 0x1000) { - void* frame = AllocPage(); - VMemMap(addr, (uint64_t)frame, PAGE_PRESENT | PAGE_WRITABLE); - VMemUnmap(addr, PAGE_SIZE); - FreePage(frame); - } - for (int i = 0; i < 1000; i++) { size_t sz = (i % 500) + 1; uint8_t *p = (uint8_t*)KernelMemoryAlloc(sz); diff --git a/kernel/etc/Shell.c b/kernel/etc/Shell.c index e48af30..2cc0a34 100644 --- a/kernel/etc/Shell.c +++ b/kernel/etc/Shell.c @@ -1220,11 +1220,11 @@ void ExecuteCommand(const char* cmd) { }; const uint32_t pid = LoadExecutable(full, &opts); if (pid != 0) { - PrintKernelSuccess("ELF Executable loaded (PID: "); + PrintKernelSuccess("Executable loaded (PID: "); PrintKernelInt(pid); PrintKernel(")\n"); } else { - PrintKernelError("Failed to load ELF executable\n"); + PrintKernelError("Failed to load executable\n"); } KernelFree(cmd_name); return; // avoid also running a built-in with the same name diff --git a/mm/KernelHeap.c b/mm/KernelHeap.c index 02d43dd..5e1da8c 100644 --- a/mm/KernelHeap.c +++ b/mm/KernelHeap.c @@ -9,9 +9,11 @@ typedef struct HeapBlock { uint32_t magic; // Magic number for corruption detection size_t size; // User data size (not including header) uint8_t is_free; // Boolean: 1 if free, 0 if allocated - struct HeapBlock* next; // Next block in list - struct HeapBlock* prev; // Previous block in list + uint8_t in_cache; // Boolean: 1 if present in fast cache + struct HeapBlock* next; // Next block in heap list (by physical order within a chunk) + struct HeapBlock* prev; // Previous block in heap list uint32_t checksum; // Header checksum for integrity + struct HeapBlock* cache_next; // Next block in fast cache list (separate linkage) } HeapBlock; // Magic constants @@ -46,6 +48,10 @@ static size_t total_allocated = 0; static size_t peak_allocated = 0; static FastCache fast_caches[NUM_SIZE_CLASSES]; +// Runtime-tunable knobs (with safe defaults) +static volatile size_t g_small_alloc_threshold = SMALL_ALLOC_THRESHOLD; +static volatile int g_fast_cache_capacity = FAST_CACHE_SIZE; + // Validation level (can be reduced in production) static volatile int validation_level = 1; // 0=none, 1=basic, 2=full @@ -124,6 +130,8 @@ static void InitBlock(HeapBlock* block, size_t size, int is_free) { block->magic = is_free ? HEAP_MAGIC_FREE : HEAP_MAGIC_ALLOC; block->size = size; block->is_free = is_free ? 
1 : 0; + block->in_cache = 0; // Reset cache state on (re)initialization + block->cache_next = NULL; // Clear cache linkage if (validation_level > 1) { block->checksum = ComputeChecksum(block); } @@ -143,12 +151,12 @@ static HeapBlock* FastCachePop(int size_class) { if (!cache->free_list) return NULL; HeapBlock* block = cache->free_list; - cache->free_list = block->next; + cache->free_list = block->cache_next; cache->count--; - // Clear linkage - block->next = NULL; - block->prev = NULL; + // Clear cache linkage and flag + block->cache_next = NULL; + block->in_cache = 0; return block; } @@ -156,24 +164,23 @@ static HeapBlock* FastCachePop(int size_class) { static void FastCachePush(HeapBlock* block, int size_class) { ASSERT(__sync_fetch_and_add(&kheap_lock, 0) != 0); FastCache* cache = &fast_caches[size_class]; - if (cache->count >= FAST_CACHE_SIZE) return; // Cache full + if (cache->count >= g_fast_cache_capacity) return; // Cache full - block->next = cache->free_list; - block->prev = NULL; - if (cache->free_list) cache->free_list->prev = block; + block->cache_next = cache->free_list; cache->free_list = block; cache->count++; + block->in_cache = 1; } // Optimized free block search with early termination static HeapBlock* FindFreeBlock(size_t size) { // For small allocations, do a quick scan for exact/close fits - if (size <= SMALL_ALLOC_THRESHOLD) { + if (size <= g_small_alloc_threshold) { HeapBlock* first_fit = NULL; int blocks_scanned = 0; for (HeapBlock* block = heap_head; block && blocks_scanned < 32; block = block->next, blocks_scanned++) { - if (block->is_free && block->size >= size) { + if (block->is_free && !block->in_cache && block->size >= size) { if (block->size <= size * 2) { // Close fit return block; } @@ -188,7 +195,7 @@ static HeapBlock* FindFreeBlock(size_t size) { size_t best_size = MAX_ALLOC_SIZE; for (HeapBlock* block = heap_head; block; block = block->next) { - if (block->is_free && block->size >= size && block->size < best_size) { + if (block->is_free && !block->in_cache && block->size >= size && block->size < best_size) { best = block; best_size = block->size; if (block->size == size) break; // Perfect fit @@ -215,7 +222,7 @@ static void SplitBlock(HeapBlock* block, size_t needed_size) { if (block->next) block->next->prev = new_block; block->next = new_block; - // Update original block size + // Update original block->size = needed_size; UpdateChecksum(block); } @@ -224,10 +231,10 @@ static void SplitBlock(HeapBlock* block, size_t needed_size) { static HeapBlock* CreateNewBlock(size_t size) { // For small allocations, allocate larger chunks to reduce VMem calls size_t chunk_size = size; - if (size <= SMALL_ALLOC_THRESHOLD) { + if (size <= g_small_alloc_threshold) { chunk_size = (size < 4096) ? 
4096 : PAGE_ALIGN_UP(size * 4); } - + size_t total_size = sizeof(HeapBlock) + chunk_size; void* mem = VMemAlloc(total_size); if (!mem) return NULL; @@ -249,21 +256,55 @@ static HeapBlock* CreateNewBlock(size_t size) { return block; } -// Coalesce adjacent free blocks (optimized) +// Physical adjacency helper +static inline int AreAdjacent(HeapBlock* a, HeapBlock* b) { + return (uint8_t*)b == ((uint8_t*)BlockToUser(a) + a->size); +} + +// Remove a block from any fast cache it may be in +static void CacheRemove(HeapBlock* blk) { + if (!blk->in_cache) return; + for (int i = 0; i < NUM_SIZE_CLASSES; i++) { + HeapBlock* prev = NULL; + HeapBlock* cur = fast_caches[i].free_list; + while (cur) { + if (cur == blk) { + if (prev) prev->cache_next = cur->cache_next; + else fast_caches[i].free_list = cur->cache_next; + fast_caches[i].count--; + blk->cache_next = NULL; + blk->in_cache = 0; + return; + } + prev = cur; + cur = cur->cache_next; + } + } + // Not found: clear flag defensively + blk->in_cache = 0; + blk->cache_next = NULL; +} + +// Coalesce adjacent free blocks (optimized and safe) static void CoalesceWithAdjacent(HeapBlock* block) { - // Merge with next blocks - while (block->next && block->next->is_free) { + // Merge with next blocks only if physically adjacent + while (block->next && block->next->is_free && AreAdjacent(block, block->next)) { HeapBlock* next = block->next; if (!ValidateBlockFast(next)) break; + // If the neighbor is cached, remove it from cache first + if (next->in_cache) { + CacheRemove(next); + } + block->size += sizeof(HeapBlock) + next->size; block->next = next->next; if (next->next) next->next->prev = block; UpdateChecksum(block); } - // Let previous block merge with this one - if (block->prev && block->prev->is_free) { + // Let previous block merge with this one if physically adjacent + if (block->prev && block->prev->is_free && AreAdjacent(block->prev, block)) { CoalesceWithAdjacent(block->prev); } } @@ -304,6 +345,9 @@ void* KernelMemoryAlloc(size_t size) { if (block) { fast_caches[size_class].hits++; InitBlock(block, actual_size, 0); + if (validation_level > 1) { + FastMemset(BlockToUser(block), 0xAA, actual_size); // poison on alloc (debug) + } total_allocated += actual_size; if (total_allocated > peak_allocated) { peak_allocated = total_allocated; @@ -329,12 +373,18 @@ void* KernelMemoryAlloc(size_t size) { SplitBlock(block, size); InitBlock(block, size, 0); + if (validation_level > 1) { + FastMemset(BlockToUser(block), 0xAA, size); + } } else { block = CreateNewBlock(size); if (!block) { SpinUnlockIrqRestore(&kheap_lock, flags); return NULL; } + if (validation_level > 1) { + FastMemset(BlockToUser(block), 0xAA, size); + } } total_allocated += size; @@ -444,6 +494,7 @@ void PrintHeapStats(void) { size_t free_blocks = 0, used_blocks = 0; size_t free_bytes = 0, used_bytes = 0; size_t cached_blocks = 0; + size_t largest_free = 0; for (HeapBlock* block = heap_head; block; block = block->next) { if (!ValidateBlock(block, "stats")) continue; @@ -451,6 +502,7 @@ void PrintHeapStats(void) { if (block->is_free) { free_blocks++; free_bytes += block->size; + if (block->size > largest_free) largest_free = block->size; } else { used_blocks++; used_bytes += block->size; @@ -465,17 +517,23 @@ void PrintHeapStats(void) { SpinUnlockIrqRestore(&kheap_lock, flags); PrintKernel("[HEAP] Blocks: "); PrintKernelInt(used_blocks); - PrintKernel(" used, "); PrintKernelInt(free_blocks); PrintKernel(" free, "); + PrintKernel(", "); PrintKernelInt(free_blocks); PrintKernel(" free, "); 
PrintKernelInt(cached_blocks); PrintKernel(" cached\n"); PrintKernel("[HEAP] Memory: "); PrintKernelInt(used_bytes / 1024); PrintKernel("KB used, "); PrintKernelInt(free_bytes / 1024); PrintKernel("KB free\n"); PrintKernel("[HEAP] Peak: "); PrintKernelInt(peak_allocated / 1024); PrintKernel("KB\n"); - + + if (free_bytes > 0) { + int frag = (int)(((free_bytes - largest_free) * 100) / free_bytes); + PrintKernel("[HEAP] Fragmentation: "); PrintKernelInt(frag); PrintKernel("% (largest free block "); + PrintKernelInt(largest_free); PrintKernel(" bytes)\n"); + } + // Show cache efficiency PrintKernel("[HEAP] Cache stats:\n"); for (int i = 0; i < NUM_SIZE_CLASSES; i++) { if (fast_caches[i].hits + fast_caches[i].misses > 0) { - int hit_rate = (fast_caches[i].hits * 100) / (fast_caches[i].hits + fast_caches[i].misses); + int hit_rate = (int)((fast_caches[i].hits * 100) / (fast_caches[i].hits + fast_caches[i].misses)); PrintKernel(" "); PrintKernelInt(size_classes[i]); PrintKernel("B: "); PrintKernelInt(hit_rate); PrintKernel("% hit rate\n"); } @@ -500,4 +558,32 @@ void KernelHeapFlushCaches(void) { } SpinUnlockIrqRestore(&kheap_lock, flags); -} \ No newline at end of file +} + + +void KernelHeapTune(size_t small_alloc_threshold, int fast_cache_capacity) { + irq_flags_t flags = SpinLockIrqSave(&kheap_lock); + + // Clamp to sane bounds + if (small_alloc_threshold < MIN_BLOCK_SIZE) small_alloc_threshold = MIN_BLOCK_SIZE; + if (small_alloc_threshold > 8192) small_alloc_threshold = 8192; // cap to keep chunking reasonable + if (fast_cache_capacity < 0) fast_cache_capacity = 0; + if (fast_cache_capacity > 1024) fast_cache_capacity = 1024; // prevent runaway memory in caches + + g_small_alloc_threshold = AlignSize(small_alloc_threshold); + g_fast_cache_capacity = fast_cache_capacity; + + // If capacity shrank, proactively trim caches and coalesce + for (int i = 0; i < NUM_SIZE_CLASSES; i++) { + while (fast_caches[i].count > g_fast_cache_capacity) { + HeapBlock* blk = FastCachePop(i); + if (blk) { + // Mark free and merge back to main free space + InitBlock(blk, size_classes[i], 1); + CoalesceWithAdjacent(blk); + } + } + } + + SpinUnlockIrqRestore(&kheap_lock, flags); +} diff --git a/mm/KernelHeap.h b/mm/KernelHeap.h index b275053..b793bed 100644 --- a/mm/KernelHeap.h +++ b/mm/KernelHeap.h @@ -17,4 +17,7 @@ void PrintHeapStats(void); void KernelHeapSetValidationLevel(int level); // 0=none, 1=basic, 2=full void KernelHeapFlushCaches(void); +// Runtime tuning knobs (safe to call at early boot or quiescent points) +void KernelHeapTune(size_t small_alloc_threshold, int fast_cache_capacity); + #endif // KHEAP_H \ No newline at end of file diff --git a/mm/VMem.c b/mm/VMem.c index 77cc5ca..9dcbf29 100644 --- a/mm/VMem.c +++ b/mm/VMem.c @@ -346,49 +346,44 @@ void VMemFree(void* vaddr, uint64_t size) { // 2. Determine which region this address belongs to irq_flags_t flags = SpinLockIrqSave(&vmem_lock); - + int region = (start_vaddr >= VIRT_ADDR_SPACE_HIGH_START) ? 1 : 0; VMemFreeBlock** free_list = region ? 
&kernel_space.free_list_high : &kernel_space.free_list_low; - VMemFreeBlock* new_block = AllocFreeBlock(); - if (!new_block) { + VMemFreeBlock* node = AllocFreeBlock(); + if (!node) { SpinUnlockIrqRestore(&vmem_lock, flags); PANIC("VMemFree: Out of free list nodes"); } - new_block->base = start_vaddr; - new_block->size = size; + node->base = start_vaddr; + node->size = size; + node->next = NULL; - // Insert into appropriate sorted list and merge - VMemFreeBlock *prev = NULL, *current = *free_list; - while (current && current->base < new_block->base) { - prev = current; - current = current->next; + // Insert sorted by base address + VMemFreeBlock* prev = NULL; + VMemFreeBlock* cur = *free_list; + while (cur && cur->base < node->base) { + prev = cur; + cur = cur->next; } - // Merge with next block? - if (current && new_block->base + new_block->size == current->base) { - current->base = new_block->base; - current->size += new_block->size; - ReleaseFreeBlock(new_block); - new_block = current; + // Link in + node->next = cur; + if (prev) prev->next = node; else *free_list = node; + + // Coalesce with next + if (node->next && (node->base + node->size == node->next->base)) { + VMemFreeBlock* next = node->next; + node->size += next->size; + node->next = next->next; + ReleaseFreeBlock(next); } - // Merge with previous block? - if (prev && prev->base + prev->size == new_block->base) { - prev->size += new_block->size; - if (new_block == current) { - prev->next = current->next; - ReleaseFreeBlock(current); - } - ReleaseFreeBlock(new_block); - } else if (new_block != current) { - if (prev) { - new_block->next = prev->next; - prev->next = new_block; - } else { - new_block->next = *free_list; - *free_list = new_block; - } + // Coalesce with previous + if (prev && (prev->base + prev->size == node->base)) { + prev->size += node->size; + prev->next = node->next; + ReleaseFreeBlock(node); } vmem_frees++; @@ -437,84 +432,38 @@ void VMemFreeWithGuards(void* ptr, uint64_t size) { } uint64_t VMemGetPhysAddr(uint64_t vaddr) { - uint64_t pdp_phys = VMemGetPageTablePhys((uint64_t)kernel_space.pml4, vaddr, 0, 0); + // Walk PML4 -> PDP -> PD. At PD, handle both 2MB (PAGE_LARGE) and 4KB pages. + uint64_t pml4_phys = (uint64_t)kernel_space.pml4; + uint64_t pdp_phys = VMemGetPageTablePhys(pml4_phys, vaddr, 0, 0); if (!pdp_phys) return 0; uint64_t pd_phys = VMemGetPageTablePhys(pdp_phys, vaddr, 1, 0); if (!pd_phys) return 0; - uint64_t pt_phys = VMemGetPageTablePhys(pd_phys, vaddr, 2, 0); - if (!pt_phys) return 0; - - // Access PT through identity mapping if possible - uint64_t* pt_virt; - if (pt_phys < IDENTITY_MAP_SIZE) { - pt_virt = (uint64_t*)pt_phys; - } else { - pt_virt = (uint64_t*)PHYS_TO_VIRT(pt_phys); - } - int pt_index = (vaddr >> PT_SHIFT) & PT_INDEX_MASK; - - if (!(pt_virt[pt_index] & PAGE_PRESENT)) return 0; - - return (pt_virt[pt_index] & PT_ADDR_MASK) | (vaddr & PAGE_MASK); -} - -void VMemMapKernel(uint64_t kernel_phys_start, uint64_t kernel_phys_end) { - (void)kernel_phys_start; - (void)kernel_phys_end; + // Access PD + uint64_t* pd_virt = (pd_phys < IDENTITY_MAP_SIZE) ? 
(uint64_t*)pd_phys : (uint64_t*)PHYS_TO_VIRT(pd_phys); + int pd_index = (vaddr >> PD_SHIFT) & PT_INDEX_MASK; + uint64_t pde = pd_virt[pd_index]; - PrintKernelSuccess("VMem: VMem: Mapping kernel sections...\n"); + if (!(pde & PAGE_PRESENT)) return 0; - // Map .text section (read-only) - uint64_t text_start = PAGE_ALIGN_DOWN((uint64_t)_text_start); - uint64_t text_end = PAGE_ALIGN_UP((uint64_t)_text_end); - for (uint64_t paddr = text_start; paddr < text_end; paddr += PAGE_SIZE) { - uint64_t vaddr = paddr + KERNEL_VIRTUAL_OFFSET; - int result = VMemMap(vaddr, paddr, PAGE_PRESENT); - if (result != VMEM_SUCCESS) { - PANIC_CODE("VMemMapKernel: Failed to map .text page!", result); - } + if (pde & PAGE_LARGE) { + // 2MB page: physical base is 2MB aligned from PDE + uint64_t base = pde & PT_ADDR_MASK; // upper bits contain frame; PT_ADDR_MASK works for PD too + return (base & ~((uint64_t)HUGE_PAGE_SIZE - 1)) | (vaddr & (HUGE_PAGE_SIZE - 1)); } - PrintKernel(" .text mapped (RO): 0x"); PrintKernelHex(text_start); PrintKernel(" - 0x"); PrintKernelHex(text_end); PrintKernel("\n"); - // Map .rodata section (read-only) - uint64_t rodata_start = PAGE_ALIGN_DOWN((uint64_t)_rodata_start); - uint64_t rodata_end = PAGE_ALIGN_UP((uint64_t)_rodata_end); - for (uint64_t paddr = rodata_start; paddr < rodata_end; paddr += PAGE_SIZE) { - uint64_t vaddr = paddr + KERNEL_VIRTUAL_OFFSET; - int result = VMemMap(vaddr, paddr, PAGE_PRESENT); - if (result != VMEM_SUCCESS) { - PANIC_CODE("VMemMapKernel: Failed to map .rodata page!", result); - } - } - PrintKernel(" .rodata mapped (RO): 0x"); PrintKernelHex(rodata_start); PrintKernel(" - 0x"); PrintKernelHex(rodata_end); PrintKernel("\n"); + // Otherwise, continue to PT for 4KB page + uint64_t pt_phys = VMemGetPageTablePhys(pd_phys, vaddr, 2, 0); + if (!pt_phys) return 0; - // Map .data section (read-write) - uint64_t data_start = PAGE_ALIGN_DOWN((uint64_t)_data_start); - uint64_t data_end = PAGE_ALIGN_UP((uint64_t)_data_end); - for (uint64_t paddr = data_start; paddr < data_end; paddr += PAGE_SIZE) { - uint64_t vaddr = paddr + KERNEL_VIRTUAL_OFFSET; - int result = VMemMap(vaddr, paddr, PAGE_WRITABLE); - if (result != VMEM_SUCCESS) { - PANIC_CODE("VMemMapKernel: Failed to map .data page!", result); - } - } - PrintKernel(" .data mapped (RW): 0x"); PrintKernelHex(data_start); PrintKernel(" - 0x"); PrintKernelHex(data_end); PrintKernel("\n"); + uint64_t* pt_virt = (pt_phys < IDENTITY_MAP_SIZE) ? 
(uint64_t*)pt_phys : (uint64_t*)PHYS_TO_VIRT(pt_phys); + int pt_index = (vaddr >> PT_SHIFT) & PT_INDEX_MASK; + uint64_t pte = pt_virt[pt_index]; - // Map .bss section (read-write) - uint64_t bss_start = PAGE_ALIGN_DOWN((uint64_t)_bss_start); - uint64_t bss_end = PAGE_ALIGN_UP((uint64_t)_bss_end); - for (uint64_t paddr = bss_start; paddr < bss_end; paddr += PAGE_SIZE) { - uint64_t vaddr = paddr + KERNEL_VIRTUAL_OFFSET; - int result = VMemMap(vaddr, paddr, PAGE_WRITABLE); - if (result != VMEM_SUCCESS) { - PANIC_CODE("VMemMapKernel: Failed to map .bss page!", result); - } - } - PrintKernel(" .bss mapped (RW): 0x"); PrintKernelHex(bss_start); PrintKernel(" - 0x"); PrintKernelHex(bss_end); PrintKernel("\n"); + if (!(pte & PAGE_PRESENT)) return 0; - PrintKernelSuccess("VMem: VMem: Kernel section mapping complete.\n"); + return (pte & PT_ADDR_MASK) | (vaddr & PAGE_MASK); } int VMemIsPageMapped(uint64_t vaddr) { @@ -536,30 +485,49 @@ void VMemFlushTLBSingle(uint64_t vaddr) { int VMemUnmap(uint64_t vaddr, uint64_t size) { if (size == 0) return VMEM_SUCCESS; - - size = PAGE_ALIGN_UP(size); - uint64_t num_pages = size / PAGE_SIZE; - + + uint64_t start = PAGE_ALIGN_DOWN(vaddr); + uint64_t end = PAGE_ALIGN_UP(vaddr + size); + uint64_t num_pages = (end - start) / PAGE_SIZE; + for (uint64_t i = 0; i < num_pages; i++) { - uint64_t current_vaddr = vaddr + (i * PAGE_SIZE); - + uint64_t current_vaddr = start + (i * PAGE_SIZE); + irq_flags_t flags = SpinLockIrqSave(&vmem_lock); - - uint64_t pdp_phys = VMemGetPageTablePhys((uint64_t)kernel_space.pml4, current_vaddr, 0, 0); + + uint64_t pml4_phys = (uint64_t)kernel_space.pml4; + uint64_t pdp_phys = VMemGetPageTablePhys(pml4_phys, current_vaddr, 0, 0); if (!pdp_phys) { SpinUnlockIrqRestore(&vmem_lock, flags); continue; } uint64_t pd_phys = VMemGetPageTablePhys(pdp_phys, current_vaddr, 1, 0); if (!pd_phys) { SpinUnlockIrqRestore(&vmem_lock, flags); continue; } + // Check for huge-page mapping (2MB) at PD level + uint64_t* pd_virt = (pd_phys < IDENTITY_MAP_SIZE) ? (uint64_t*)pd_phys : (uint64_t*)PHYS_TO_VIRT(pd_phys); + int pd_index = (current_vaddr >> PD_SHIFT) & PT_INDEX_MASK; + uint64_t pde = pd_virt[pd_index]; + if ((pde & PAGE_PRESENT) && (pde & PAGE_LARGE)) { + // Only unmap if we are aligned and have enough remaining to cover the whole huge page + if (IS_HUGE_PAGE_ALIGNED(current_vaddr) && (end - current_vaddr) >= HUGE_PAGE_SIZE) { + pd_virt[pd_index] = 0; + kernel_space.used_pages -= (HUGE_PAGE_SIZE / PAGE_SIZE); + kernel_space.total_mapped -= HUGE_PAGE_SIZE; + SpinUnlockIrqRestore(&vmem_lock, flags); + // Flush once for the huge region + for (uint64_t off = 0; off < HUGE_PAGE_SIZE; off += PAGE_SIZE) { + VMemFlushTLBSingle(current_vaddr + off); + } + // Skip the rest of the pages covered by this huge page + i += (HUGE_PAGE_SIZE / PAGE_SIZE) - 1; + continue; + } + // If not aligned/size insufficient, fall through to 4KB path (cannot partially unmap 2MB) + } + uint64_t pt_phys = VMemGetPageTablePhys(pd_phys, current_vaddr, 2, 0); if (!pt_phys) { SpinUnlockIrqRestore(&vmem_lock, flags); continue; } - uint64_t* pt_virt; - if (pt_phys < IDENTITY_MAP_SIZE) { - pt_virt = (uint64_t*)pt_phys; - } else { - pt_virt = (uint64_t*)PHYS_TO_VIRT(pt_phys); - } + uint64_t* pt_virt = (pt_phys < IDENTITY_MAP_SIZE) ? 
(uint64_t*)pt_phys : (uint64_t*)PHYS_TO_VIRT(pt_phys); int pt_index = (current_vaddr >> PT_SHIFT) & PT_INDEX_MASK; if (pt_virt[pt_index] & PAGE_PRESENT) { @@ -567,11 +535,11 @@ int VMemUnmap(uint64_t vaddr, uint64_t size) { kernel_space.used_pages--; kernel_space.total_mapped -= PAGE_SIZE; } - + SpinUnlockIrqRestore(&vmem_lock, flags); VMemFlushTLBSingle(current_vaddr); } - + return VMEM_SUCCESS; } diff --git a/mm/VMem.h b/mm/VMem.h index 3c57708..8941968 100644 --- a/mm/VMem.h +++ b/mm/VMem.h @@ -125,7 +125,6 @@ void VMemFree(void* vaddr, uint64_t size); int VMemMap(uint64_t vaddr, uint64_t paddr, uint64_t flags); int VMemUnmap(uint64_t vaddr, uint64_t size); void PrintVMemStats(void); -void VMemMapKernel(uint64_t kernel_phys_start, uint64_t kernel_phys_end); // Safer allocation with unmapped guard pages void* VMemAllocWithGuards(uint64_t size); @@ -135,6 +134,8 @@ void VMemFreeWithGuards(void* ptr, uint64_t size); void* VMemAllocStack(uint64_t size); void VMemFreeStack(void* stack_top, uint64_t size); +// Huge page mapping +int VMemMapHuge(uint64_t vaddr, uint64_t paddr, uint64_t flags); // MMIO-specific mapping functions (bypass RAM validation for hardware registers) int VMemMapMMIO(uint64_t vaddr, uint64_t paddr, uint64_t size, uint64_t flags); void VMemUnmapMMIO(uint64_t vaddr, uint64_t size);
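
Reviewer notes (not part of the patch) — a few standalone C sketches restating mechanisms this change introduces; all identifiers below are illustrative unless they literally match the diff.

The first sketch mirrors the name matching added to fs/Iso9660.c (IsoNameEquals): compare case-insensitively and treat the ISO 9660 version suffix ";n" as end of string, so a lookup for "kernel.elf" matches a directory record stored as "KERNEL.ELF;1". This is a host-side model with made-up sample names, not the kernel code itself.

/* Host-side sketch of the ISO 9660 name comparison introduced in fs/Iso9660.c. */
#include <stdio.h>

static char iso_upper(char c) {
    return (c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : c;
}

static int iso_name_equals(const char* a, const char* b) {
    while (*a == ' ') a++;   /* ISO entries may be space-padded */
    while (*b == ' ') b++;
    for (;;) {
        char ca = (*a == ';') ? '\0' : iso_upper(*a);  /* ";1" version suffix ends the name */
        char cb = (*b == ';') ? '\0' : iso_upper(*b);
        if (ca != cb) return 0;
        if (ca == '\0') return 1;
        a++; b++;
    }
}

int main(void) {
    /* Sample names are invented for illustration. */
    printf("%d\n", iso_name_equals("KERNEL.ELF;1", "kernel.elf")); /* prints 1 */
    printf("%d\n", iso_name_equals("BOOT.CFG;1", "boot.bin"));     /* prints 0 */
    return 0;
}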
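
The next sketch is a simplified model of the dual linkage added to HeapBlock in mm/KernelHeap.c: next/prev describe physical order inside a chunk and must never be rewritten by the size-class cache, so the cache threads blocks through a separate cache_next pointer and marks them with in_cache (which FindFreeBlock and coalescing then check). Types, field names, and the capacity below are illustrative, not the kernel's.

/* Simplified model of the separate fast-cache linkage in mm/KernelHeap.c. */
#include <stddef.h>

typedef struct Block {
    size_t size;
    int is_free;
    int in_cache;
    struct Block* next;        /* physical-order heap list: never touched by the cache */
    struct Block* prev;
    struct Block* cache_next;  /* size-class cache list only */
} Block;

typedef struct {
    Block* free_list;
    int count;
    int capacity;
} SizeCache;

static void cache_push(SizeCache* c, Block* b) {
    if (c->count >= c->capacity) return;   /* cache full: block stays on the heap list only */
    b->cache_next = c->free_list;
    c->free_list = b;
    c->count++;
    b->in_cache = 1;                       /* free-block search and coalescing skip cached blocks */
}

static Block* cache_pop(SizeCache* c) {
    Block* b = c->free_list;
    if (!b) return NULL;
    c->free_list = b->cache_next;
    c->count--;
    b->cache_next = NULL;
    b->in_cache = 0;
    return b;
}

int main(void) {
    Block b = { .size = 64, .is_free = 1 };
    SizeCache c = { .free_list = NULL, .count = 0, .capacity = 32 };
    cache_push(&c, &b);                    /* b.in_cache == 1, heap next/prev untouched */
    Block* again = cache_pop(&c);
    return (again == &b && !b.in_cache) ? 0 : 1;
}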
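
The fragmentation figure now printed by PrintHeapStats is the share of free memory that is not in the single largest free block. A tiny host-side illustration with made-up numbers:

/* Sketch of the fragmentation metric added to PrintHeapStats. */
#include <stddef.h>
#include <stdio.h>

static int fragmentation_pct(size_t free_bytes, size_t largest_free) {
    if (free_bytes == 0) return 0;
    return (int)(((free_bytes - largest_free) * 100) / free_bytes);
}

int main(void) {
    /* 400 KB free with a largest run of 100 KB -> 75% fragmented. */
    printf("%d%%\n", fragmentation_pct(400 * 1024, 100 * 1024));
    /* One contiguous free run -> 0% fragmented. */
    printf("%d%%\n", fragmentation_pct(256 * 1024, 256 * 1024));
    return 0;
}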
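
The rewritten VMemGetPhysAddr stops the walk at the PD when the entry has the large-page bit set: bits 20:0 of the virtual address are then the offset into a 2 MiB frame. The sketch below restates that address math with the standard x86-64 constants spelled out locally rather than taken from VMem.h, and a hypothetical PDE value for the demo.

/* Sketch of the 2 MiB (large-page) translation branch in VMemGetPhysAddr. */
#include <stdint.h>
#include <stdio.h>

#define PRESENT_BIT (1ULL << 0)
#define PS_BIT      (1ULL << 7)              /* "page size" / large-page flag in a PDE */
#define HUGE_SIZE   (2ULL * 1024 * 1024)     /* 2 MiB */
#define ADDR_MASK   0x000FFFFFFFFFF000ULL    /* bits 51:12 of a table entry */

/* Returns 0 if the PDE does not map a present 2 MiB page. */
static uint64_t huge_pde_translate(uint64_t pde, uint64_t vaddr) {
    if (!(pde & PRESENT_BIT) || !(pde & PS_BIT)) return 0;
    uint64_t base = pde & ADDR_MASK & ~(HUGE_SIZE - 1);  /* 2 MiB-aligned frame base */
    return base | (vaddr & (HUGE_SIZE - 1));             /* keep the 21-bit page offset */
}

int main(void) {
    /* Hypothetical PDE mapping a 2 MiB frame at 0x40000000 (present + PS set). */
    uint64_t pde = 0x40000000ULL | PRESENT_BIT | PS_BIT;
    uint64_t pa = huge_pde_translate(pde, 0xFFFF800000123456ULL);
    printf("0x%llx\n", (unsigned long long)pa);           /* prints 0x40123456 */
    return 0;
}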
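
Finally, a simplified model of the rewritten VMemFree bookkeeping: the freed range is inserted into an address-sorted singly linked list, then merged with the neighbour on either side when the ranges touch. Node allocation here is plain malloc purely for the sketch; the kernel draws nodes from its fixed pool and panics on exhaustion.

/* Sketch of the sorted-insert-then-coalesce logic in VMemFree. */
#include <stdint.h>
#include <stdlib.h>

typedef struct Range {
    uint64_t base;
    uint64_t size;
    struct Range* next;
} Range;

static void free_range_insert(Range** list, uint64_t base, uint64_t size) {
    Range* node = malloc(sizeof(*node));
    if (!node) return;                        /* the kernel panics here instead */
    node->base = base; node->size = size; node->next = NULL;

    Range *prev = NULL, *cur = *list;
    while (cur && cur->base < base) { prev = cur; cur = cur->next; }  /* keep list sorted by base */

    node->next = cur;
    if (prev) prev->next = node; else *list = node;

    if (node->next && node->base + node->size == node->next->base) { /* merge forward */
        Range* n = node->next;
        node->size += n->size;
        node->next = n->next;
        free(n);
    }
    if (prev && prev->base + prev->size == node->base) {             /* merge backward */
        prev->size += node->size;
        prev->next = node->next;
        free(node);
    }
}

int main(void) {
    Range* list = NULL;
    free_range_insert(&list, 0x3000, 0x1000);
    free_range_insert(&list, 0x1000, 0x1000);
    free_range_insert(&list, 0x2000, 0x1000);  /* bridges the two into one 0x1000..0x4000 range */
    return (list && list->base == 0x1000 && list->size == 0x3000 && !list->next) ? 0 : 1;
}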