diff --git a/.pandoc/pandoc.yaml b/.pandoc/pandoc.yaml index a7b3321d..f7f0651a 100644 --- a/.pandoc/pandoc.yaml +++ b/.pandoc/pandoc.yaml @@ -5,7 +5,34 @@ author: - Dean T. header-includes: - \usepackage{fvextra} + - \usepackage{geometry} - \usepackage[page,toc,titletoc,title]{appendix} + - \usepackage{fancyhdr} + - \pagestyle{fancy} + - \fancyhead[LE]{\leftmark} + - \fancyhead[RE]{\thepage} + - \fancyhead[LO]{\thepage} + - \fancyhead[RO]{\rightmark} + - \usepackage{float} + - | + ```{=latex} + \let\origfigure\figure + \let\endorigfigure\endfigure + \renewenvironment{figure}[1][2] { + \expandafter\origfigure\expandafter[H] + } { + \endorigfigure + } + ``` - \DefineVerbatimEnvironment{Highlighting}{Verbatim}{breaklines,commandchars=\\\{\}} +geometry: + - tmargin=2.5cm + - bmargin=2.5cm + - lmargin=2.5cm + - rmargin=2.5cm + - headsep =0.89cm + - footskip =0.89cm + - columnsep=1.5cm + - headheight=1.5cm book: true --- diff --git a/02_Architecture/02_Hello_World.md b/02_Architecture/02_Hello_World.md index 1ade240d..cdeeb9c0 100644 --- a/02_Architecture/02_Hello_World.md +++ b/02_Architecture/02_Hello_World.md @@ -55,15 +55,15 @@ static int init_serial() { outb(PORT + 2, 0xC7); // Enable FIFO, clear them, with 14-byte threshold outb(PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set outb(PORT + 4, 0x1E); // Set in loopback mode, test the serial chip - outb(PORT + 0, 0xAE); // Test serial chip (send byte 0xAE and check if serial returns same byte) + outb(PORT + 0, 0xAE); // Send a test byte - // Check if serial is faulty (i.e: not same byte as sent) + // Check that we received the same test byte we sent if(inb(PORT + 0) != 0xAE) { return 1; } - // If serial is not faulty set it in normal operation mode - // (not-loopback with IRQs enabled and OUT#1 and OUT#2 bits enabled) + // If serial is not faulty set it in normal operation mode: + // not-loopback with IRQs enabled and OUT#1 and OUT#2 bits enabled outb(PORT + 4, 0x0F); return 0; } diff --git a/02_Architecture/04_GDT.md 
b/02_Architecture/04_GDT.md index 791a03bd..459500b9 100644 --- a/02_Architecture/04_GDT.md +++ b/02_Architecture/04_GDT.md @@ -260,18 +260,18 @@ If not familiar with inline assembly, check the appendix on using inline assembl void flush_gdt() { asm volatile("\ - mov $0x10, %ax \n\ - mov %ax, %ds \n\ - mov %ax, %es \n\ - mov %ax, %fs \n\ - mov %ax, %gs \n\ - mov %ax, %ss \n\ - \n\ - pop %rdi \n\ - push $0x8 \n\ - push %rdi \n\ - lretq \n\ - "); + mov $0x10, %ax \n\ + mov %ax, %ds \n\ + mov %ax, %es \n\ + mov %ax, %fs \n\ + mov %ax, %gs \n\ + mov %ax, %ss \n\ + \n\ + pop %rdi \n\ + push $0x8 \n\ + push %rdi \n\ + lretq \n\ + "); } ``` diff --git a/02_Architecture/06_ACPITables.md b/02_Architecture/06_ACPITables.md index e2570997..b607fe02 100644 --- a/02_Architecture/06_ACPITables.md +++ b/02_Architecture/06_ACPITables.md @@ -36,11 +36,11 @@ As already mentioned there are two different version of RSDP, basic data structu ```c struct RSDPDescriptor { - char Signature[8]; - uint8_t Checksum; - char OEMID[6]; - uint8_t Revision; - uint32_t RsdtAddress; + char Signature[8]; + uint8_t Checksum; + char OEMID[6]; + uint8_t Revision; + uint32_t RsdtAddress; } __attribute__ ((packed)); ``` @@ -55,13 +55,12 @@ Where the fields are: The structure for the v2 header is an extension of the previous one, so the fields above are still valid, but in addition it has also the following extra-fields: ```c -struct RSDP2Descriptor -{ - //v1 fields - uint32_t Length; - uint64_t XSDTAddress; - uint8_t ExtendedChecksum; - uint8_t Reserved[3]; +struct RSDP2Descriptor { + //v1 fields + uint32_t Length; + uint64_t XSDTAddress; + uint8_t ExtendedChecksum; + uint8_t Reserved[3]; }; ``` @@ -75,11 +74,11 @@ Before proceeding let's explain little bit better the validation. 
For both versi ```c bool validate_RSDP(char *byte_array, size_t size) { - uint32_t sum = 0; - for(int i = 0; i < size; i++) { - sum += byte_array[i]; - } - return (sum & 0xFF) == 0; + uint32_t sum = 0; + for(int i = 0; i < size; i++) { + sum += byte_array[i]; + } + return (sum & 0xFF) == 0; } ``` @@ -101,15 +100,15 @@ Since every SDT table contains different type of information, they are all diffe ```c struct ACPISDTHeader { - char Signature[4]; - uint32_t Length; - uint8_t Revision; - uint8_t Checksum; - char OEMID[6]; - char OEMTableID[8]; - uint32_t OEMRevision; - uint32_t CreatorID; - uint32_t CreatorRevision; + char Signature[4]; + uint32_t Length; + uint8_t Revision; + uint8_t Checksum; + char OEMID[6]; + char OEMTableID[8]; + uint32_t OEMRevision; + uint32_t CreatorID; + uint32_t CreatorRevision; }; ``` * The second part is the table itself, every SDT has it's own table @@ -123,16 +122,14 @@ The RSDT is an SDT header followed by an array of `uint32_t`s, representing the The XSDT is the same, except the array is of `uint64_t`s. 
```c -struct RSDP -{ - ACPISDTHeader sdtHeader; //signature "RSDP" - uint32_t sdtAddresses[]; +struct RSDP { + ACPISDTHeader sdtHeader; //signature "RSDP" + uint32_t sdtAddresses[]; }; -struct XSDT -{ - ACPISDTHeader sdtHeader; //signature "XSDT" - uint64_t sdtAddresses[]; +struct XSDT { + ACPISDTHeader sdtHeader; //signature "XSDT" + uint64_t sdtAddresses[]; }; ``` diff --git a/02_Architecture/11_Keyboard_Driver_Implementation.md b/02_Architecture/11_Keyboard_Driver_Implementation.md index 98a16e47..24a424ee 100644 --- a/02_Architecture/11_Keyboard_Driver_Implementation.md +++ b/02_Architecture/11_Keyboard_Driver_Implementation.md @@ -43,8 +43,6 @@ If we want to store just the scancode we don't need much more, so we can already ```c void keyboard_driver_irq_handler() { - - uint8_t scancode = inb(0x60); // Read byte from the Keyboard data port keyboard_buffer[buf_position] = scancode; @@ -113,7 +111,9 @@ Now by changing the `current_state` variable, we can change how the code will tr uint8_t current_state; void init_keyboard() { - // Do other initialization stuff like: clean the keyboard buffer, identify the scancode set, enable the IRQ etc. + // You'll want to do other setup here in your own driver: + // ensure the input buffer of the keyboard is empty, check which scancode + // set is in use, enable irqs. current_state = NORMAL_STATE; } @@ -121,12 +121,11 @@ void keyboard_driver_irq_handler() { int scancode = inb(0x60); // Read byte from the Keyboard data port if (scancode == 0xE0) { current_state = PREFIX_STATE - // We have read a prefix, so let's update the state and finish here - // this is a very simple scenario, there could be more needed depending on the design + // We have read a prefix, so update the state and exit. return; } if (current_state == PREFIX_STATE) { - // Do what you need to store the key_code and eventually translate it to the kernel_code and return to the normal state + // Store the next part of the scancode, then return to normal state. 
current_state = NORMAL_STATE; } } @@ -214,10 +213,11 @@ We could use the following: ```c -//an example of our kernel-specific scancodes +//an example of our kernel-specific scancodes: +//note that these are totally arbitrary and can be whatever you want. typedef enum kernel_scancodes { [ ... ] - F1 = 0xAABBCCDD, //this can be defined to whatever value you want, the exact value is totally arbitrary. + F1 = 0xAABBCCDD, [ ... ] }; diff --git a/03_Video_Output/02_DrawingTextOnFB.md b/03_Video_Output/02_DrawingTextOnFB.md index 72e02471..054c8d8a 100644 --- a/03_Video_Output/02_DrawingTextOnFB.md +++ b/03_Video_Output/02_DrawingTextOnFB.md @@ -122,8 +122,8 @@ All the fields are 4 bytes in size, so creating a structure that can hold it is Let's assume from now on that we have a data structure called PSF_font with all the fields specified above. The first thing that we need of course, is to access to this variable: ```C -// We have linked _binary_font_psf_start from another .o file so we must specify that we are dealing -// with an external variable +// We have linked _binary_font_psf_start from another .o file so we must +// specify that we are dealing with an external variable. 
extern char _binary_font_psf_start; PSF_font *default_font = (PSF_font *)&_binary_font_psf_start ``` @@ -157,15 +157,18 @@ Below an example of how a glyph is stored: The glyphs start right after the psf header, the address of the first character will be then: ```C -uint8_t* first_glyph = (uint8_t*) &_binary_font_psf_start + default_font->headersize +uint8_t* first_glyph = (uint8_t*) &_binary_font_psf_start + + default_font->headersize ``` Since we know that every glyph has the same size, and this is available in the PSF_Header, if we want to access the *i-th* character, we just need to do the following: ```C -uint8_t* selected_glyph_v1 = (uint8_t*) &_binary_font_psf_start + sizeof(PSFv1_Header_Struct) + (i * default_font->bytesperglyph); //psf_v1 +uint8_t* selected_glyph_v1 = (uint8_t*) &_binary_font_psf_start + + sizeof(PSFv1_Header_Struct) + (i * default_font->bytesperglyph); -uint8_t* selected_glyph_v2 = (uint8_t*) &_binary_font_psf_start + default_font->headersize + (i * default_font->bytesperglyph); //psf_v2 +uint8_t* selected_glyph_v2 = (uint8_t*) &_binary_font_psf_start + + default_font->headersize + (i * default_font->bytesperglyph); ``` Where in the v1 case, `PSFv1_Header_Struct` is just the name of the struct containing the PSFv1 definition. diff --git a/04_Memory_Management/01_Overview.md b/04_Memory_Management/01_Overview.md index 584b6060..9e94b8d2 100644 --- a/04_Memory_Management/01_Overview.md +++ b/04_Memory_Management/01_Overview.md @@ -17,13 +17,13 @@ We will cover the following topics: *Authors note: don't worry, we will try to keep it as simple as possible, using basic algorithms and explaining all the gray areas as we go. The logic may sometimes be hard to follow, you will most likely have to go through several reads of this part multiple times.* -Each of the layers has a dedicated section below, however we'll start with a high level look at how they fit together. 
Before proceeding let's briefly define the concepts above: +Each of the layers has a dedicated chapter, however we'll start with a high level look at how they fit together. Before proceeding let's briefly define the concepts above: | Memory Management Layer | Description | -|---|---| +|---|------| | Physical Memory Manager | Responsible for keeping track of which parts of the available hardware memory (usually ram) are free/in-use. It usually allocates in fixed size blocks, the native page size. This is 4096 bytes on x86.| | Paging | It introduces the concepts of *virtual memory* and *virtual addresses*, providing the OS with a bigger address space, protection to the data and code in its pages, and isolation between programs. | -| Virtual memory manager | For a lot of projects, the VMM and paging will be the same thing. However the VMM should be seen as the virtual memory *manager*, and paging is just one tool that it uses to accomplish its job: ensuring that a program has memory where it needs it, when it needs it. Often this is just mapping physical ram to the requested virtual address (via paging or segmentation), but it can evolve into stealing pages from other processes. | +| Virtual memory manager | For a lot of projects, the VMM and paging will be the same thing. However the VMM should be seen as the virtual memory *manager*, and paging is just one tool that it uses to accomplish its job: ensuring that a program has memory where it needs it, when it needs it. Often this is just mapping physical ram to the requested virtual address (via paging or segmentation). | | Heap Allocator | The VMM can handle page-sized allocations just fine, but that is not always useful. A heap allocator allows for allocations of any size, big or small.
| ## PMM - Physical Memory Manager diff --git a/04_Memory_Management/05_Heap_Allocation.md b/04_Memory_Management/05_Heap_Allocation.md index 59cbbd52..cb1caacd 100644 --- a/04_Memory_Management/05_Heap_Allocation.md +++ b/04_Memory_Management/05_Heap_Allocation.md @@ -90,7 +90,8 @@ What we have so far is already an allocation algorithm, that's easy to implement Its implementation is very simple: ```c -uint8_t *cur_heap_position = 0; //This is just pseudocode in real world this will be a memory location +uint8_t *cur_heap_position = 0; //Just an example, in the real world you would use + //a virtual address allocated from the VMM. void *first_alloc(size_t size) { uint8_t *addr_to_return = cur_heap_position; cur_heap_position= cur_heap_position + size; @@ -401,7 +402,7 @@ if (prev_node != NULL && prev_node->status == FREE) { ``` What we're describing here is the left node being "swallowed" by the right one, and growing in size. The memory that the left node owns and is responsible for is now part of the right oneTo make it easier to understand, consider the portion of a hypothetical heap in the picture below: -![heap_example_start](/Images/heapexample.png) +![Heap initial status](/Images/heapexample.png) Basically the heap starts from address 0, the first node is marked as free and the next two nodes are both used. Now imagine that `free()` is called on the second address (for this exammple we consider size of the heap node structure to be just of 2 bytes): @@ -413,7 +414,7 @@ free(0x27); //Remember the overhead This means that the allocator (before marking this location as free and returning) will check if it is possible to merge first to the left (YES) and then to the right (NO since the next node is still in use) and then will proceed with a merge only on the left side. 
The final result will be: -![heap_example_after_merge](/Images/heap_example_after_merge.png) +![The heap status after the merge](/Images/heap_example_after_merge.png) The fields in bold are the fields that are changed. The exact implementation of this code is left to the reader. diff --git a/05_Scheduling/03_Processes_And_Threads.md b/05_Scheduling/03_Processes_And_Threads.md index b727fe80..ff4faae4 100644 --- a/05_Scheduling/03_Processes_And_Threads.md +++ b/05_Scheduling/03_Processes_And_Threads.md @@ -65,7 +65,7 @@ Creating a process is pretty trivial. We need a place to store the new `process_ size_t next_free_pid = 0; process_t* create_process(char* name, void(*function)(void*), void* arg) { - process_t* process = alloc(sizeof(process_t)); // We should have an allocation function available + process_t* process = alloc(sizeof(process_t)); strncpy(process->name, name, NAME_MAX_LEN); process->pid = next_free_pid++; diff --git a/06_Userspace/05_Example_ABI.md b/06_Userspace/05_Example_ABI.md index 7b3cdc7d..7d39a82b 100644 --- a/06_Userspace/05_Example_ABI.md +++ b/06_Userspace/05_Example_ABI.md @@ -41,8 +41,7 @@ We're going to implement a wrapper function for system calls in C, purely for co ```c __attribute__((naked)) -void do_syscall(uint64_t num, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) -{ +void do_syscall(uint64_t num, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) { asm ("int $0x50" ::: "rdi", "rsi", "rdx", "rcx", "r8", "memory"); } ``` @@ -54,8 +53,7 @@ This function also uses the `naked` attribute. If unfamiliar with attributes, th Now, let's combine our wrapper function with our example system call from above. 
We're going to write a `memcpy` function that could be called by another code, but uses the system call internally: ```c -void memcpy(void* src, void* dest, size_t count) -{ +void memcpy(void* src, void* dest, size_t count) { return do_syscall(3, (uint64_t)src, (uint64_t)dest, (uint64_t)count, 0, 0); } ``` diff --git a/07_IPC/01_Overview.md b/07_IPC/01_Overview.md index c0412cb3..a93c8c81 100644 --- a/07_IPC/01_Overview.md +++ b/07_IPC/01_Overview.md @@ -2,14 +2,14 @@ So far we've put a lot of effort into making sure each program (represented by a process in our kernel) is completely isolated from all others. This is great for safety and security, but it presents a big problem: what if we want two processes to communicate with each other? -The answer to this is some form of inter-process communication (IPC). This part will look at some basic implementations for the common types and will hopefully serve a good jumping off point for further implementations. +The answer to this is some form of inter-process communication (aka `IPC`). This part will look at some basic implementations for the common types and will hopefully serve as a good jumping off point for further implementations. ## Shared Memory vs Message Passing All IPC can be broken down into two forms: -- Shared Memory: In this case the kernel maps a set of physical pages into a process's address space, and then maps the same physical pages into another processes address space. Now the two processes can communicate by reading and writing to this shared memory. This will be explained in the [Shared_Memory](02_Shared_Memory.md) chapter -- Message Passing: This works by writing the message we want to send into a buffer, and then giving that buffer to the kernel. The kernel will then pass that buffer to the destination process. The chapter [Message Passing](03_Message_Passing.md) will cover this topic.
+- _Shared Memory_: In this case the kernel maps a set of physical pages into a process's address space, and then maps the same physical pages into another process's address space. Now the two processes can communicate by reading and writing to this shared memory. This will be explained in the [Shared Memory](02_Shared_Memory.md) chapter. +- _Message Passing_: This works by writing the message we want to send into a buffer, and then giving that buffer to the kernel. The kernel will then pass that buffer to the destination process. The chapter [Message Passing](03_Message_Passing.md) will cover this topic. ## Single-Copy vs Double-Copy diff --git a/08_VirtualFileSystem/01_Overview.md b/08_VirtualFileSystem/01_Overview.md index a0d33260..c57b0860 100644 --- a/08_VirtualFileSystem/01_Overview.md +++ b/08_VirtualFileSystem/01_Overview.md @@ -34,6 +34,6 @@ How do we combine the output of all these different filesystems in a uniform way How the VFS presents itself is another design decision, but the two common ways to do it are: -* Each mounted filesystem is distinct filesystem, with a separate root. Typically each root is given a single letter to identify it. This is the MS-DOS/Windows approach. This is called the *multi-root* approach.RREADME.md +* Each mounted filesystem is a distinct filesystem, with a separate root. Typically each root is given a single letter to identify it. This is the MS-DOS/Windows approach. This is called the *multi-root* approach. * Each mounted filesystem exists within a single global tree, under a single root. This is the usual unix approach, where a directory can actually a window into another filesystem.
diff --git a/08_VirtualFileSystem/02_VirtualFileSystem.md b/08_VirtualFileSystem/02_VirtualFileSystem.md index 05aa255c..71098c44 100644 --- a/08_VirtualFileSystem/02_VirtualFileSystem.md +++ b/08_VirtualFileSystem/02_VirtualFileSystem.md @@ -1,7 +1,5 @@ # The Virtual File System -## Overview - Nowadays there are many OSes available for many different hardware architectures, and probably there are even more file systems. One of the problems for the OS is to provide a generic enough interface to support as many file systems as possible, and making it easy to implement new ones, in the future. This is where the VFS layer comes to aid, in this chapter we are going to see in detail how it works, and make a basic implementation of it. To keep our design simple, the features of our VFS driver will be: diff --git a/09_Loading_Elf/01_Elf_Theory.md b/09_Loading_Elf/01_Elf_Theory.md index ad27ceea..6309e3b8 100644 --- a/09_Loading_Elf/01_Elf_Theory.md +++ b/09_Loading_Elf/01_Elf_Theory.md @@ -1,6 +1,8 @@ # Executable Linker Format -The *executable and linker file* (ELF) is an open standard for programs, libraries and shards of code and data that are waiting to linked. It's the most common format used by linux and BSD operating systems, and sees some use elsewhere. It's also the most common format for programs in hobby as it's quite simple to implement and it's public specification is feature-complete. +## ELF Overview + +The *Executable and Linkable Format* (ELF) is an open standard for programs, libraries and shards of code and data that are waiting to be linked. It's the most common format used by linux and BSD operating systems, and sees some use elsewhere. It's also the most common format for programs in hobby operating systems as it's quite simple to implement and its public specification is feature-complete.
That's not to say ELF is the *only* format for these kinds of files (there are others like PE/portable execute, a.out or even mach-o), but the ELF format is the best for our purposes. A majority of operating systems have come to a similar to conclusion. We could also use our own format, but be aware this requires a compiler capable of outputting it (meaning either write our own compiler, or modify an existing one - a lot of work!). @@ -25,7 +27,7 @@ typedef uint8_t Elf64_UnsignedChar; All structs in the base ELF spec are defined using these types, and so we will use them too. Note that their exact definitions *will* change depending on the target platform. -# Layout Of An ELF +## Layout Of An ELF The format has four main sections: diff --git a/99_Appendices/F_Memory_Protection.md b/99_Appendices/F_Memory_Protection.md index 204940a4..f123e312 100644 --- a/99_Appendices/F_Memory_Protection.md +++ b/99_Appendices/F_Memory_Protection.md @@ -53,8 +53,7 @@ Now that's a lot of words, let's have a look at a quick example of how it might An example in c might look like (note these functions are made up for the example, and must be implemented yourself): ```c -void* page_heap_alloc(size_t size, bool detect_overrun) -{ +void* page_heap_alloc(size_t size, bool detect_overrun) { const size_t pages_required = (size / PAGE_SIZE_IN_BYTES) + 1; void* pages = pmm_alloc_pages(pages_required); uint64_t next_alloc_address = get_next_addr(); diff --git a/Images/memorymanager_example.jpg b/Images/memorymanager_example.jpg index 23e1b1eb..9cb58e4f 100644 Binary files a/Images/memorymanager_example.jpg and b/Images/memorymanager_example.jpg differ