Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PAL/vm-common] Get memory regions from VMM using "etc/e820" fw_cfg file #28

Merged
merged 1 commit into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pal/src/host/vm-common/kernel_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,12 @@ int memory_preload_ranges(e820_table_entry* e820_entries, size_t e820_entries_si
if (e820_entries[i].type == E820_ADDRESS_RANGE_MEMORY)
continue;

if (PCI_HOLE_ADDR <= e820_entries[i].address &&
e820_entries[i].address + e820_entries[i].size <= PCI_HOLE_ADDR + PCI_HOLE_SIZE) {
/* reported reserved memory region is fully consumed by the PCI hole region */
continue;
}

if (e820_entries[i].address < PAGE_TABLES_ADDR + PAGE_TABLES_SIZE &&
PAGE_TABLES_ADDR < e820_entries[i].address + e820_entries[i].size) {
/* a reserved range overlaps with our page tables range */
Expand Down Expand Up @@ -483,7 +489,7 @@ int memory_preload_ranges(e820_table_entry* e820_entries, size_t e820_entries_si
ret = callback(SHARED_MEM_ADDR, SHARED_MEM_SIZE, "shared_memory");
if (ret < 0)
return -PAL_ERROR_NOMEM;
ret = callback(0x80000000UL, 0x80000000UL, "qemu_pci_hole");
ret = callback(PCI_HOLE_ADDR, PCI_HOLE_SIZE, "qemu_pci_hole");
if (ret < 0)
return -PAL_ERROR_NOMEM;

Expand Down
8 changes: 8 additions & 0 deletions pal/src/host/vm-common/kernel_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#define SHARED_MEM_ADDR 0x29200000UL /* shared memory occupies [658MB, 896MB) */
#define SHARED_MEM_SIZE (238UL * 1024 * 1024) /* 238MB */

#define PCI_HOLE_ADDR 0x80000000UL /* QEMU's memory hole + PCI (BARs, LAPIC, IOAPIC) */
#define PCI_HOLE_SIZE 0x80000000UL /* 2GB */

/* equivalent to E820_TABLE_ENTRY in EFI_HOB_E820_TABLE (needs to be packed) */
#define E820_ADDRESS_RANGE_MEMORY 1
#define E820_ADDRESS_RANGE_RESERVED 2
Expand All @@ -23,6 +26,11 @@ typedef struct {
uint32_t type;
} __attribute__((packed)) e820_table_entry;

/* max number of entries in the E820 table; taken from QEMU sources */
#define E820_NR_ENTRIES 16

#define E820_TABLE_MAX_SIZE (E820_NR_ENTRIES * sizeof(e820_table_entry))

extern uint64_t g_pml4_table_base;

void* memory_get_shared_region(size_t size);
Expand Down
84 changes: 67 additions & 17 deletions pal/src/host/vm-common/kernel_vmm_inputs.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "api.h"
#include "pal_error.h"

#include "kernel_memory.h"
#include "kernel_time.h"
#include "kernel_vmm_inputs.h"
#include "vm_callbacks.h"
Expand Down Expand Up @@ -127,8 +128,9 @@ int cmdline_read_gramine_envs(const char* envs, int* out_envp_cnt, const char**
return cmdline_read_common(CMDLINE_ENVS, envs, out_envp_cnt, out_envp);
}

static uint16_t find_fw_cfg_selector(const char* fw_cfg_name) {
uint32_t fw_cfg_files_count = 0;
static int find_fw_cfg_selector(const char* fw_cfg_name, uint16_t* out_selector,
uint32_t* out_size) {
uint32_t fw_cfg_files_count = 0;
uint8_t* fw_cfg_files_count_raw = (uint8_t*)&fw_cfg_files_count;
vm_portio_writew(FW_CFG_PORT_SEL, FW_CFG_FILE_DIR);
for (size_t i = 0; i < sizeof(fw_cfg_files_count); i++)
Expand All @@ -137,9 +139,10 @@ static uint16_t find_fw_cfg_selector(const char* fw_cfg_name) {
/* QEMU provides in big-endian, but our x86-64 CPU is little-endian */
fw_cfg_files_count = __builtin_bswap32(fw_cfg_files_count);
if (fw_cfg_files_count > MAX_FW_CFG_FILES)
return 0;
return -PAL_ERROR_INVAL;

uint16_t fw_cfg_selector = 0;
uint32_t fw_cfg_size = 0;
for (size_t i = 0; i < fw_cfg_files_count; i++) {
struct FWCfgFile fw_cfg_file;
uint8_t* fw_cfg_file_raw = (uint8_t*)&fw_cfg_file;
Expand All @@ -148,27 +151,38 @@ static uint16_t find_fw_cfg_selector(const char* fw_cfg_name) {

if (strlen(fw_cfg_name) + 1 > sizeof(fw_cfg_file.name)) {
/* make sure the searched-for string is less than the fw_cfg file name limit (56) */
return 0;
return -PAL_ERROR_INVAL;
}

if (strcmp(fw_cfg_file.name, fw_cfg_name) == 0) {
fw_cfg_selector = fw_cfg_file.select;
fw_cfg_size = fw_cfg_file.size;
break;
}
}

return __builtin_bswap16(fw_cfg_selector);
if (!fw_cfg_selector || !fw_cfg_size)
return -PAL_ERROR_INVAL;

*out_selector = __builtin_bswap16(fw_cfg_selector);
*out_size = __builtin_bswap32(fw_cfg_size);
return 0;
}

int cmdline_init_args(char* cmdline_args, size_t cmdline_args_size) {
memset(cmdline_args, 0, cmdline_args_size);

uint16_t fw_cfg_selector = find_fw_cfg_selector("opt/gramine/args");
if (!fw_cfg_selector)
uint16_t fw_cfg_selector;
uint32_t fw_cfg_size;
int ret = find_fw_cfg_selector("opt/gramine/args", &fw_cfg_selector, &fw_cfg_size);
if (ret < 0)
return ret;

if (fw_cfg_size >= cmdline_args_size)
return -PAL_ERROR_INVAL;

vm_portio_writew(FW_CFG_PORT_SEL, fw_cfg_selector);
for (size_t i = 0; i < cmdline_args_size - 1; i++)
for (size_t i = 0; i < fw_cfg_size; i++)
cmdline_args[i] = vm_portio_readb(FW_CFG_PORT_SEL + 1);

uint32_t cmdline_args_len = strlen(cmdline_args);
Expand All @@ -182,12 +196,17 @@ int cmdline_init_args(char* cmdline_args, size_t cmdline_args_size) {
int cmdline_init_envs(char* cmdline_envs, size_t cmdline_envs_size) {
memset(cmdline_envs, 0, cmdline_envs_size);

uint16_t fw_cfg_selector = find_fw_cfg_selector("opt/gramine/envs");
if (!fw_cfg_selector)
uint16_t fw_cfg_selector;
uint32_t fw_cfg_size;
int ret = find_fw_cfg_selector("opt/gramine/envs", &fw_cfg_selector, &fw_cfg_size);
if (ret < 0)
return ret;

if (fw_cfg_size >= cmdline_envs_size)
return -PAL_ERROR_INVAL;

vm_portio_writew(FW_CFG_PORT_SEL, fw_cfg_selector);
for (size_t i = 0; i < cmdline_envs_size - 1; i++)
for (size_t i = 0; i < fw_cfg_size; i++)
cmdline_envs[i] = vm_portio_readb(FW_CFG_PORT_SEL + 1);

uint32_t cmdline_envs_len = strlen(cmdline_envs);
Expand All @@ -199,12 +218,17 @@ int cmdline_init_envs(char* cmdline_envs, size_t cmdline_envs_size) {
}

int host_pwd_init(void) {
uint16_t fw_cfg_selector = find_fw_cfg_selector("opt/gramine/pwd");
if (!fw_cfg_selector)
uint16_t fw_cfg_selector;
uint32_t fw_cfg_size;
int ret = find_fw_cfg_selector("opt/gramine/pwd", &fw_cfg_selector, &fw_cfg_size);
if (ret < 0)
return ret;

if (fw_cfg_size >= sizeof(g_host_pwd))
return -PAL_ERROR_INVAL;

vm_portio_writew(FW_CFG_PORT_SEL, fw_cfg_selector);
for (size_t i = 0; i < sizeof(g_host_pwd) - 1; i++)
for (size_t i = 0; i < fw_cfg_size; i++)
g_host_pwd[i] = vm_portio_readb(FW_CFG_PORT_SEL + 1);

uint32_t len = strlen(g_host_pwd);
Expand All @@ -218,12 +242,17 @@ int host_pwd_init(void) {
int unixtime_init(char* unixtime_s, size_t unixtime_size) {
memset(unixtime_s, 0, unixtime_size);

uint16_t fw_cfg_selector = find_fw_cfg_selector("opt/gramine/unixtime_s");
if (!fw_cfg_selector)
uint16_t fw_cfg_selector;
uint32_t fw_cfg_size;
int ret = find_fw_cfg_selector("opt/gramine/unixtime_s", &fw_cfg_selector, &fw_cfg_size);
if (ret < 0)
return ret;

if (fw_cfg_size >= unixtime_size)
return -PAL_ERROR_INVAL;

vm_portio_writew(FW_CFG_PORT_SEL, fw_cfg_selector);
for (size_t i = 0; i < unixtime_size - 1; i++)
for (size_t i = 0; i < fw_cfg_size; i++)
unixtime_s[i] = vm_portio_readb(FW_CFG_PORT_SEL + 1);

uint32_t len = strlen(unixtime_s);
Expand All @@ -233,3 +262,24 @@ int unixtime_init(char* unixtime_s, size_t unixtime_size) {
/* note that `unixtime_s` is guaranteed to be NULL terminated and have at least one symbol */
return 0;
}

/*
 * Read the raw contents of the "etc/e820" fw_cfg file into `e820_table`.
 *
 * The buffer `e820_table` must be at least `max_e820_size` bytes long; it is zeroed in full
 * before anything else happens. On success, `*e820_size` is set to the number of bytes copied
 * from the fw_cfg file (the size reported by the fw_cfg file directory).
 *
 * Returns 0 on success, the negative error code from `find_fw_cfg_selector()` if the file cannot
 * be located, or -PAL_ERROR_INVAL if the file doesn't fit into the supplied buffer.
 *
 * This func is used only in VM PAL (not in TDX PAL), so doesn't need to be hardened.
 */
int e820_table_init(char* e820_table, size_t* e820_size, size_t max_e820_size) {
    uint16_t selector;
    uint32_t table_size;

    /* bytes past the end of the table (if any) stay zero */
    memset(e820_table, 0, max_e820_size);

    int ret = find_fw_cfg_selector("etc/e820", &selector, &table_size);
    if (ret < 0)
        return ret;

    /* unlike the string inputs, the table is raw data: no room for a NUL terminator needed */
    if (table_size > max_e820_size)
        return -PAL_ERROR_INVAL;

    /* select the file first, then pull it byte by byte from the port right after the selector */
    vm_portio_writew(FW_CFG_PORT_SEL, selector);

    char* dst = e820_table;
    char* const end = e820_table + table_size;
    while (dst < end)
        *dst++ = vm_portio_readb(FW_CFG_PORT_SEL + 1);

    *e820_size = table_size;
    return 0;
}
17 changes: 13 additions & 4 deletions pal/src/host/vm-common/kernel_vmm_inputs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,24 @@
/* Copyright (C) 2023 Intel Corporation */

/*
* Read inputs from VMM. Currently three inputs:
* Read inputs from VMM. Currently the following inputs:
* - Command-line arguments
* - Host environment variables
* - PWD (host's current working directory)
* - initial UNIX time
* - E820 table of VMM-reserved memory ranges (only for VM PAL; TDX PAL uses TDX hobs)
*
* Gramine command-line args and host environment variables are read from fw_cfg QEMU pseudo-device.
* They are supposed to be put in one of the selectors above 0x19 and be in a special format (see
* below).
* Gramine command-line args, host environment variables, PWD, and initial UNIX time are all read
* from fw_cfg QEMU pseudo-device. They are supposed to be put in one of the selectors above 0x19
* and be in a special format (see below).
*
* The E820 table is also read from the fw_cfg QEMU pseudo-device, from the file with the
* hard-coded name "etc/e820". Note that another selector, FW_CFG_E820_TABLE, has been deprecated
* since QEMU v7.2.
*
* The selector with command-line args has the name "opt/gramine/args".
* The selector with environment variables has the name "opt/gramine/envs".
* The selector with PWD has the name "opt/gramine/pwd".
* The selector with initial UNIX time has the name "opt/gramine/unixtime_s".
*
* For details, see:
* - qemu.org/docs/master/specs/fw_cfg.html
Expand Down Expand Up @@ -76,3 +83,5 @@ int cmdline_read_gramine_envs(const char* envs, int* out_envp_cnt, const char**
int cmdline_init_envs(char* cmdline_envs, size_t cmdline_envs_size);

int unixtime_init(char* unixtime_s, size_t unixtime_size);

int e820_table_init(char* e820_table, size_t* e820_size, size_t max_e820_size);
15 changes: 9 additions & 6 deletions pal/src/host/vm/pal_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,15 @@ noreturn void pal_start_c(void) {
INIT_FAIL("Detected unsupported number of virtual CPUs: %u (supported: 1..%u)", num_cpus,
MAX_NUM_CPUS);

e820_table_entry e820 = { .address = 0x0,
.size = rdfwcfg(FW_CFG_RAM_SIZE, sizeof(uint64_t)),
.type = E820_ADDRESS_RANGE_MEMORY };
e820_table_entry e820[E820_NR_ENTRIES]; /* 16*20 = 320B, ok to allocate on stack */
size_t e820_size;

ret = memory_init(&e820, sizeof(e820), &g_pal_public_state.memory_address_start,
&g_pal_public_state.memory_address_end);
ret = e820_table_init((char*)e820, &e820_size, sizeof(e820));
if (ret < 0)
INIT_FAIL("Can't read E820 table from VMM");

ret = memory_init(e820, e820_size, &g_pal_public_state.memory_address_start,
&g_pal_public_state.memory_address_end);
if (ret < 0)
INIT_FAIL("Failed to initialize physical memory");

Expand All @@ -185,7 +188,7 @@ noreturn void pal_start_c(void) {
if (ret < 0)
INIT_FAIL("Failed to initialize page tables");

ret = memory_preload_ranges(&e820, sizeof(e820), &add_preloaded_range);
ret = memory_preload_ranges(e820, e820_size, &add_preloaded_range);
if (ret < 0)
INIT_FAIL("Failed to initialize preloaded ranges");

Expand Down