From 4223369e0a6862c892d40b9c36e818f794b735fb Mon Sep 17 00:00:00 2001 From: Austin Henriksen Date: Tue, 5 Mar 2024 17:10:42 -0500 Subject: [PATCH] Ordered Compilation (#694) --- .vscode/cspell.json | 1 + src/compilation_state.rs | 5 +- src/diagnostic_emitter.rs | 9 ++-- src/diagnostics/diagnostic.rs | 10 +--- src/lib.rs | 16 +++--- src/parsers/mod.rs | 2 +- src/patchers/encoding_patcher.rs | 13 ++--- src/utils/file_util.rs | 86 +++++++++++++++----------------- src/validators/mod.rs | 2 +- tests/files/a.slice | 0 tests/files/b.slice | 0 tests/files/c.slice | 0 tests/files/io.rs | 70 ++++++++++++++++++++++++++ 13 files changed, 135 insertions(+), 79 deletions(-) create mode 100644 tests/files/a.slice create mode 100644 tests/files/b.slice create mode 100644 tests/files/c.slice diff --git a/.vscode/cspell.json b/.vscode/cspell.json index a25f3ea5..4e8bcd1b 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -7,6 +7,7 @@ "canonicalized", "codepoints", "Commentable", + "deduped", "deque", "downcasted", "downcasting", diff --git a/src/compilation_state.rs b/src/compilation_state.rs index f36f9e0f..49fed7ef 100644 --- a/src/compilation_state.rs +++ b/src/compilation_state.rs @@ -5,13 +5,12 @@ use crate::diagnostic_emitter::{emit_totals, DiagnosticEmitter}; use crate::diagnostics::{get_totals, Diagnostic, Diagnostics}; use crate::slice_file::SliceFile; use crate::slice_options::SliceOptions; -use std::collections::HashMap; #[derive(Debug, Default)] pub struct CompilationState { pub ast: Ast, pub diagnostics: Diagnostics, - pub files: HashMap, + pub files: Vec, } impl CompilationState { @@ -19,7 +18,7 @@ impl CompilationState { CompilationState { ast: Ast::create(), diagnostics: Diagnostics::new(), - files: HashMap::new(), + files: Vec::new(), } } diff --git a/src/diagnostic_emitter.rs b/src/diagnostic_emitter.rs index c7cf72a8..0d53ea2b 100644 --- a/src/diagnostic_emitter.rs +++ b/src/diagnostic_emitter.rs @@ -5,7 +5,6 @@ use crate::slice_file::{SliceFile, Span}; use crate::slice_options::{DiagnosticFormat, SliceOptions}; use serde::ser::SerializeStruct; use serde::Serializer; -use std::collections::HashMap; use std::io::{Result, Write}; use std::path::Path; @@ -18,11 +17,11 @@ pub struct DiagnosticEmitter<'a, T: Write> { /// If true, diagnostic output will not be styled with colors (only used in `human` format). disable_color: bool, /// Provides the emitter access to the slice files that were compiled so it can extract snippets from them. - files: &'a HashMap, + files: &'a [SliceFile], } impl<'a, T: Write> DiagnosticEmitter<'a, T> { - pub fn new(output: &'a mut T, slice_options: &SliceOptions, files: &'a HashMap) -> Self { + pub fn new(output: &'a mut T, slice_options: &SliceOptions, files: &'a [SliceFile]) -> Self { DiagnosticEmitter { output, diagnostic_format: slice_options.diagnostic_format, @@ -115,8 +114,8 @@ impl<'a, T: Write> DiagnosticEmitter<'a, T> { )?; // Display the line of code where the error occurred. - let snippet = self.files.get(&span.file).unwrap().get_snippet(span.start, span.end); - writeln!(self.output, "{}", snippet)?; + let file = self.files.iter().find(|f| f.relative_path == span.file).unwrap(); + writeln!(self.output, "{}", file.get_snippet(span.start, span.end))?; Ok(()) } diff --git a/src/diagnostics/diagnostic.rs b/src/diagnostics/diagnostic.rs index 32c38f99..7e91686e 100644 --- a/src/diagnostics/diagnostic.rs +++ b/src/diagnostics/diagnostic.rs @@ -5,7 +5,6 @@ use crate::ast::Ast; use crate::grammar::{attributes, Attributable, Entity}; use crate::slice_file::{SliceFile, Span}; use crate::slice_options::SliceOptions; -use std::collections::HashMap; /// A diagnostic is a message that is reported to the user during compilation. /// It can either hold an [Error] or a [Lint]. @@ -158,12 +157,7 @@ impl Diagnostics { /// Returns the diagnostics this struct contains after it has patched and updated them. /// Lint levels can be configured via attributes or command line options, but these aren't applied until this runs. - pub fn into_updated( - mut self, - ast: &Ast, - files: &HashMap, - options: &SliceOptions, - ) -> Vec { + pub fn into_updated(mut self, ast: &Ast, files: &[SliceFile], options: &SliceOptions) -> Vec { // Helper function that checks whether a lint should be allowed according to the provided identifiers. fn is_lint_allowed_by<'b>(mut identifiers: impl Iterator, lint: &Lint) -> bool { identifiers.any(|identifier| identifier == "All" || identifier == lint.code()) @@ -186,7 +180,7 @@ impl Diagnostics { // If the diagnostic has a span, check if it's affected by an `allow` attribute on its file. if let Some(span) = diagnostic.span() { - let file = files.get(&span.file).expect("slice file didn't exist"); + let file = files.iter().find(|f| f.relative_path == span.file).expect("no file"); if is_lint_allowed_by_attributes(file, lint) { diagnostic.level = DiagnosticLevel::Allowed; } diff --git a/src/lib.rs b/src/lib.rs index 36e6efc5..83e904aa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,11 +31,11 @@ pub fn compile_from_options( let mut state = CompilationState::create(); // Recursively resolve any Slice files contained in the paths specified by the user. - let files = file_util::resolve_files_from(options, &mut state.diagnostics); + state.files = file_util::resolve_files_from(options, &mut state.diagnostics); // If any files were unreadable, return without parsing. Otherwise, parse the files normally. if !state.diagnostics.has_errors() { - compile_files(files, &mut state, options, patcher, validator); + compile_files(&mut state, options, patcher, validator); } state } @@ -50,29 +50,25 @@ pub fn compile_from_strings( let mut state = CompilationState::create(); // Create a Slice file from each of the strings. - let mut files = Vec::new(); for (i, &input) in inputs.iter().enumerate() { - files.push(SliceFile::new(format!("string-{i}"), input.to_owned(), false)) + let slice_file = SliceFile::new(format!("string-{i}"), input.to_owned(), false); + state.files.push(slice_file); } match options { - Some(slice_options) => compile_files(files, &mut state, slice_options, patcher, validator), - None => compile_files(files, &mut state, &SliceOptions::default(), patcher, validator), + Some(slice_options) => compile_files(&mut state, slice_options, patcher, validator), + None => compile_files(&mut state, &SliceOptions::default(), patcher, validator), } state } fn compile_files( - files: Vec, state: &mut CompilationState, options: &SliceOptions, patcher: unsafe fn(&mut CompilationState), validator: fn(&mut CompilationState), ) { - // Convert the `Vec` into a `HashMap` for easier lookup, and store it. - state.files = files.into_iter().map(|f| (f.relative_path.clone(), f)).collect(); - // Retrieve any preprocessor symbols defined by the compiler itself, or by the user on the command line. let defined_symbols = HashSet::from_iter(options.defined_symbols.clone()); diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index 4db3a080..e5e8bbf6 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -19,7 +19,7 @@ use crate::slice_file::SliceFile; use std::collections::HashSet; pub fn parse_files(state: &mut CompilationState, symbols: &HashSet) { - for file in state.files.values_mut() { + for file in &mut state.files { // Attempt to parse the file. let mut diagnostics = Diagnostics::new(); parse_file(file, &mut state.ast, &mut diagnostics, symbols.clone()); diff --git a/src/patchers/encoding_patcher.rs b/src/patchers/encoding_patcher.rs index 0e375773..4d6df304 100644 --- a/src/patchers/encoding_patcher.rs +++ b/src/patchers/encoding_patcher.rs @@ -58,7 +58,7 @@ pub unsafe fn patch_ast(compilation_state: &mut CompilationState) { struct EncodingPatcher<'a> { supported_encodings_cache: HashMap, - slice_files: &'a HashMap, + slice_files: &'a [SliceFile], diagnostics: &'a mut Diagnostics, } @@ -74,9 +74,10 @@ impl EncodingPatcher<'_> { } // Store which Slice encodings can possibly be supported based on the file's compilation mode. - let file_name = &entity_def.span().file; - let compilation_mode = self.slice_files.get(file_name).unwrap().compilation_mode(); - let mut supported_encodings = SupportedEncodings::new(match &compilation_mode { + let mut files = self.slice_files.iter(); + let slice_file = files.find(|f| f.relative_path == entity_def.span().file).unwrap(); + let compilation_mode = slice_file.compilation_mode(); + let mut supported_encodings = SupportedEncodings::new(match compilation_mode { CompilationMode::Slice1 => vec![Encoding::Slice1, Encoding::Slice2], CompilationMode::Slice2 => vec![Encoding::Slice2], }); @@ -232,8 +233,8 @@ impl EncodingPatcher<'_> { } fn get_mode_mismatch_note(&self, symbol: &impl Symbol) -> Option { - let file_name = &symbol.span().file; - let slice_file = self.slice_files.get(file_name).unwrap(); + let mut files = self.slice_files.iter(); + let slice_file = files.find(|f| f.relative_path == symbol.span().file).unwrap(); // Emit a note if the file's compilation mode wasn't explicitly set. match slice_file.mode.as_ref() { diff --git a/src/utils/file_util.rs b/src/utils/file_util.rs index cd22ee93..b3b0780b 100644 --- a/src/utils/file_util.rs +++ b/src/utils/file_util.rs @@ -3,8 +3,6 @@ use crate::diagnostics::{Diagnostic, Diagnostics, Error, Lint}; use crate::slice_file::SliceFile; use crate::slice_options::SliceOptions; -use std::collections::HashMap; -use std::hash::{Hash, Hasher}; use std::path::{Path, PathBuf}; use std::{fs, io}; @@ -16,69 +14,65 @@ struct FilePath { path: String, // The canonicalized path canonicalized_path: PathBuf, + // True for source files, false for reference files. + is_source: bool, } -impl TryFrom<&String> for FilePath { - type Error = io::Error; - +impl FilePath { /// Creates a new [FilePath] from the given path. If the path does not exist, an [Error] is returned. - fn try_from(path: &String) -> Result { - PathBuf::from(&path).canonicalize().map(|canonicalized_path| Self { - path: path.clone(), + pub fn try_create(path: &str, is_source: bool) -> Result { + PathBuf::from(path).canonicalize().map(|canonicalized_path| Self { + path: path.to_owned(), canonicalized_path, + is_source, }) } } -impl Hash for FilePath { - fn hash(&self, state: &mut H) { - self.canonicalized_path.hash(state); - } -} - impl PartialEq for FilePath { fn eq(&self, other: &Self) -> bool { self.canonicalized_path == other.canonicalized_path } } -pub fn resolve_files_from(options: &SliceOptions, diagnostics: &mut Diagnostics) -> Vec { - // Create a map of all the Slice files with entries like: (absolute_path, is_source). - // HashMap protects against files being passed twice (as reference and source). - // It's important to add sources AFTER references, so sources overwrite references and not vice versa. - let mut file_paths = HashMap::new(); - - let reference_files = find_slice_files(&options.references, true, diagnostics); - - // Report a lint violation for any duplicate reference files. - for reference_file in reference_files { - let path = reference_file.path.clone(); - if file_paths.insert(reference_file, false).is_some() { - Diagnostic::new(Lint::DuplicateFile { path }).push_into(diagnostics); +/// This function takes a `Vec` and returns it, after removing any duplicate elements. +/// A lint violation is reported for each duplicate element. +fn remove_duplicate_file_paths(file_paths: Vec, diagnostics: &mut Diagnostics) -> Vec { + let mut deduped_file_paths = Vec::with_capacity(file_paths.len()); + for file_path in file_paths { + if deduped_file_paths.contains(&file_path) { + let lint = Lint::DuplicateFile { path: file_path.path }; + Diagnostic::new(lint).push_into(diagnostics); + } else { + deduped_file_paths.push(file_path); } } + deduped_file_paths +} - let source_files = find_slice_files(&options.sources, false, diagnostics); - - // Report a lint violation for duplicate source files (any that duplicate another source file not a reference file). - for source_file in source_files { - let path = source_file.path.clone(); - // Insert will replace and return the previous value if the key already exists. - // We use this to allow replacing references with sources. - if let Some(is_source) = file_paths.insert(source_file, true) { - // Only report an error if the file was previously a source file. - if is_source { - Diagnostic::new(Lint::DuplicateFile { path }).push_into(diagnostics); - } +pub fn resolve_files_from(options: &SliceOptions, diagnostics: &mut Diagnostics) -> Vec { + let mut file_paths = Vec::new(); + + // Add any source files to the list of file paths, after removing duplicates. + let source_files = find_slice_files(&options.sources, true, diagnostics); + file_paths.extend(remove_duplicate_file_paths(source_files, diagnostics)); + + // Add any reference files to the list of file paths, after removing duplicates. We omit reference files that have + // already been included as source files; we don't emit a warning for them, we just silently omit them. It's + // important to do this after the source files, to ensure source files are given 'priority' over reference files. + let reference_files = find_slice_files(&options.references, false, diagnostics); + for reference_file in remove_duplicate_file_paths(reference_files, diagnostics) { + if !file_paths.contains(&reference_file) { + file_paths.push(reference_file); } } // Iterate through the discovered files and try to read them into Strings. // Report an error if it fails, otherwise create a new `SliceFile` to hold the data. let mut files = Vec::new(); - for (file_path, is_source) in file_paths { + for file_path in file_paths { match fs::read_to_string(&file_path.path) { - Ok(raw_text) => files.push(SliceFile::new(file_path.path, raw_text, is_source)), + Ok(raw_text) => files.push(SliceFile::new(file_path.path, raw_text, file_path.is_source)), Err(error) => Diagnostic::new(Error::IO { action: "read", path: file_path.path, @@ -87,11 +81,13 @@ pub fn resolve_files_from(options: &SliceOptions, diagnostics: &mut Diagnostics) .push_into(diagnostics), } } - files } -fn find_slice_files(paths: &[String], allow_directories: bool, diagnostics: &mut Diagnostics) -> Vec { +fn find_slice_files(paths: &[String], are_source_files: bool, diagnostics: &mut Diagnostics) -> Vec { + // Directories can only be passed as references. + let allow_directories = !are_source_files; + let mut slice_paths = Vec::new(); for path in paths { let path_buf = PathBuf::from(path); @@ -141,7 +137,7 @@ fn find_slice_files(paths: &[String], allow_directories: bool, diagnostics: &mut slice_paths .into_iter() .map(|path| path.display().to_string()) - .filter_map(|path| match FilePath::try_from(&path) { + .filter_map(|path| match FilePath::try_create(&path, are_source_files) { Ok(file_path) => Some(file_path), Err(error) => { Diagnostic::new(Error::IO { @@ -171,7 +167,7 @@ fn find_slice_files_in_path(path: PathBuf, diagnostics: &mut Diagnostics) -> Vec } } else if path.is_file() && is_slice_file(&path) { // Add the file to the list of paths. - paths.push(path.to_path_buf()); + paths.push(path); } // else we ignore the path diff --git a/src/validators/mod.rs b/src/validators/mod.rs index 12342a2e..42ee6c2e 100644 --- a/src/validators/mod.rs +++ b/src/validators/mod.rs @@ -45,7 +45,7 @@ pub(crate) fn validate_ast(compilation_state: &mut CompilationState) { } let mut validator = ValidatorVisitor::new(diagnostics); - for slice_file in compilation_state.files.values() { + for slice_file in &compilation_state.files { slice_file.visit_with(&mut validator); } } diff --git a/tests/files/a.slice b/tests/files/a.slice new file mode 100644 index 00000000..e69de29b diff --git a/tests/files/b.slice b/tests/files/b.slice new file mode 100644 index 00000000..e69de29b diff --git a/tests/files/c.slice b/tests/files/c.slice new file mode 100644 index 00000000..e69de29b diff --git a/tests/files/io.rs b/tests/files/io.rs index 57bbba1b..92b68d9b 100644 --- a/tests/files/io.rs +++ b/tests/files/io.rs @@ -76,3 +76,73 @@ fn duplicate_reference_files_ignored_with_warning() { }); check_diagnostics(diagnostics.into_inner(), [expected]); } + +#[test] +fn file_resolution_preserves_order() { + // Arrange + let mut diagnostics = Diagnostics::new(); + let file_path_a = PathBuf::from("tests/files/a.slice"); + let file_path_b = PathBuf::from("tests/files/b.slice"); + let file_path_c = PathBuf::from("tests/files/c.slice"); + let file_path_test = PathBuf::from("tests/files/test.slice"); + + let options = SliceOptions { + sources: vec![ + file_path_a.to_str().unwrap().to_owned(), + file_path_b.to_str().unwrap().to_owned(), + file_path_c.to_str().unwrap().to_owned(), + ], + references: vec![ + file_path_b.to_str().unwrap().to_owned(), // Should be filtered out with no diagnostic. + file_path_test.to_str().unwrap().to_owned(), + ], + ..Default::default() + }; + + // Act + let files = resolve_files_from(&options, &mut diagnostics); + + // Assert + assert!(diagnostics.is_empty()); + + assert_eq!(files.len(), 4); + assert_eq!(files[0].relative_path, "tests/files/a.slice"); + assert_eq!(files[1].relative_path, "tests/files/b.slice"); + assert_eq!(files[2].relative_path, "tests/files/c.slice"); + assert_eq!(files[3].relative_path, "tests/files/test.slice"); +} + +#[test] +fn compilation_preserves_order() { + // Arrange + let file_path_a = PathBuf::from("tests/files/a.slice"); + let file_path_b = PathBuf::from("tests/files/b.slice"); + let file_path_c = PathBuf::from("tests/files/c.slice"); + let file_path_test = PathBuf::from("tests/files/test.slice"); + + let options = SliceOptions { + sources: vec![ + file_path_a.to_str().unwrap().to_owned(), + file_path_b.to_str().unwrap().to_owned(), + file_path_c.to_str().unwrap().to_owned(), + ], + references: vec![ + file_path_b.to_str().unwrap().to_owned(), // Should be filtered out with no diagnostic. + file_path_test.to_str().unwrap().to_owned(), + ], + ..Default::default() + }; + + // Act + let state = slicec::compile_from_options(&options, |_| {}, |_| {}); + + // Assert + assert!(state.diagnostics.is_empty()); + + let files = &state.files; + assert_eq!(files.len(), 4); + assert_eq!(files[0].relative_path, "tests/files/a.slice"); + assert_eq!(files[1].relative_path, "tests/files/b.slice"); + assert_eq!(files[2].relative_path, "tests/files/c.slice"); + assert_eq!(files[3].relative_path, "tests/files/test.slice"); +}