From 252f5b0baad12ae6349e40e5163a6cacee2163fc Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Mon, 21 Oct 2019 11:19:29 -0600 Subject: [PATCH 01/10] Issue #23 This separates numeric and non-numeric values into their own columns. --- landbosse/excelio/XlsxGenerator.py | 51 ++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/landbosse/excelio/XlsxGenerator.py b/landbosse/excelio/XlsxGenerator.py index 420cb725..1c6da8a6 100644 --- a/landbosse/excelio/XlsxGenerator.py +++ b/landbosse/excelio/XlsxGenerator.py @@ -150,7 +150,7 @@ def tab_details(self, rows): This writes a detailed outputs tab. It takes a list of dictionaries as the parameters and in each of those dictionaries it looks at the keys: - ['project_id', 'module', 'type', 'variable_df_key_col_name', 'unit', 'value'] + ['project_id', 'module', 'type', 'variable_df_key_col_name', 'unit', 'numeric value', 'non_numeric_value'] The values of each of those keys become each cell in the row. @@ -164,23 +164,31 @@ def tab_details(self, rows): worksheet.set_column(4, 4, 17) worksheet.set_column(5, 5, 66) worksheet.set_column(0, 2, 17) - for idx, col_name in enumerate(['project_id', 'module', 'type', 'variable_df_key_col_name', 'unit', 'value', 'last number']): + + for idx, col_name in enumerate(['Project ID', 'Module', 'Variable of DataFrame', 'name', 'unit', 'Numeric value', 'Non-numeric value']): worksheet.write(0, idx, col_name, self.header_format) + + # Go through each row and create Excel rows from each of those rows. 
for row_idx, row in enumerate(rows): worksheet.write(row_idx + 1, 0, row['project']) worksheet.write(row_idx + 1, 1, row['module']) worksheet.write(row_idx + 1, 2, row['type']) worksheet.write(row_idx + 1, 3, row['variable_df_key_col_name']) worksheet.write(row_idx + 1, 4, row['unit']) - if type(row['value']) is str or type(row['value']) is int or type(row['value']) is float: - worksheet.write(row_idx + 1, 5, row['value'], self.scientific_format) + + value = row['value'] + value_is_number = self._is_numeric(value) + if value_is_number: + worksheet.write(row_idx + 1, 5, value, self.scientific_format) else: - worksheet.write(row_idx + 1, 5, str(row['value'])) + worksheet.write(row_idx + 1, 6, value) + + # If there is a last_number, which means this is a dataframe row that has a number + # at the end, write this into the numeric value column. + if 'last_number' in row: - if type(row['last_number']) is int or type(row['last_number']) is float: - worksheet.write(row_idx + 1, 6, row['last_number'], self.scientific_format) - else: - worksheet.write(row_idx + 1, 6, str(row['last_number'])) + worksheet.write(row_idx + 1, 5, row['last_number'], self.scientific_format) + worksheet.freeze_panes(1, 0) # Freeze the first row. def tab_details_with_validation(self, rows, validation_xlsx): @@ -278,3 +286,28 @@ def tab_details_with_validation(self, rows, validation_xlsx): worksheet.write(row_idx, 9, '=CONCATENATE({}, {})'.format(pk_part_1_cell, pk_part_2_cell)) row_idx += 1 worksheet.freeze_panes(1, 0) # Freeze the first row. + + def _is_numeric(self, value): + """ + This method determines if the given value is numeric (an int or a + float). If it is numeric, the value can be placed into a numeric + column. If it is non-numeric, the value can be placed into a non- + numeric column. + + This is accomplished by attempting to parse the value as a float. + + Parameters + ---------- + value + The value to be tested. 
+ + Returns + ------- + bool + True if the value can be parsed as a float, False otherwise. + """ + try: + float(value) + except ValueError: + return False + return True From 53d85cdbeb3f60f3a53630c1dc533eea28f8ab62 Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Mon, 21 Oct 2019 14:12:50 -0600 Subject: [PATCH 02/10] Issue #23 Change "Variable of DataFrame" to "Variable or DataFrame" --- landbosse/excelio/XlsxGenerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/landbosse/excelio/XlsxGenerator.py b/landbosse/excelio/XlsxGenerator.py index 1c6da8a6..dfe12cca 100644 --- a/landbosse/excelio/XlsxGenerator.py +++ b/landbosse/excelio/XlsxGenerator.py @@ -165,7 +165,7 @@ def tab_details(self, rows): worksheet.set_column(5, 5, 66) worksheet.set_column(0, 2, 17) - for idx, col_name in enumerate(['Project ID', 'Module', 'Variable of DataFrame', 'name', 'unit', 'Numeric value', 'Non-numeric value']): + for idx, col_name in enumerate(['Project ID', 'Module', 'Variable or DataFrame', 'name', 'unit', 'Numeric value', 'Non-numeric value']): worksheet.write(0, idx, col_name, self.header_format) # Go through each row and create Excel rows from each of those rows. From 2c2b67d9dcc85c132f23c6a7311da82ac5d596e3 Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Tue, 22 Oct 2019 14:44:53 -0600 Subject: [PATCH 03/10] Issue #23 Restore the method that detects numeric and non-numeric values. --- landbosse/excelio/XlsxGenerator.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/landbosse/excelio/XlsxGenerator.py b/landbosse/excelio/XlsxGenerator.py index d40ee0ac..a6bc2188 100644 --- a/landbosse/excelio/XlsxGenerator.py +++ b/landbosse/excelio/XlsxGenerator.py @@ -190,3 +190,21 @@ def tab_details(self, rows): worksheet.write(row_idx + 1, 5, row['last_number'], self.scientific_format) worksheet.freeze_panes(1, 0) # Freeze the first row. + + def _is_numeric(self, value): + """ + Parameters + ---------- + value + The value to be tested. 
+ + Returns + ------- + bool + True if the value is numeric, False otherwise. + """ + try: + float(value) + except ValueError: + return False + return True From c7826a975c046cd582318c35d91b6a8d7bd8e82b Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Tue, 22 Oct 2019 17:17:20 -0600 Subject: [PATCH 04/10] Issue #23 Added a better comment. --- landbosse/excelio/XlsxGenerator.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/landbosse/excelio/XlsxGenerator.py b/landbosse/excelio/XlsxGenerator.py index a6bc2188..05c46226 100644 --- a/landbosse/excelio/XlsxGenerator.py +++ b/landbosse/excelio/XlsxGenerator.py @@ -193,6 +193,12 @@ def tab_details(self, rows): def _is_numeric(self, value): """ + This method tests if a value is a numeric (that is, can be parsed + by float()) or non numeric (which cannot be parsed). + + The decision from this method determines whether values go into + the numeric or non-numeric columns. + Parameters ---------- value From 20162da3ec9b21478b0f56d1e91eb1d8413a79a9 Mon Sep 17 00:00:00 2001 From: eberlea Date: Wed, 23 Oct 2019 16:33:42 -0700 Subject: [PATCH 05/10] Potential change to validation functionality Puts validation output file into output folder and saves results from validation run --- main.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index 87748a2e..52377861 100644 --- a/main.py +++ b/main.py @@ -42,7 +42,7 @@ # Generated based on input_path from command line when --validate option is specified # (validation output file must be in inputs folder and must be called 'landbosse-output-validation.xlsx') expected_validation_data_path = os.path.join(input_path, 'landbosse-expected-validation-data.xlsx') - validation_result_path = os.path.join(input_path, 'landbosse-validation-result.xlsx') + validation_result_path = os.path.join(file_ops.landbosse_output_dir(), 'landbosse-validation-result.xlsx') validator = XlsxValidator() validation_was_successful = 
validator.compare_expected_to_actual( @@ -54,11 +54,11 @@ print('Validation passed.') else: print('Validation failed. See mismatched data above.') - else: - # XlsxGenerator has a context manager that writes each individual - # worksheet to the output .xlsx. Also, copy file input structure. - print('Writing final output folder') - with XlsxGenerator('landbosse-output', file_ops) as xlsx: - xlsx.tab_costs_by_module_type_operation(rows=final_result['module_type_operation_list']) - xlsx.tab_details(rows=final_result['details_list']) - file_ops.copy_input_data() + + # XlsxGenerator has a context manager that writes each individual + # worksheet to the output .xlsx. Also, copy file input structure. + print('Writing final output folder') + with XlsxGenerator('landbosse-output', file_ops) as xlsx: + xlsx.tab_costs_by_module_type_operation(rows=final_result['module_type_operation_list']) + xlsx.tab_details(rows=final_result['details_list']) + file_ops.copy_input_data() From 2d400c14909e4463d83e55c9515ec27ecaacef9c Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Thu, 24 Oct 2019 10:59:44 -0600 Subject: [PATCH 06/10] Issue #23 Update the CHANGELOG.md --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c15195b..6b1456f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,3 +8,10 @@ - Enhancements to all modules in model. - Black box tests. - Dictionary based interface to integrate with other modeling codes. + +## 2.1.1 (October 9, 2019) + +- In the `costs_by_module_type_operation` tab, standardize all costs to USD/kW per project, cost per project, cost per turbine. +- Improve docstrings in source code. +- Refactor more functionality into a new `CostModule` class. +- Clean up logging to use simple `print()` statements which are safe to use in multi-process parallel logging operations. 
From 4547862d7cd68abf0ce74e66152b132b1df7bf8c Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Thu, 24 Oct 2019 11:05:01 -0600 Subject: [PATCH 07/10] Issue #23 Update changelog. --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b1456f4..6ca8367c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,3 +15,8 @@ - Improve docstrings in source code. - Refactor more functionality into a new `CostModule` class. - Clean up logging to use simple `print()` statements which are safe to use in multi-process parallel logging operations. + +## 2.1.2 (October 24, 2019) + +- Add separated "numeric value" and "non-numeric value" to columns on the details sheet. +- Add support to test current model output against previously known good model output to guard against regressions when the model is changed. From 4c5b4315a7868544727544b7336ccf3928f440c5 Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Thu, 24 Oct 2019 11:10:43 -0600 Subject: [PATCH 08/10] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ca8367c..243f07b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,3 +20,5 @@ - Add separated "numeric value" and "non-numeric value" to columns on the details sheet. - Add support to test current model output against previously known good model output to guard against regressions when the model is changed. +- Add support for command line options to control validation, input folder and output folder so that environment variables are not needed. +- Added documentation about command line operation with flowcharts about how LandBOSSE processes data according to the command line. From e61757798eec12489b87e83c3074144a0547eb11 Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Thu, 24 Oct 2019 11:24:42 -0600 Subject: [PATCH 09/10] Issue #23 If there is an expected validation output in the input folder, copy that to the output folder. 
--- landbosse/excelio/XlsxFileOperations.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/landbosse/excelio/XlsxFileOperations.py b/landbosse/excelio/XlsxFileOperations.py index 92aeaad2..dbfe4a27 100644 --- a/landbosse/excelio/XlsxFileOperations.py +++ b/landbosse/excelio/XlsxFileOperations.py @@ -153,6 +153,15 @@ def copy_input_data(self): copy2(src_project_list_xlsx, dst_project_list_xlsx) copytree(src_project_data_dir, dst_project_data_dir) + src_expected_validation_data = os.path.join(self.landbosse_input_dir(), + 'landbosse-expected-validation-data.xlsx') + + dst_expected_validation_data = os.path.join(self.landbosse_output_dir(), + 'landbosse-expected-validation-data.xlsx') + + if os.path.isfile(src_expected_validation_data): + copy2(src_expected_validation_data, dst_expected_validation_data) + def timestamp_filename(self, directory, basename, extension): """ This function creates a timestamped filename. It uses a filename in the From a6cf98e122d7a3f30bcd7b238576478c9c2d0b88 Mon Sep 17 00:00:00 2001 From: Alicia Key Date: Thu, 24 Oct 2019 13:41:05 -0600 Subject: [PATCH 10/10] Issue #23 Update documentation for how to execute the validation. --- installation_instructions/operation_and_folder_structure.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/installation_instructions/operation_and_folder_structure.md b/installation_instructions/operation_and_folder_structure.md index 9582cbdd..f468a350 100644 --- a/installation_instructions/operation_and_folder_structure.md +++ b/installation_instructions/operation_and_folder_structure.md @@ -117,7 +117,7 @@ For validating LandBOSSE, the input data are stored in the same folder structure To validate the model, you would run the following command. 
``` -python main.py --input PATH_TO_VALIDATION_FOLDER --validate +python main.py --input PATH_TO_FOLDER_WITH_VALIDATION_DATA --output PATH_TO_YOUR_OUTPUT_FOLDER --validate ``` -This command will run the model to obtain the actual data and will create the `landbosse-validation-result.xlsx`. +This command will run the model to obtain the actual data and will create the `landbosse-validation-result.xlsx`. It will write the validation