Skip to content

Commit

Permalink
Merge pull request #156 from VisLab/develop
Browse files Browse the repository at this point in the history
Refactoring to separate operations from form processing
  • Loading branch information
VisLab committed Feb 12, 2024
2 parents 995fccb + f2cfe29 commit 015e895
Show file tree
Hide file tree
Showing 33 changed files with 2,112 additions and 2,007 deletions.
61 changes: 26 additions & 35 deletions hedweb/process_base.py → hedweb/base_operations.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,26 @@
from abc import ABC, abstractmethod

class ProcessBase(ABC):
    """ Abstract base for processing classes whose input comes from a dict or a form. """

    def set_input_from_dict(self, input_dict):
        """ Sets the child class attributes based on input_dict.

        Only sets attributes that already exist on the object, are not callable,
        and are not private (leading underscore). Everything else is ignored.

        parameters:
            input_dict (dict): A dict object containing user data from a JSON service request.

        """
        for key, value in input_dict.items():
            # Reject non-string and private names before touching the attribute at all:
            # hasattr/getattr raise TypeError on a non-string name.
            if not isinstance(key, str) or key.startswith("_"):
                continue
            # Only allowed to overwrite existing data attributes -- never methods.
            if not hasattr(self, key) or callable(getattr(self, key)):
                continue
            setattr(self, key, value)

    @abstractmethod
    def set_input_from_form(self, request):
        """ Set input for processing from a form.

        parameters:
            request (Request): A Request object containing user data from the form.

        """
        raise NotImplementedError

    @abstractmethod
    def process(self):
        """ Perform the requested string processing action.

        Returns:
            dict: The results in standard format.

        """
        raise NotImplementedError
from abc import ABC, abstractmethod


class BaseOperations(ABC):
    """ Abstract base for operation classes whose input can be set from a dict. """

    def set_input_from_dict(self, input_dict):
        """ Sets the child class attributes based on input_dict.

        Only sets attributes that already exist on the object, are not callable,
        and are not private (leading underscore). Everything else is ignored.

        parameters:
            input_dict (dict): A dict object containing user data from a JSON service request.

        """
        for key, value in input_dict.items():
            # Reject non-string and private names before touching the attribute at all:
            # hasattr/getattr raise TypeError on a non-string name.
            if not isinstance(key, str) or key.startswith("_"):
                continue
            # Only allowed to overwrite existing data attributes -- never methods.
            if not hasattr(self, key) or callable(getattr(self, key)):
                continue
            setattr(self, key, value)

    @abstractmethod
    def process(self):
        """ Perform the requested string processing action.

        Returns:
            dict: The results in standard format.

        """
        raise NotImplementedError
83 changes: 45 additions & 38 deletions hedweb/columns.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,68 @@
import openpyxl
import os


from pandas import DataFrame, read_csv
from hed.errors import HedFileError
from hed.tools.analysis.tabular_summary import TabularSummary
from hedweb.constants import base_constants, file_constants
from hedweb.constants import base_constants as bc
from hedweb.constants import file_constants as fc
from hedweb.web_util import form_has_file, form_has_option


def create_column_selections(form_dict):
""" Return a tag prefix dictionary from a form dictionary.
Args:
Parameters:
form_dict (dict): The column prefix table returned from a form.
Returns:
dict: Keys are column numbers (starting with 1) and values are tag prefixes to prepend.
"""

columns_selections = {}
columns = []
columns_selected = []
columns_categorical = []
keys = form_dict.keys()
for key in keys:
if not key.startswith('column') or not key.endswith('use'):
if not key.startswith('column_') or not key.endswith('_name'):
continue
pieces = key.split('_')
name_key = 'column_' + pieces[1] + '_name'
if name_key not in form_dict:
continue
name = form_dict[name_key]
if form_dict.get('column_' + pieces[1] + '_category', None) == 'on':
columns_selections[name] = True
else:
columns_selections[name] = False
col_name = form_dict[key]
columns.append(col_name)
if 'column_' + pieces[1] + '_use' in keys:
columns_selected.append(col_name)
if 'column_' + pieces[1] + '_category' in keys:
columns_categorical.append(col_name)
columns_value = list(set(columns_selected).difference(set(columns_categorical)))
columns_skip = list(set(columns).difference(set(columns_selected)))
return columns_value, columns_skip

return columns_selections

def get_tag_columns(form_dict):
""" Return the tag column names selected from a form.
def create_columns_included(form_dict):
""" Return a list of columns to be included.
Args:
form_dict (dict): The dictionary returned from a form that contains the columns to be included.
Parameters:
form_dict (dict): The column names table
Returns:
(list): Column names to be included.
list: List of tag columns
"""
# TODO: Implement this.
return []
tag_columns = []
keys = form_dict.keys()
for key in keys:
if not key.startswith('column_') or not key.endswith('_use'):
continue
pieces = key.split('_')
column_name_key = 'column_' + pieces[1] + '_name'
if column_name_key in keys and form_dict[column_name_key]:
tag_columns.append(form_dict[column_name_key])
return tag_columns


def _create_columns_info(columns_file, has_column_names=True, sheet_name=None):
""" Create a dictionary of column information from a spreadsheet.
Args:
Parameters:
columns_file (File-like): File to create the dictionary for.
has_column_names (bool): If True, first row is interpreted as the column names.
sheet_name (str): The name of the worksheet if this is an Excel file.
Expand All @@ -74,11 +81,11 @@ def _create_columns_info(columns_file, has_column_names=True, sheet_name=None):
sheet_names = None
filename = columns_file.filename
file_ext = os.path.splitext(filename.lower())[1]
if file_ext in file_constants.EXCEL_FILE_EXTENSIONS:
if file_ext in fc.EXCEL_FILE_EXTENSIONS:
worksheet, sheet_names = _get_worksheet(columns_file, sheet_name)
dataframe = dataframe_from_worksheet(worksheet, has_column_names)
sheet_name = worksheet.title
elif file_ext in file_constants.TEXT_FILE_EXTENSIONS:
elif file_ext in fc.TEXT_FILE_EXTENSIONS:
dataframe = read_csv(columns_file, delimiter='\t', header=header)
else:
raise HedFileError('BadFileExtension',
Expand All @@ -87,16 +94,16 @@ def _create_columns_info(columns_file, has_column_names=True, sheet_name=None):
col_dict = TabularSummary()
col_dict.update(dataframe)
col_counts = col_dict.get_number_unique()
columns_info = {base_constants.COLUMNS_FILE: filename, base_constants.COLUMN_LIST: col_list,
base_constants.COLUMN_COUNTS: col_counts,
base_constants.WORKSHEET_SELECTED: sheet_name, base_constants.WORKSHEET_NAMES: sheet_names}
columns_info = {bc.COLUMNS_FILE: filename, bc.COLUMN_LIST: col_list,
bc.COLUMN_COUNTS: col_counts,
bc.WORKSHEET_SELECTED: sheet_name, bc.WORKSHEET_NAMES: sheet_names}
return columns_info


def dataframe_from_worksheet(worksheet, has_column_names):
""" Return a pandas data frame from an Excel worksheet.
Args:
Parameters:
worksheet (Worksheet): A single worksheet of an Excel file.
has_column_names (bool): If True, interpret the first row as column names.
Expand All @@ -118,7 +125,7 @@ def dataframe_from_worksheet(worksheet, has_column_names):
def get_columns_request(request):
""" Create a columns info dictionary based on the request.
Args:
Parameters:
request (Request): The Request object from which to extract the information.
Returns:
Expand All @@ -129,15 +136,15 @@ def get_columns_request(request):
"""
if not form_has_file(request, base_constants.COLUMNS_FILE):
if not form_has_file(request.files, bc.COLUMNS_FILE):
raise HedFileError('MissingFile', 'An uploadable file was not provided', None)
columns_file = request.files.get(base_constants.COLUMNS_FILE, '')
has_column_names = form_has_option(request, 'has_column_names', 'on')
sheet_name = request.form.get(base_constants.WORKSHEET_SELECTED, None)
columns_file = request.files.get(bc.COLUMNS_FILE, '')
has_column_names = form_has_option(request.form, 'has_column_names', 'on')
sheet_name = request.form.get(bc.WORKSHEET_SELECTED, None)
return _create_columns_info(columns_file, has_column_names, sheet_name)


def get_column_names(form_dict):
def get_column_numbers(form_dict):
""" Return a tag prefix dictionary from a form dictionary.
Parameters:
Expand All @@ -163,7 +170,7 @@ def get_column_names(form_dict):
def _get_worksheet(excel_file, sheet_name):
""" Return a Worksheet and a list of sheet names from an Excel file.
Args:
Parameters:
excel_file (str): Name of the Excel file to use.
sheet_name (str or None): Name of the worksheet if any, otherwise the first one.
Expand Down
6 changes: 3 additions & 3 deletions hedweb/constants/base_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
COLUMNS_FILE = 'columns_file'
COLUMNS_INFO = 'columns_info'
COLUMNS_PATH = 'columns_path'
COLUMNS_INCLUDED = 'columns_included'
COLUMNS_SELECTED = 'columns_selected'
COLUMNS_SKIP = 'skip_columns'
COLUMNS_VALUE = 'columns_value'

COMMAND = 'command'
Expand Down Expand Up @@ -70,7 +69,8 @@
OTHER_VERSION_OPTION = 'Other'
OUTPUT_DISPLAY_NAME = 'output_display_name'

QUERY = 'query'
QUERIES = 'queries'
QUERY_NAMES = 'query_names'
REMODEL_FILE = 'remodel_file'
REMODEL_OPERATIONS = 'remodel_operations'
REMODEL_STRING = 'remodel_string'
Expand Down

0 comments on commit 015e895

Please sign in to comment.