diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bbaa54f3..6b32722c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,6 +5,6 @@ repos: - id: isort - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 24.10.0 hooks: - id: black \ No newline at end of file diff --git a/components/analysis_params.py b/components/analysis_params.py index bde496d1..e824bff4 100644 --- a/components/analysis_params.py +++ b/components/analysis_params.py @@ -1,5 +1,6 @@ from tempfile import TemporaryDirectory +import polars as pl from pydantic import BaseModel from analyzer_interface import ( @@ -11,7 +12,7 @@ ) from app import ProjectContext from context import InputColumnProvider, PrimaryAnalyzerDefaultParametersContext -from terminal_tools import print_ascii_table, prompts +from terminal_tools import prompts, smart_print_data_frame from .context import ViewContext @@ -58,7 +59,7 @@ def customize_analysis( } while True: - with context.terminal.nest("Customization"): + with context.terminal.nest("◆◆ Parameter customization ◆◆"): param_states = [ ParamState( param_spec=param_spec, @@ -67,15 +68,21 @@ def customize_analysis( for param_spec in analyzer.params ] - print_ascii_table( - [ - [ - param_state.param_spec.print_name, - print_param_value(param_state.value), - ] - for param_state in param_states - ], - header=["parameter name", "parameter value"], + smart_print_data_frame( + data_frame=pl.DataFrame( + { + "parameter name": [ + param_state.param_spec.print_name + for param_state in param_states + ], + "parameter value": [ + print_param_value(param_state.value) + for param_state in param_states + ], + } + ), + title="Analysis Parameters", + apply_color=None, ) has_all_params = all( diff --git a/components/new_analysis.py b/components/new_analysis.py index cc7f4bf8..b5bf600c 100644 --- a/components/new_analysis.py +++ b/components/new_analysis.py @@ -1,4 +1,3 @@ -from tempfile import TemporaryDirectory from traceback import format_exc from 
typing import Optional @@ -7,14 +6,12 @@ from analyzer_interface import ( AnalyzerInterface, InputColumn, - ParamValue, UserInputColumn, column_automap, get_data_type_compatibility_score, ) from app import ProjectContext -from context import InputColumnProvider, PrimaryAnalyzerDefaultParametersContext -from terminal_tools import draw_box, print_ascii_table, prompts, wait_for_key +from terminal_tools import draw_box, prompts, smart_print_data_frame, wait_for_key from .analysis_params import customize_analysis from .context import ViewContext @@ -44,20 +41,33 @@ def new_analysis( with terminal.nest(draw_box(analyzer.name, padding_lines=0)): with terminal.nest("◆◆ About this test ◆◆"): - print("") print(analyzer.long_description or analyzer.short_description) print("") print("◆◆ Required Input ◆◆") print("The test requires these columns in the input data:") print("") - for index, input_column in enumerate(analyzer.input.columns): - print( - f"[{index + 1}] {input_column.human_readable_name_or_fallback()}" - f" ({input_column.data_type})" + + required_cols_dict = {"Column ID": [], "Description": []} + for input_column in analyzer.input.columns: + required_cols_dict["Column ID"].append( + input_column.human_readable_name_or_fallback() ) - print(input_column.description or "") - print("") + required_cols_dict["Description"].append(input_column.description) + + smart_print_data_frame( + data_frame=pl.DataFrame(required_cols_dict), + title=None, + apply_color="row-wise", + ) + + # for index, input_column in enumerate(analyzer.input.columns): + # print( + # f"[{index + 1}] {input_column.human_readable_name_or_fallback()}" + # f" ({input_column.data_type})" + # ) + # print(input_column.description or "") + # print("") user_columns = project.columns user_columns_by_name = { @@ -75,8 +85,8 @@ def new_analysis( print("These columns cannot be satisfied:") for input_column in unmapped_columns: print( - f"- {input_column.human_readable_name_or_fallback() - } 
({input_column.data_type})" + f"- {input_column.human_readable_name_or_fallback()}" + + f" ({input_column.data_type})" ) print("") @@ -92,16 +102,25 @@ def new_analysis( final_column_mapping = draft_column_mapping while True: - with terminal.nest("Column mapping") as column_mapping_scope: - print_ascii_table( - rows=[ - [ - input_column.human_readable_name_or_fallback(), - '"' + draft_column_mapping.get(input_column.name) + '"', - ] - for input_column in analyzer.input.columns - ], - header=["Test's Input Column", "← Your Dataset's Column"], + with terminal.nest("◆◆ Column selection ◆◆") as column_mapping_scope: + mapping_df = pl.DataFrame( + { + "Column Name for Analyzer Input": [ + input_column.human_readable_name_or_fallback() + for input_column in analyzer.input.columns + ], + "← Column Name In Your Dataset": [ + draft_column_mapping.get(input_column.name) + for input_column in analyzer.input.columns + ], + } + ) + + smart_print_data_frame( + data_frame=mapping_df, + title=None, + apply_color="row-wise", + smart_print=False, ) sample_input_df = pl.DataFrame( @@ -115,7 +134,12 @@ def new_analysis( } ) print("Your test data would look like this:") - print(sample_input_df) + smart_print_data_frame( + data_frame=sample_input_df, + title="Sample input data", + apply_color="column-wise", + smart_print=False, + ) mapping_ok = prompts.confirm( "Are you happy with this mapping?", @@ -163,8 +187,8 @@ def new_analysis( print("Explanation: " + selected_analyzer_column.description) print("") print( - f"The test requires data type [{ - selected_analyzer_column.data_type}] for this column." + "The test requires data type" + + f" [{selected_analyzer_column.data_type}] for this column." 
) print("") @@ -191,7 +215,6 @@ def new_analysis( draft_column_mapping[selected_analyzer_column.name] = ( selected_user_column.name ) - param_values = customize_analysis( context, project, analyzer, final_column_mapping ) diff --git a/components/select_project.py b/components/select_project.py index 936f2b4a..95111639 100644 --- a/components/select_project.py +++ b/components/select_project.py @@ -1,7 +1,7 @@ from typing import Optional from app import ProjectContext -from terminal_tools import draw_box, print_ascii_table, prompts, wait_for_key +from terminal_tools import draw_box, prompts, smart_print_data_frame, wait_for_key from .context import ViewContext @@ -30,20 +30,11 @@ def select_project(ctx: ViewContext): draw_box(f"Project: {project.display_name}", padding_lines=0) ): df = project.preview_data - print_ascii_table( - [ - [preview_value(cell) for cell in row] - for row in df.head(10).iter_rows() - ], - header=df.columns, - ) - print(f"(Total {project.data_row_count} rows)") - print("Inferred column semantics:") - print_ascii_table( - rows=[ - [col.name, col.semantic.semantic_name] for col in project.columns - ], - header=["Column", "Semantic"], + smart_print_data_frame( + data_frame=df.head(5), + title="Input data preview", + apply_color=None, + caption=f"Total rows: {project.data_row_count:,}", ) confirm_load = prompts.confirm("Load this project?", default=True) diff --git a/components/splash.py b/components/splash.py index 415c78ae..bd5b073e 100644 --- a/components/splash.py +++ b/components/splash.py @@ -1,18 +1,76 @@ +from rich import print +from rich.console import Console +from rich.panel import Panel + from meta import get_version from terminal_tools import clear_terminal, wait_for_key def splash(): + console = Console() + + # Calculate exact width thresholds for each logo size + # Measured from actual ASCII art content + Rich Panel borders/padding + BIG_LOGO_WIDTH = 102 # Big logo longest line + SMALL_LOGO_WIDTH = 78 # Small logo longest line + 
PANEL_PADDING = 4 # Rich Panel border + padding (2 chars each side) + + BIG_THRESHOLD = BIG_LOGO_WIDTH + PANEL_PADDING # 106 chars + SMALL_THRESHOLD = SMALL_LOGO_WIDTH + PANEL_PADDING # 82 chars + clear_terminal() - print(_ascii_splash) - print("") - print(f"{get_version() or ''}") + + # Three-tier adaptive logo display + if console.size.width < SMALL_THRESHOLD: + print(_LOGO_MINI) # Very narrow terminals + elif console.size.width < BIG_THRESHOLD: + print(_ASCII_LOGO_SMALL) # Medium terminals + else: + print(_ASCII_LOGO_BIG) # Wide terminals + + print(_ASCII_TREE) print("") wait_for_key(True) -_ascii_splash: str = """ - -..*+:..- +_VERSION = f"[dim]{get_version() or 'development version'}[/dim]" +_TITLE = "A Civic Tech DC Project" + +_LOGO_MINI = Panel.fit( + """[orange1 bold] + CIB MANGO TREE + [/orange1 bold]""", + title=_TITLE, + title_align="center", + subtitle=_VERSION, +) + +_ASCII_LOGO_SMALL = Panel.fit( + """[orange1] + ____ ___ ____ __ __ _____ + / ___|_ _| __ ) | \/ | __ _ _ __ __ _ ___ |_ _| __ ___ ___ + | | | || _ \ | |\/| |/ _` | '_ \ / _` |/ _ \ | || '__/ _ \/ _ \\ + | |___ | || |_) | | | | | (_| | | | | (_| | (_) | | || | | __/ __/ + \____|___|____/ |_| |_|\__,_|_| |_|\__, |\___/ |_||_| \___|\___| + |___/[/orange1]""", + title=_TITLE, + subtitle=_VERSION, +) + +_ASCII_LOGO_BIG = Panel.fit( + """[orange1] + ██████╗ ██╗ ██████╗ ███╗ ███╗ █████╗ ███╗ ██╗ ██████╗ ██████╗ ████████╗ ██████╗ ███████╗ ███████╗ + ██╔════╝ ██║ ██╔══██╗ ████╗ ████║ ██╔══██╗ ████╗ ██║ ██╔════╝ ██╔═══██╗ ╚══██╔══╝ ██╔══██╗ ██╔════╝ ██╔════╝ + ██║ ██║ ██████╔╝ ██╔████╔██║ ███████║ ██╔██╗ ██║ ██║ ███╗ ██║ ██║ ██║ ██████╔╝ █████╗ █████╗ + ██║ ██║ ██╔══██╗ ██║╚██╔╝██║ ██╔══██║ ██║╚██╗██║ ██║ ██║ ██║ ██║ ██║ ██╔══██╗ ██╔══╝ ██╔══╝ + ╚██████╗ ██║ ██████╔╝ ██║ ╚═╝ ██║ ██║ ██║ ██║ ╚████║ ╚██████╔╝ ╚██████╔╝ ██║ ██║ ██║ ███████╗ ███████╗ + ╚═════╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═══╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚══════╝[/orange1]""", + title=_TITLE, + subtitle=_VERSION, +) + 
+_ASCII_TREE: str = """ + -..*+:..-. -.=-+%@%##+-=.- = =:*%:...=:..=@*:+ = :: -:=#==#*=:::-=-...-::: @@ -26,14 +84,20 @@ def splash(): @@#= @@% @@@ +""" - C I B M A N G O T R E E - - A Civic Tech DC Project - +_FOOTER: str = Panel.fit( + """ + A Civic Tech DC Project +[red] ╱ * * * ╱ ╲ - ╲ ===== ╱ ╱ + ╲ ===== ╱ ╱[/red] """ +) + """ -I generated this using https://www.asciiart.eu/image-to-ascii +Notes: +Logo generated with: https://github.com/shinshin86/oh-my-logo +(used as: `npx oh-my-logo "CIB Mango Tree" gold --filled --no-color`) +Ascii tree was generated with: https://www.asciiart.eu/image-to-ascii """ diff --git a/requirements.txt b/requirements.txt index d67c23d9..1e840ec9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ shinywidgets==0.6.2 starlette==0.47.1 uvicorn==0.34.3 a2wsgi==1.10.10 -python-json-logger==2.0.7 \ No newline at end of file +python-json-logger==2.0.7 +rich==14.0.0 \ No newline at end of file diff --git a/terminal_tools/__init__.py b/terminal_tools/__init__.py index fe49cd3c..14fea23c 100644 --- a/terminal_tools/__init__.py +++ b/terminal_tools/__init__.py @@ -6,5 +6,7 @@ enable_windows_ansi_support, open_directory_explorer, print_ascii_table, + print_dialog_section_title, + smart_print_data_frame, wait_for_key, ) diff --git a/terminal_tools/inception.py b/terminal_tools/inception.py index d81c5d44..f672feca 100644 --- a/terminal_tools/inception.py +++ b/terminal_tools/inception.py @@ -5,6 +5,8 @@ in memory and printed to the terminal at each refresh. 
""" +# from terminal_tools import print_dialog_section_title + from .utils import clear_terminal diff --git a/terminal_tools/utils.py b/terminal_tools/utils.py index f1bd63bd..023f573b 100644 --- a/terminal_tools/utils.py +++ b/terminal_tools/utils.py @@ -2,6 +2,11 @@ import subprocess import sys +import polars as pl +from rich.console import Console +from rich.style import Style +from rich.table import Table + def clear_terminal(): """Clears the terminal""" @@ -188,3 +193,256 @@ def border_row(left: str, middle: str, right: str, char: str = "─"): # bottom border print(border_row("└─", "─┴─", "─┘")) + + +console = Console() + + +def print_data_frame(data_frame, title: str, apply_color: str, caption: str = None): + # Mapping Polars data types to Rich colors (medium brightness) + # see: https://rich.readthedocs.io/en/stable/appendix/colors.html + POLARS_TYPE_COLORS = { + # String types + pl.String: "dodger_blue2", + pl.Categorical: "light_blue3", + # Numeric types + pl.Int8: "green3", + pl.Int16: "green3", + pl.Int32: "green3", + pl.Int64: "green3", + pl.UInt8: "dark_green", + pl.UInt16: "dark_green", + pl.UInt32: "dark_green", + pl.UInt64: "dark_green", + pl.Float32: "orange3", + pl.Float64: "orange3", + # Temporal types + pl.Date: "medium_purple2", + pl.Datetime: "medium_purple3", + pl.Time: "purple3", + pl.Duration: "orchid3", + # Boolean + pl.Boolean: "gold3", + # Complex types + pl.List: "dark_cyan", + pl.Struct: "cyan3", + pl.Array: "steel_blue3", + # Binary/Other + pl.Binary: "grey62", + pl.Null: "grey50", + pl.Object: "deep_pink3", + pl.Unknown: "indian_red3", + } + + # Color cycle for column/row-wise coloring + CYCLE_COLORS = [ + "orange3", + "dodger_blue1", + "dark_cyan", + "medium_purple1", + "deep_pink4", + "gold1", + "grey66", + "steel_blue1", + ] + + # Get colors based on column data types + def get_column_color(dtype): + # Handle parameterized types like Datetime(time_unit, time_zone) + if hasattr(dtype, "base_type"): + base_type = dtype.base_type() + 
if base_type in POLARS_TYPE_COLORS: + return POLARS_TYPE_COLORS[base_type] + + # Direct type mapping + dtype_class = type(dtype) + if dtype_class in POLARS_TYPE_COLORS: + return POLARS_TYPE_COLORS[dtype_class] + + # Check if it's a subclass of known types + for polars_type, color in POLARS_TYPE_COLORS.items(): + if isinstance(dtype, polars_type): + return color + + # Fallback to index-based coloring + return CYCLE_COLORS[hash(str(dtype)) % len(CYCLE_COLORS)] + + # Capture original data types BEFORE string conversion + original_dtypes = { + col: data_frame.select(col).dtypes[0] for col in data_frame.columns + } + + # Convert non-string columns to strings for display + data_frame = data_frame.with_columns(pl.exclude(pl.String).cast(str)) + + table = Table(title=title, caption=caption) + + # Add columns with appropriate coloring and width limits + for i, col in enumerate(data_frame.columns): + if apply_color == "column-wise": + # Cycle through colors for each column + col_color = CYCLE_COLORS[i % len(CYCLE_COLORS)] + table.add_column( + col, + style=col_color, + overflow="ellipsis", + no_wrap=True, + ) + elif apply_color == "column_data_type": + # Color based on ORIGINAL data type (before string conversion) + original_dtype = original_dtypes[col] + col_color = get_column_color(original_dtype) + table.add_column( + col, + style=col_color, + overflow="ellipsis", + no_wrap=True, + ) + elif apply_color is None: + # No coloring at all - omit style parameter entirely + table.add_column( + col, + overflow="ellipsis", + no_wrap=True, + ) + else: + # No column coloring - omit style parameter entirely + table.add_column( + col, + overflow="ellipsis", + no_wrap=True, + ) + + # Add rows with appropriate coloring based on mode + if apply_color == "row-wise": + # Cycle through colors for each row + for i, row in enumerate(data_frame.iter_rows()): + row_color = CYCLE_COLORS[i % len(CYCLE_COLORS)] + table.add_row(*row, style=row_color) + elif apply_color is None: + # No row coloring 
at all + for row in data_frame.iter_rows(): + table.add_row(*row) + else: + # No row coloring (column coloring only) + for row in data_frame.iter_rows(): + table.add_row(*row) + + console.print(table) + + +def print_data_frame_summary( + data_frame, title: str, apply_color: str = "column_data_type", caption: str = None +): + """Print a summary table for dataframes with many columns""" + from preprocessing.series_semantic import infer_series_semantic + + MAX_ROW_CHAR = 25 + + # Create summary data + summary_data = [] + for col in data_frame.columns: + dtype = data_frame.select(col).dtypes[0] + + # Get example value (first non-null if possible) + example_series = data_frame.select(col).to_series() + example_val = None + for val in example_series: + if val is not None: + example_val = str(val) + break + if example_val is None: + example_val = "null" + + # Truncate long examples + if len(example_val) > MAX_ROW_CHAR: + example_val = example_val[:MAX_ROW_CHAR] + "..." + + # Get semantic analysis type + try: + semantic = infer_series_semantic(data_frame.select(col).to_series()) + analysis_type = semantic.data_type if semantic else "unknown" + except Exception: + analysis_type = "unknown" + + summary_data.append([col, str(dtype), example_val, analysis_type]) + + # Create summary dataframe + summary_df = pl.DataFrame( + { + "Column Name": [row[0] for row in summary_data], + "Data Type": [row[1] for row in summary_data], + "Example Value": [row[2] for row in summary_data], + "Inferred Analyzer Input Type": [row[3] for row in summary_data], + } + ) + + # Print with specified coloring mode + print_data_frame(summary_df, title, apply_color, caption) + + +def smart_print_data_frame( + data_frame: pl.DataFrame, + title: str, + apply_color: str | None = "column_data_type", + smart_print: bool = True, + caption: str | None = None, +) -> None: + """Smart dataframe printing with adaptive display based on terminal width. 
+ + Automatically chooses between full table display and summary view based on terminal + width and number of columns. Provides Rich-styled tables with configurable coloring. + + Args: + data_frame: Polars DataFrame to display + title: Title text to display above the table (None for no title) + apply_color: Color mode for the table display: + - "column_data_type": Colors columns based on their Polars data types + - "column-wise": Cycles through colors for each column + - "row-wise": Cycles through colors for each row + - None: No coloring (plain black and white display) + smart_print: Controls adaptive display behavior: + - True: Uses summary view for wide tables (>8 cols or narrow terminal) + - False: Always uses full table display regardless of width + caption: Optional caption text displayed below the table + + Display Logic: + - If smart_print=False: Always shows full table + - If smart_print=True and (>8 columns OR estimated column width <12): + Shows summary with column info, data types, and examples + - Otherwise: Shows full table with all data + + Examples: + >>> smart_print_data_frame(df, "My Data", apply_color=None) + >>> smart_print_data_frame(df, "Analysis Results", caption="Processing complete") + >>> smart_print_data_frame(df, "Wide Dataset", smart_print=False) + """ + if not smart_print: + # Always use full dataframe display when smart_print is disabled + print_data_frame(data_frame, title, apply_color, caption) + return + + # Smart adaptive logic + terminal_width = console.size.width + n_cols = len(data_frame.columns) + + # Calculate if columns will be too narrow for readability + estimated_col_width = max(60, terminal_width - 4) // max(n_cols, 1) + min_readable_width = 12 # Minimum width for readable columns + + # Use summary if too many columns or columns would be too narrow + if n_cols > 8 or estimated_col_width < min_readable_width: + print_data_frame_summary( + data_frame, + (title or "") + " (Dataset has a large nr. 
of columns, showing summary instead)", + apply_color, + caption, + ) + else: + print_data_frame(data_frame, title, apply_color, caption) + + +def print_dialog_section_title(print_str): + mango_style = Style(color="#F3921E", bold=True) + + console.print(print_str, style=mango_style)