In [1]:
from vega_datasets import data
from itertools import combinations
from NeuroSymbolicVisualizer import NeuroSymbolicVisualizer
from IPython.display import display
from altair.vegalite.v5.api import FacetChart

def display_chart(chart: FacetChart, tup: tuple[int, str]) -> int:
    """
    Print a chart's score and explanation, render the chart, and return the score.

    Args:
        chart: The chart object handed to IPython's ``display``.
        tup: A ``(score, explanation)`` pair. The score is printed after a
            ``SCORE: `` prefix and the explanation is printed on the next line.

    Returns:
        The score taken from ``tup``, unchanged, so the caller can collect it.
    """
    score, explanation = tup
    print(f"SCORE: {score}")
    print(explanation)
    display(chart)
    return score

# Build the visualizer used by the cells below. The `data.cars` object from
# vega_datasets is passed as-is (it is not called here).
# NOTE(review): the field-summary dict recorded after this cell is presumably
# emitted while the visualizer is constructed -- confirm in NeuroSymbolicVisualizer.
visual = NeuroSymbolicVisualizer(data.cars)

{'number_rows': 406, 'field': [{'name': 'name', 'type': 'string', 'unique': 311, 'entropy': 5617, 'freq': np.int64(6)}, {'name': 'miles_per_gallon', 'type': 'number', 'unique': 129, 'entropy': 4312, 'min': 9, 'max': 46, 'std': 7}, {'name': 'cylinders', 'type': 'number', 'unique': 5, 'entropy': 1103, 'min': 3, 'max': 8, 'std': 1}, {'name': 'displacement', 'type': 'number', 'unique': 83, 'entropy': 3982, 'min': 68, 'max': 455, 'std': 104}, {'name': 'horsepower', 'type': 'number', 'unique': 93, 'entropy': 4064, 'min': 46, 'max': 230, 'std': 38}, {'name': 'weight_in_lbs', 'type': 'number', 'unique': 356, 'entropy': 5821, 'min': 1613, 'max': 5140, 'std': 847}, {'name': 'acceleration', 'type': 'number', 'unique': 96, 'entropy': 4133, 'min': 8, 'max': 24, 'std': 2}, {'name': 'year', 'type': 'datetime', 'unique': 12, 'entropy': 2454}, {'name': 'origin', 'type': 'string', 'unique': 3, 'entropy': 920, 'freq': np.int64(254)}]}


In [4]:
# Separator rules printed between the sections of the report.
SECTION_RULE = "============================================================================================================================================="
CHART_RULE = "---------------------------------------------------------------------------------------------------------------------------------------------"

# Take the column pair returned by `recommend_columns_llm`, then display its
# chart together with the (score, explanation) returned by `eval_chart_llm`.
llm_choice = visual.recommend_columns_llm()
print(f"LLM's choice: {llm_choice}")
llm_score = display_chart(visual.recommend_chart_asp(*llm_choice), visual.eval_chart_llm(*llm_choice))

print(SECTION_RULE)
print("Charts for all other column choices\n\n")

all_combinations = list(combinations(visual.all_columns(), 2))
all_chart_scores = []

# `combinations` yields each unordered pair exactly once, in input order, so
# the LLM's pair is matched as an unordered set. A plain `==` would miss it if
# the LLM returned the columns reversed or as a list, and the pair would then
# be evaluated (and scored) a second time.
llm_pair = frozenset(llm_choice)

for chart_number, cols in enumerate(all_combinations, start=1):
    print(f"Chart {chart_number} of {len(all_combinations)}: {cols}")
    if frozenset(cols) == llm_pair:
        # Already displayed and scored above; reuse that score rather than
        # calling `eval_chart_llm` again for the same pair.
        print("LLM's choice (chart at the top)")
        all_chart_scores.append(llm_score)
    else:
        all_chart_scores.append(
            display_chart(visual.recommend_chart_asp(*cols), visual.eval_chart_llm(*cols))
        )
    print(CHART_RULE)

print(SECTION_RULE)

# One pass finds both the best score and where it occurs; on ties `max`
# returns the first maximal entry, i.e. the lowest chart number.
max_index, max_score = max(enumerate(all_chart_scores), key=lambda entry: entry[1])
if llm_score >= max_score:
    print("The LLM made an ideal choice!")
else:
    print(f"A highest scoring chart is:\n Chart {max_index+1}: {all_combinations[max_index]}")

# NOTE: the label says "costs", but the values listed are the scores
# collected above. The text is kept unchanged so it matches recorded output.
print(f"All chart costs summary:\n{list(zip(all_combinations, all_chart_scores))}")

LLM's choice: ('miles_per_gallon', 'weight_in_lbs')
SCORE: 85
The visualization effectively uses scatter plots to compare the relationship between miles_per_gallon and weight_in_lbs across different cylinder categories. The clear separation into distinct plots makes it easy to observe trends and differences among cylinder counts. However, improving axis labels or adding a legend could enhance clarity further.


Charts for all other column choices


Chart 1 of 21: ('miles_per_gallon', 'cylinders')
SCORE: 65
The visualization effectively displays the relationship between the number of cylinders and miles per gallon using a series of scatter plots. However, the separation of plots may make it challenging to identify trends across categories. Including a combined plot or clearer distinctions between overlapping points could enhance readability and insights.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 2 of 21: ('miles_per_gallon', 'displacement')
SCORE: 85
The scatter plot effectively compares displacement against miles per gallon across different car origins (Europe, Japan, and USA). The use of separate panels for each origin allows for clear visual differentiation and comparison of patterns, providing insights into how displacement relates to fuel efficiency in different datasets.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 3 of 21: ('miles_per_gallon', 'horsepower')
SCORE: 85
The scatter plot effectively visualizes the relationship between horsepower and miles per gallon, segmented by the origin of the vehicles. The three separate plots allow for easy comparison across different categories, highlighting trends and variances in fuel efficiency based on the origin of the cars. However, it could benefit from additional context or annotations for clarity.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 4 of 21: ('miles_per_gallon', 'weight_in_lbs')
LLM's choice (chart at the top)
---------------------------------------------------------------------------------------------------------------------------------------------
Chart 5 of 21: ('miles_per_gallon', 'acceleration')
SCORE: 85
The scatter plot effectively shows the relationship between "miles_per_gallon" and "acceleration" across different car origins (Europe, Japan, USA). The use of separate panels for each origin allows for easy comparison, and the transparency of the points helps reveal density in areas with many overlaps. However, additional enhancements, such as trend lines or annotations, could further improve clarity.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 6 of 21: ('miles_per_gallon', 'year')
SCORE: 85
The visualization effectively utilizes a faceted approach to display the relationship between year and miles per gallon for different cylinder counts. Each panel clearly shows the trend across years, allowing for easy comparison between different category groups. The use of scatter plots enhances the ability to observe data distribution and variability, though incorporating additional summary statistics (like trend lines) may provide further insights. Overall, it communicates the information well.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 7 of 21: ('cylinders', 'displacement')
SCORE: 75
The visualization effectively uses a grouped scatter plot to display the relationship between 'cylinders' and 'displacement' categorized by 'origin'. This allows for a clear comparison between different origins, highlighting trends and outliers. However, the density of points and lack of interconnected lines or means could reduce clarity for some viewers.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 8 of 21: ('cylinders', 'horsepower')
SCORE: 85
The visualization effectively displays the relationship between the number of cylinders and horsepower for different car origins (Europe, Japan, USA). The use of scatter plots allows for easy comparison of distributions across categories. However, including more context or annotations could enhance interpretability further.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 9 of 21: ('cylinders', 'weight_in_lbs')
SCORE: 75
The scatter plot effectively displays the relationship between `weight_in_lbs` and `cylinders` across different categories. Each subplot clearly represents the distribution of weights for vehicle models with varying cylinder counts, making it easier to observe trends or outliers. However, improving the clarity of axis labels and enhancing marker visibility could enhance the overall effectiveness of the visualization.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 10 of 21: ('cylinders', 'acceleration')
SCORE: 70
This visualization effectively displays the relationship between acceleration and the number of cylinders across different origins (Europe, Japan, USA). However, the overlapping points may hinder clarity, and it could benefit from additional enhancements like color coding or transparency adjustments to better represent the data distribution.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 11 of 21: ('cylinders', 'year')
SCORE: 85
The visualization effectively displays the number of cylinders in cars from different origins (Europe, Japan, USA) over the years. The use of small multiples allows for easy comparison across categories, while the clarity of axes and consistent scale enhances interpretability. However, adding labels or annotations could further clarify significant trends or variations.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 12 of 21: ('displacement', 'horsepower')
SCORE: 85
The visualization effectively uses a series of scatter plots to show the relationship between horsepower and displacement for different cylinder counts. This layout allows for easy comparison across categories (cylinder counts), making it clear how the relationship varies by this variable. The plots are well-labeled, but could benefit from additional context or annotations for clarity.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 13 of 21: ('displacement', 'weight_in_lbs')
SCORE: 85
The scatter plot effectively visualizes the relationship between weight and displacement across different origins (Europe, Japan, USA). It allows for quick comparisons and insights into how these variables correlate within each group. The use of separate panels enhances clarity and understanding.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 14 of 21: ('displacement', 'acceleration')
SCORE: 85
The visualization effectively uses scatter plots to assess the relationship between 'displacement' and 'acceleration' across different 'cylinders', allowing for clear comparison of trends in a structured manner. However, some plots have limited data points, which could impact interpretability.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 15 of 21: ('displacement', 'year')
SCORE: 85
The visualization effectively presents the relationship between 'year' and 'displacement' across different car origins (Europe, Japan, USA). The use of separate scatter plots allows for clear comparison and highlights variations in displacement trends over time among the three origins. The inclusion of grid lines also aids in better readability.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 16 of 21: ('horsepower', 'weight_in_lbs')
SCORE: 85
The visualization effectively uses scatter plots to compare the relationship between horsepower and weight for vehicles from different origins (Europe, Japan, USA). This clear division allows for easy comparison of trends across groups. The axes are labeled, and the use of distinct panels enhances readability, though it could benefit from additional context or a title for even better comprehension.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 17 of 21: ('horsepower', 'acceleration')
SCORE: 85
The visualization effectively uses separate scatter plots to illustrate the relationship between horsepower and acceleration for different cylinder counts. This allows for easy comparison across categories. However, some plots appear sparse, which could benefit from additional data points to enhance clarity. Overall, it conveys important insights about the dataset.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 18 of 21: ('horsepower', 'year')
SCORE: 85
This visualization effectively shows the distribution of horsepower over the years, segmented by the origin of the cars. The use of scatter plots allows for comparison across three categories (Europe, Japan, USA) over time, highlighting trends and variations. However, it could benefit from clearer labeling or color coding for better clarity.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 19 of 21: ('weight_in_lbs', 'acceleration')
SCORE: 85
The visualization effectively uses small multiples of scatter plots to show the relationship between acceleration and weight in pounds, segmented by the number of cylinders. This format makes it easy to compare trends across different cylinder counts, and the clear labeling adds to its clarity. However, there could be improvements in setting tighter y-axis limits for better visibility of data points.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 20 of 21: ('weight_in_lbs', 'year')
SCORE: 75
The visualization effectively uses a faceted scatter plot to display the relationship between the `year` and `weight_in_lbs` for different `cylinders`. This format allows for quick comparison across categories, though the details could be enhanced by adding a clearer scale for weight and possibly more context about the dataset.


---------------------------------------------------------------------------------------------------------------------------------------------
Chart 21 of 21: ('acceleration', 'year')
SCORE: 75
The visualization effectively uses small multiples to display the relationship between 'year' and 'acceleration' for different 'cylinder' categories. This allows for easy comparison across groups. However, the clarity could be improved by adding axis labels or annotations to enhance understanding of the trends within the data.


---------------------------------------------------------------------------------------------------------------------------------------------
The LLM made an ideal choice!
All chart costs summary:
[(('miles_per_gallon', 'cylinders'), 65), (('miles_per_gallon', 'displacement'), 85), (('miles_per_gallon', 'horsepower'), 85), (('miles_per_gallon', 'weight_in_lbs'), 85), (('miles_per_gallon', 'acceleration'), 85), (('miles_per_gallon', 'year'), 85), (('cylinders', 'displacement'), 75), (('cylinders', 'horsepower'), 85), (('cylinders', 'weight_in_lbs'), 75), (('cylinders', 'acceleration'), 70), (('cylinders', 'year'), 85), (('displacement', 'horsepower'), 85), (('displacement', 'weight_in_lbs'), 85), (('displacement', 'acceleration'), 85), (('displacement', 'year'), 85), (('horsepower', 'weight_in_lbs'), 85), (('horsepower', 'acceleration'), 85), (('horsepower', 'year'), 85), (('weight_in_lbs', 'acceleration'), 85), (('weight_in_lbs', 'year'), 75), (('acceleration', 'year'), 75)]
