In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt

def plot_puzzle_attributes(json_file_path, attributes=('totalWords',), num_puzzles=None):
    """Draw a grouped bar chart of one or more attributes for each puzzle.

    The JSON file is expected to hold a mapping of puzzle name to a mapping
    of attribute name to numeric value (the code indexes it as
    ``data[puzzle][attribute]``).

    Args:
        json_file_path: Path of the JSON file to read.
        attributes: Attribute name, or iterable of attribute names, to plot.
            Each must be one of ``'totalWords'``, ``'averageWordLength'`` or
            ``'percentCommon'``.
        num_puzzles: If not ``None``, only the first ``num_puzzles`` puzzles
            (in file order) are plotted.

    Raises:
        ValueError: If any requested attribute is not a supported name.
    """
    # A bare string would otherwise be iterated character by character and
    # rejected with a confusing "Invalid attribute 't'" message.
    if isinstance(attributes, str):
        attributes = [attributes]
    else:
        attributes = list(attributes)

    # Validate before touching the file so a bad argument fails fast and is
    # not masked by (or delayed behind) file loading.
    valid_attributes = ['totalWords', 'averageWordLength', 'percentCommon']
    for attribute in attributes:
        if attribute not in valid_attributes:
            raise ValueError(f"Invalid attribute '{attribute}'. Choose from {valid_attributes}.")

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        puzzles_data = json.load(file)

    # Puzzle names in file order, optionally truncated.
    puzzle_names = list(puzzles_data)
    if num_puzzles is not None:
        puzzle_names = puzzle_names[:num_puzzles]

    # Bars of one puzzle are laid side by side and centred on its x position.
    n = len(attributes)
    x = np.arange(len(puzzle_names))  # one slot per puzzle
    bar_width = 0.2
    offset = (n - 1) * bar_width / 2

    plt.figure(figsize=(20, 6))

    # One bar series per attribute; matplotlib assigns a distinct colour each.
    for i, attribute in enumerate(attributes):
        attribute_values = [puzzles_data[puzzle][attribute] for puzzle in puzzle_names]
        plt.bar(x + i * bar_width - offset, attribute_values, width=bar_width, label=attribute)

    plt.xlabel('Puzzle')
    plt.ylabel('Attribute Value')
    plt.title('Attributes Across Puzzles')
    # NOTE(review): puzzle-name tick labels are not set, so the x axis shows
    # integer positions. Presumably this is deliberate because hundreds of
    # rotated labels are unreadable; use plt.xticks(x, puzzle_names,
    # rotation=90) if names are wanted.
    plt.legend()
    plt.tight_layout()

    plt.show()

def plot_puzzle_distribution(json_file_path, attribute='totalWords', bins=10):
    """Show a histogram of a single attribute over every puzzle in a file.

    The JSON file is read as a mapping of puzzle name to a mapping of
    attribute name to value (it is indexed as ``data[puzzle][attribute]``).

    Args:
        json_file_path: Path of the JSON file to read.
        attribute: Key looked up in each puzzle's entry.
        bins: Passed straight through to ``plt.hist`` as its ``bins`` argument.
    """
    with open(json_file_path, 'r') as handle:
        records = json.load(handle)

    # Collect the requested value from each puzzle entry, in file order.
    samples = []
    for name in records:
        samples.append(records[name][attribute])

    figure_size = (10, 6)
    plt.figure(figsize=figure_size)
    plt.hist(samples, bins=bins, color='skyblue', edgecolor='black')

    # Axis labels and title are derived from the attribute being plotted.
    heading = f'Distribution of {attribute} Across Puzzles'
    plt.xlabel(attribute)
    plt.ylabel('Number of Puzzles')
    plt.title(heading)
    plt.tight_layout()

    plt.show()

def plot_puzzle_data(json_file_path, num_puzzles=None, descending=False):
    """Draw a bar chart of ``totalWords`` per puzzle, ordered by that value.

    The JSON file is read as a mapping of puzzle name to a mapping that
    contains a ``'totalWords'`` entry.

    Args:
        json_file_path: Path of the JSON file to read.
        num_puzzles: If not ``None``, only the first ``num_puzzles`` puzzles
            of the sorted order are plotted (``0`` plots none).
        descending: Sort from most to fewest words when true; the default
            sorts from fewest to most.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        puzzles_data = json.load(file)

    ranked = sorted(
        puzzles_data.items(),
        key=lambda item: item[1]['totalWords'],
        reverse=descending,
    )

    # Compare against None (as plot_puzzle_attributes does) so that an
    # explicit limit of 0 is honoured instead of meaning "no limit".
    if num_puzzles is not None:
        ranked = ranked[:num_puzzles]

    puzzle_names = [name for name, _ in ranked]
    total_words = [info['totalWords'] for _, info in ranked]

    # With an ascending sort the leading slice holds the puzzles with the
    # fewest words, so labelling it "Top" would be misleading.
    rank_label = 'Top' if descending else 'Bottom'

    plt.figure(figsize=(14, 6))
    plt.bar(puzzle_names, total_words, color='blue')
    plt.xlabel('Puzzle Name')
    plt.ylabel('Total Words')
    plt.title(f'Total Words per Puzzle ({rank_label} {len(puzzle_names)} Puzzles)')
    plt.xticks(rotation=90)  # puzzle names are long; keep them from overlapping
    plt.tight_layout()

    plt.show()


# Bar chart of the first 100 puzzles in the default (ascending) totalWords order.
plot_puzzle_data(json_file_path='all_puzzles.json', num_puzzles=100)

# Histogram of totalWords over every puzzle, using 200 bins.
plot_puzzle_distribution(json_file_path='all_puzzles.json', attribute='totalWords', bins=200)

# percentCommon for the first 1000 puzzles in file order.
plot_puzzle_attributes(json_file_path='all_puzzles.json', attributes=['percentCommon'], num_puzzles=1000)


ModuleNotFoundError: No module named 'matplotlib'