In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import ast

def _count_indices(indices_str):
    """Return the length of a stringified Python list, or 0 if it is not one."""
    try:
        parsed = ast.literal_eval(indices_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the errors ast.literal_eval documents for malformed
        # input; an unparsable cell simply contributes no reactions.
        return 0
    return len(parsed) if isinstance(parsed, list) else 0


def plot_reaction_counts(csv_filepath, top_n=50, output_image_path='reaction_counts.png'):
    """
    Reads reaction data, calculates summary stats, prints them, and generates
    a vertical bar plot sorted descending (largest bar on left).

    The CSV must provide a 'reaction_type' column and a 'reaction_indices'
    column holding stringified Python lists. The count for a row is the
    length of its list; counts are summed per reaction type. Three summary
    values (total reactions, number of unique types, number of types with
    exactly one reaction) are drawn as highlighted bars alongside the
    ``top_n`` most frequent reaction types on a log-scaled y-axis.

    Args:
        csv_filepath (str): Path to the input CSV file.
        top_n (int): Number of top reaction types to display (None for all).
        output_image_path (str): Path to save the plot image.

    Returns:
        None. A missing file, an empty file, and data errors (including
        missing required columns) are reported on stdout instead of raised.
    """
    try:
        df = pd.read_csv(csv_filepath, on_bad_lines='skip')

        # Fail with a readable message instead of a bare KeyError when the
        # file does not have the expected schema.
        missing = [
            col for col in ('reaction_type', 'reaction_indices')
            if col not in df.columns
        ]
        if missing:
            raise ValueError(f"Missing required column(s): {', '.join(missing)}")

        # --- Data Processing ---
        # Missing cells are treated as empty lists so they count as zero.
        df['count'] = df['reaction_indices'].fillna('[]').apply(_count_indices)
        reaction_counts = df.groupby('reaction_type')['count'].sum()

        # --- Calculate Summary Statistics ---
        total_reactions = reaction_counts.sum()
        total_reaction_types = len(reaction_counts)
        types_with_one_reaction = (reaction_counts == 1).sum()

        # --- Print Summary Statistics ---
        print("-" * 30)
        print("Reaction Data Summary:")
        print(f"Total number of reactions (sum of counts): {total_reactions}")
        print(f"Total number of unique reaction types:     {total_reaction_types}")
        print(f"Number of types with only one reaction:    {types_with_one_reaction}")
        print("-" * 30)

        # --- Prepare Data for Plotting ---
        plot_data_reactions = reaction_counts.sort_values(ascending=False).head(top_n)

        summary_stats = pd.Series({
            "Reactions": total_reactions,
            "Unique Templates": total_reaction_types,
            "Single-Reaction Templates": types_with_one_reaction,
        })
        # Highlight colours for the summary bars (RGB scaled to 0..1).
        summary_colors = {
            "Reactions": (253/255, 132/255, 44/255),                # #FD842C
            "Unique Templates": (221/255, 77/255, 46/255),          # #DD4D2E
            "Single-Reaction Templates": (189/255, 36/255, 66/255)  # #BD2442
        }
        default_color = (89/255, 14/255, 88/255)                    # #590E58

        # Summary bars are sorted together with the per-type bars so the
        # whole chart is in descending order.
        combined_data = pd.concat([summary_stats, plot_data_reactions])
        combined_data = combined_data.sort_values(ascending=False)

        total_bars = len(combined_data)
        fig_width = max(10, total_bars * 0.35)

        # rc_context restores the caller's font size on exit, even when
        # plotting fails, instead of clobbering it with the library default.
        with plt.rc_context({'font.size': 19}):
            fig, ax = plt.subplots(figsize=(0.8*fig_width, 8))
            try:
                ax.set_axisbelow(True)  # keep grid lines behind the bars

                bars = ax.bar(combined_data.index, combined_data.values,
                              color=default_color)
                for bar, label in zip(bars, combined_data.index):
                    if label in summary_colors:
                        bar.set_color(summary_colors[label])

                plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
                ax.tick_params(axis='x', labelsize=12)
                ax.set_ylabel('Counts')
                ax.set_yscale('log')
                # Grid lines based on major y-axis ticks
                ax.grid(True, axis='y', linestyle='-', alpha=0.3, which='major')
                # Grid lines based on major x-axis ticks
                ax.grid(True, axis='x', linestyle='-', alpha=0.3, which='major')
                fig.tight_layout()

                fig.savefig(output_image_path)
            finally:
                # Always release the figure, including on a failed save.
                plt.close(fig)

        print(f"Plot saved to {output_image_path}")

    except FileNotFoundError:
        print(f"Error: The file '{csv_filepath}' was not found.")
    except pd.errors.EmptyDataError:
        print(f"Error: The file '{csv_filepath}' is empty.")
    except ValueError as ve:
        print(f"Data Error: {ve}")

# Driver for this cell: summarise and plot the reaction types CSV.
input_csv = 'reaction_types.csv'
# Show only the 20 most frequent reaction types (the function default is 50).
num_top_reactions_to_show = 20
# Save to a .pdf path instead of the function's default 'reaction_counts.png'.
output_file = 'reaction_counts.pdf'

plot_reaction_counts(input_csv, top_n=num_top_reactions_to_show, output_image_path=output_file)

------------------------------
Reaction Data Summary:
Total number of reactions (sum of counts): 11926
Total number of unique reaction types:     3418
Number of types with only one reaction:    2170
------------------------------
Plot saved to reaction_counts.pdf
