In [25]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import glob
import ipywidgets as widgets
from IPython.display import display

# Default size, as (width, height) in inches, for figures created after this point.
plt.rcParams['figure.figsize'] = (20, 16)

def _load_particle_arrays(poss_file, times_file):
    """Load the positions and times arrays for one run.

    Returns ``(poss, times)`` on success. If either file cannot be loaded, the
    error and both paths are printed and ``None`` is returned.
    """
    try:
        # SECURITY: allow_pickle=True lets np.load unpickle the file, and unpickling
        # can execute arbitrary code. Only point this at files from a trusted source.
        poss = np.load(poss_file, allow_pickle=True)
        times = np.load(times_file, allow_pickle=True)
    except Exception as e:
        print(f"Error loading files: {str(e)}")
        print(f"Poss file: {poss_file}")
        print(f"Times file: {times_file}")
        return None
    return poss, times


def _percent_at_or_below(heights, bl_heights):
    """Per time step, percentage of comparable particles with height <= BL height.

    Pairs containing NaN compare False both ways and are therefore not counted.
    A time step with no comparable pair yields NaN instead of a 0/0 warning.
    """
    above_bl = np.sum(heights > bl_heights, axis=0)
    below_bl = np.sum(heights <= bl_heights, axis=0)
    compared = below_bl + above_bl
    fraction = np.divide(
        below_bl,
        compared,
        out=np.full(compared.shape, np.nan),
        where=compared > 0,
    )
    return fraction * 100


def process_and_plot(poss_file, times_file, ax1, ax2, ax3, ax4, label, use_boundary_layer, show_bl_height):
    """Load one run's particle data and add its curves to the four axes.

    The positions array is indexed as ``poss[:, 2, :]`` for particle height and
    ``poss[:, 3, :]`` for boundary-layer (BL) height. Axis 0 is averaged out and
    the last axis is plotted against the time values, expressed as hours before
    the latest time in ``times_file``.

    Parameters
    ----------
    poss_file, times_file : str
        Paths of the positions and times files (loaded with ``np.load``).
    ax1, ax2, ax3, ax4 : axes objects
        Targets for: mean height with +/- one standard deviation band (ax1),
        mean height (ax2), mean height minus mean BL height (ax3) and
        percentage of particles at or below the BL height (ax4). ax3 and ax4
        are only drawn on when ``use_boundary_layer`` is true.
    label : str
        Prefix used in the legend labels of every curve.
    use_boundary_layer : bool
        Whether to read and use the BL-height row.
    show_bl_height : bool
        Whether to draw the mean BL height on ax1 and ax2 (needs
        ``use_boundary_layer``).

    Returns
    -------
    tuple
        ``(most_recent_date, time_diff_hours)``, or ``(None, None)`` if the
        files could not be loaded or the positions array lacks the rows needed.
    """
    loaded = _load_particle_arrays(poss_file, times_file)
    if loaded is None:
        return None, None
    poss, times = loaded

    # Indexing row 2 (and row 3 for BL data) would raise IndexError on a smaller
    # array; report it the same way as a load failure so other files still plot.
    required_rows = 4 if use_boundary_layer else 3
    if np.ndim(poss) < 3 or np.shape(poss)[1] < required_rows:
        print(f"Error: positions array in {poss_file} has shape {np.shape(poss)}; "
              f"need at least {required_rows} rows along axis 1")
        return None, None

    heights = poss[:, 2, :]
    bl_heights = poss[:, 3, :] if use_boundary_layer else None

    # Convert times to datetime objects if they're not already
    times = pd.to_datetime(times)

    # Hours elapsed between each time stamp and the latest one in this file
    most_recent_date = times.max()
    time_diff_hours = (most_recent_date - times).total_seconds() / 3600

    # Report how many entries are usable (both values present when BL data is used)
    valid_mask = ~np.isnan(heights)
    if use_boundary_layer:
        valid_mask &= ~np.isnan(bl_heights)
    n_valid = np.sum(valid_mask)
    print(f"File: {label}")
    print(f"Total data points: {heights.size}")
    print(f"Valid data points: {n_valid}")
    print(f"NaN or invalid data points: {heights.size - n_valid}")

    # NaN-aware statistics across axis 0 for each time step
    avg_particle_height = np.nanmean(heights, axis=0)
    std_height = np.nanstd(heights, axis=0)
    avg_bl_height = np.nanmean(bl_heights, axis=0) if use_boundary_layer else None
    draw_bl_curve = use_boundary_layer and show_bl_height

    # Plot 1: Mean Height with Standard Deviation
    ax1.plot(time_diff_hours, avg_particle_height, label=f'{label} Mean Height')
    ax1.fill_between(time_diff_hours, avg_particle_height - std_height, avg_particle_height + std_height, alpha=0.3)
    if draw_bl_curve:
        ax1.plot(time_diff_hours, avg_bl_height, linestyle='--', label=f'{label} Mean BL Height')

    # Plot 2: Average Particle Height (and Boundary Layer Height if used)
    ax2.plot(time_diff_hours, avg_particle_height, label=f'{label} Particle Height')
    if draw_bl_curve:
        ax2.plot(time_diff_hours, avg_bl_height, linestyle='--', label=f'{label} BL Height')

    if use_boundary_layer:
        # Plot 3: Height Difference
        ax3.plot(time_diff_hours, avg_particle_height - avg_bl_height, label=label)

        # Plot 4: Particle Distribution over Time
        ax4.plot(time_diff_hours, _percent_at_or_below(heights, bl_heights), label=f'{label} Below BL')

    return most_recent_date, time_diff_hours

def set_x_axis(ax, most_recent_date, max_time_diff):
    """Give ``ax`` one x tick per 24 hours, labelled in whole days.

    Ticks start at 0 and step by 24 up to (but not including)
    ``max_time_diff + 24``; each is labelled ``'<n>d'``. The x-axis label reads
    ``'Time before <YYYY-MM-DD>'`` using ``most_recent_date``.

    If ``most_recent_date`` is None a warning is printed and ``ax`` is left
    untouched. The axis direction is not changed.
    """
    if most_recent_date is None:
        print("Warning: most_recent_date is None")
        return

    hours_per_day = 24
    day_ticks = np.arange(0, max_time_diff + hours_per_day, hours_per_day)

    day_labels = []
    for hours in day_ticks:
        day_labels.append(f'{int(hours / hours_per_day)}d')

    ax.set_xticks(day_ticks)
    ax.set_xticklabels(day_labels)
    ax.set_xlabel(f'Time before {most_recent_date.strftime("%Y-%m-%d")}')

def _finish_axis(ax, ylabel, title, most_recent_date, max_time_diff):
    """Apply the shared y-label, title, legend, grid and day-based x-axis to one subplot."""
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)
    set_x_axis(ax, most_recent_date, max_time_diff)


def plot_data(selected_files, use_boundary_layer, show_bl_height, pkl_dir='pkl_files'):
    """Plot the selected runs on a 2x2 grid of subplots.

    Parameters
    ----------
    selected_files : sequence of str
        File prefixes; ``<prefix>_part_poss.pkl`` and ``<prefix>_part_times.pkl``
        are looked up in ``pkl_dir``. Prefixes whose files are missing are
        reported and skipped.
    use_boundary_layer : bool
        When true, the two lower subplots (height difference and percentage of
        particles below the boundary layer) are drawn; otherwise they are removed.
    show_bl_height : bool
        When true (and ``use_boundary_layer`` is true), the mean boundary-layer
        height is added to the two upper subplots.
    pkl_dir : str, optional
        Directory containing the data files. Defaults to ``'pkl_files'``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. If no file could be processed,
        the empty figure is discarded and a message is printed instead.
    """
    # Create a figure with four subplots in a 2x2 grid
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(20, 16))

    print(f"Processing {len(selected_files)} files")

    # Process each selected file
    max_time_diff = 0
    most_recent_date = None
    for file in selected_files:
        poss_file = os.path.join(pkl_dir, f"{file}_part_poss.pkl")
        times_file = os.path.join(pkl_dir, f"{file}_part_times.pkl")
        if not (os.path.exists(poss_file) and os.path.exists(times_file)):
            print(f"Files not found for {file}")
            continue

        label = file[:6]  # Use the first 6 letters of the file name as label
        print(f"Processing files: {poss_file} and {times_file}")
        result = process_and_plot(poss_file, times_file, ax1, ax2, ax3, ax4, label, use_boundary_layer, show_bl_height)
        if result[0] is not None:
            # NOTE(review): every series is plotted relative to its own latest time,
            # but only the date of the last file processed ends up in the x-axis
            # label. With runs from different dates the label describes just that one.
            most_recent_date, time_diff_hours = result
            max_time_diff = max(max_time_diff, np.max(time_diff_hours))

    if most_recent_date is None:
        # Nothing was drawn: skip the empty figure (and the legend/x-axis warnings
        # it would trigger) and just tell the user.
        plt.close(fig)
        print("No data was processed. Check if the selected files exist and contain valid data.")
        return

    # Set titles and labels for each subplot
    _finish_axis(ax1, 'Height', 'Mean Height with Standard Deviation', most_recent_date, max_time_diff)

    bl_title_suffix = ' and Boundary Layer Height' if (use_boundary_layer and show_bl_height) else ''
    _finish_axis(ax2, 'Height', 'Average Particle Height' + bl_title_suffix, most_recent_date, max_time_diff)

    if use_boundary_layer:
        ax3.axhline(y=0, color='green', linestyle='--', label='y=0')
        _finish_axis(ax3, 'Height Difference',
                     'Average Particle Height minus Average Boundary Layer Height',
                     most_recent_date, max_time_diff)
        _finish_axis(ax4, 'Percentage of Particles Below Boundary Layer',
                     'Distribution of Particles Relative to Boundary Layer',
                     most_recent_date, max_time_diff)
    else:
        # The lower row only makes sense with boundary-layer data
        fig.delaxes(ax3)
        fig.delaxes(ax4)

    # Adjust layout and show plot.
    # To keep a copy on disk, call fig.savefig(<path>, format='png') before plt.show().
    plt.tight_layout()
    plt.show()

def get_available_files(pkl_dir='pkl_files'):
    """Return the sorted prefixes that have both data files in ``pkl_dir``.

    A prefix qualifies when ``<prefix>_part_poss.pkl`` and
    ``<prefix>_part_times.pkl`` both exist in the directory.

    Parameters
    ----------
    pkl_dir : str, optional
        Directory to scan. Defaults to ``'pkl_files'``.

    Returns
    -------
    list of str
        Matching prefixes in ascending order; empty if the directory is missing
        or holds no complete pair.
    """
    def _prefixes(suffix):
        # The glob pattern guarantees every basename ends with `suffix`, so slicing
        # removes exactly the trailing suffix (str.replace would strip every occurrence).
        pattern = os.path.join(glob.escape(pkl_dir), f'*{suffix}')
        return {os.path.basename(path)[:-len(suffix)] for path in glob.glob(pattern)}

    # glob returns paths in arbitrary order; sort so the widget options and the
    # default selection are the same on every run.
    return sorted(_prefixes('_part_poss.pkl') & _prefixes('_part_times.pkl'))

In [26]:
# Get the available files
available_files = get_available_files()

# Create widgets for user input.
# Hold Shift to select several adjacent files; hold Ctrl to pick files that are
# not next to each other in the list.
file_select_widget = widgets.SelectMultiple(
    options=available_files,
    value=available_files[:1],  # Default to the first file; empty when no file pairs were found
    description='Select files:',
    disabled=False
)
use_bl_widget = widgets.Checkbox(value=True, description='Use Boundary Layer Data')
show_bl_height_widget = widgets.Checkbox(value=True, description='Show BL Height in Plots 1 & 2')

# Display widgets
display(file_select_widget, use_bl_widget, show_bl_height_widget)

# Create a button to generate the plot
plot_button = widgets.Button(description="Generate Plot")
display(plot_button)

# Output area that receives everything the callback prints or plots, so the
# result shows up below the button and is replaced on each click.
plot_output = widgets.Output()
display(plot_output)


def on_button_clicked(b):
    """Redraw the plots for the current widget selections when the button is clicked."""
    with plot_output:
        plot_output.clear_output(wait=True)
        plot_data(file_select_widget.value, use_bl_widget.value, show_bl_height_widget.value)


plot_button.on_click(on_button_clicked)

SelectMultiple(description='Select files:', index=(0,), options=('c275r1', 'c276r2', 'c276r4', 'c277r1', 'c278…

Checkbox(value=True, description='Use Boundary Layer Data')

Checkbox(value=True, description='Show BL Height in Plots 1 & 2')

Button(description='Generate Plot', style=ButtonStyle())

In [3]:
# Load one positions file and print the array to inspect its contents.
# NOTE(review): allow_pickle=True lets np.load unpickle the file; only use it on trusted files.
# NOTE(review): this prints the array itself; test.shape / test.dtype would be a more compact check.
test = np.load('pkl_files/c275r1_part_poss.pkl', allow_pickle = True)
print(test)

[[[ 152.52645874  152.42181396  152.34527588 ...   24.04508972
     23.87506104   23.82008362]
  [  77.21325684   77.24002075   77.26553345 ...   76.63989258
     76.59417725   76.56015015]
  [ 245.93891907  246.66073608  247.38198853 ...  199.92028809
    389.45040894  202.70787048]
  [ 100.          100.          100.         ... 1003.42333984
    992.14294434  986.76824951]]

 [[ 137.12863159  137.15426636  137.18280029 ...   24.03970337
     24.01411438   23.91954041]
  [  75.46891785   75.45019531   75.43127441 ...   76.57197571
     76.53656006   76.514328  ]
  [ 205.4513855   208.86305237  220.55033875 ...  813.28833008
    502.71334839  801.93701172]
  [ 100.81509399  101.9835434   103.26569366 ... 1007.65789795
   1003.39233398  995.17858887]]

 [[ 123.18795776  123.32965088  123.45336914 ...   24.01708984
     23.95083618   23.89305115]
  [  75.41094971   75.43167114   75.4526062  ...   76.66149902
     76.6381073    76.60151672]
  [ 346.84933472  347.50421143  347.99609375 .