In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.util import Inches, Pt
import os
import io
import matplotlib.ticker as ticker # Imported to customize x-axis ticks

## OPC data import. Raw file contents: Dp is in nm, timestamps are in local time (PST), and the number concentrations are given per bin, i.e. not yet divided by the bin width (dlogDp). See the README file for more info.

In [2]:
# Base folder that holds the OPC data files. It defaults to the original local
# folder and can be overridden with the OPC_DATA_DIR environment variable, so the
# notebook can run on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'OPC_DATA_DIR',
    r'C:\Users\GeorgiaRg\Documents\ASCENT\OPC\Data-20260220T185711Z-1-001',
)

# Paths to OPC data files for the 3 different units
path_unit2 = os.path.join(DATA_DIR, 'OPCdNcc_unit2.csv')
path_unit4 = os.path.join(DATA_DIR, 'OPCdNcc_unit4.csv')
path_unit8 = os.path.join(DATA_DIR, 'OPCdNcc_unit8.csv')

# output_dir is the destination folder of all of the plots below
output_dir = os.path.join(DATA_DIR, 'Output')

In [4]:
def _read_opc_csv(csv_path):
    """Read one OPC CSV file and convert its 'local_time_pst' column to datetimes."""
    frame = pd.read_csv(csv_path)
    frame['local_time_pst'] = pd.to_datetime(frame['local_time_pst'])
    return frame


# One DataFrame per OPC unit, each with a datetime 'local_time_pst' column
df_unit2, df_unit4, df_unit8 = (
    _read_opc_csv(csv_path) for csv_path in (path_unit2, path_unit4, path_unit8)
)

## Size distribution calculation (number, volume, and mass; change the particle density as needed)

In [5]:
# Select the OPC unit whose size distributions are computed in this cell.
# NOTE(review): this cell originally read a frame named `df` that no visible cell
# defines (execution count In [3] is missing, so it presumably came from a
# since-deleted cell). Unit 2 is used as the default here -- confirm, or switch to
# df_unit4 / df_unit8 as needed.
df = df_unit2

# Particle density in g/cm^3. CHANGE DENSITY HERE IF NEEDED
particle_density = 1.0

# Extract the lower bin boundaries (Dp) from the column headers (300, 350, 400...).
# The 'local_time_pst' column is excluded by name rather than by position.
size_columns = df.columns.drop('local_time_pst')
dp_boundaries = np.array([float(col) for col in size_columns])
if dp_boundaries.size < 2:
    raise ValueError("At least two size bins are required to estimate the upper edge of the last bin.")

# Since we need an upper bound for the last bin, we estimate it by assuming
# the last step ratio is the same as the second to last.
last_ratio = dp_boundaries[-1] / dp_boundaries[-2]
dp_upper = np.append(dp_boundaries[1:], dp_boundaries[-1] * last_ratio)
dp_lower = dp_boundaries

# Calculate Bin Constants
dlogDp = np.log10(dp_upper) - np.log10(dp_lower)
dp_geo_mean = np.sqrt(dp_lower * dp_upper)  # Geometric mean diameter of each bin

# Extract only the concentration data (the grid), i.e. everything except the timestamp
raw_counts = df[size_columns].to_numpy(dtype=float)

# Calculate Distributions
# dN/dlogDp: per-bin counts normalized by the logarithmic bin width
dn_dlogdp = raw_counts / dlogDp

# dV/dlogDp (Dp is in nm, converting to um^3/cm^3)
# Volume of sphere = (pi/6) * D^3.
# Conversion: (nm^3) * 1e-9 = um^3
dv_dlogdp = dn_dlogdp * (np.pi / 6) * (dp_geo_mean**3) * 1e-9

# dM/dlogDp in ug/m^3.
# Unit check: 1 um^3/cm^3 * 1 g/cm^3 = 1e-12 g/cm^3 = 1 ug/m^3, so multiplying the
# volume distribution (um^3/cm^3) by the density in g/cm^3 gives ug/m^3 directly.
# With a density of 1 g/cm^3 the values equal the volume distribution.
dm_dlogdp = dv_dlogdp * particle_density

# Reconstruct into DataFrames for plotting (rows = timestamps, columns = size bins)
df_dn = pd.DataFrame(dn_dlogdp, columns=size_columns, index=df['local_time_pst'])
df_dv = pd.DataFrame(dv_dlogdp, columns=size_columns, index=df['local_time_pst'])
df_dm = pd.DataFrame(dm_dlogdp, columns=size_columns, index=df['local_time_pst'])

print("Distributions calculated successfully.")

Distributions calculated successfully!


## Daily average number and mass distributions, exported to a PowerPoint presentation

In [6]:
# Step 1 - make sure both distribution tables carry a datetime index before resampling
for dist_table in (df_dn, df_dm):
    dist_table.index = pd.to_datetime(dist_table.index)

# Step 2 - one mean spectrum per calendar day; days without any data are dropped
daily_dn = df_dn.resample('D').mean().dropna(how='all')
daily_dm = df_dm.resample('D').mean().dropna(how='all')

# Step 3 - the destination folder must exist before the presentation is saved
os.makedirs(output_dir, exist_ok=True)

# Step 4 - start an empty presentation; layout index 6 is used as the blank slide layout
prs = Presentation()
blank_slide_layout = prs.slide_layouts[6]

# The last size bin is left out of every plotted curve
plot_diameters = dp_geo_mean[:-1]

# Step 5 - one figure (number on top, mass below) and one slide per day
for day in daily_dn.index:
    day_str = day.strftime('%Y-%m-%d')

    # sharex=True links the two x-axes, so the scale and ticks set on ax2 also apply to ax1
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 10), sharex=True)

    # (axes, daily table, line colour, marker, y-axis label) for each panel
    panel_specs = (
        (ax1, daily_dn, 'blue', 'o', r'$dN/d\log D_p$ (#/cm$^3$)'),
        (ax2, daily_dm, 'red', 's', r'$dM/d\log D_p$ ($\mu$g/m$^3$)'),
    )
    for axis, daily_table, line_color, marker_shape, y_label in panel_specs:
        axis.plot(
            plot_diameters,
            daily_table.loc[day].iloc[:-1],
            color=line_color,
            lw=2,
            marker=marker_shape,
            markersize=4,
        )
        axis.set_ylabel(y_label, fontsize=12)
        axis.grid(True, which="both", ls="-", alpha=0.3)

    # The title goes on the upper panel only, the x label on the lower panel only
    ax1.set_title(f'Daily Aerosol Size Distribution: {day_str}', fontsize=14, fontweight='bold')
    ax2.set_xlabel('Particle Diameter $D_p$ (nm)', fontsize=12)

    # Logarithmic x-axis with hand-picked tick positions:
    # subs=(1, 2, 3, 4, 5, 7) puts ticks at 100, 200, 300, 400, 500, 700, 1000, etc.
    ax2.set_xscale('log')
    ax2.xaxis.set_major_locator(
        ticker.LogLocator(base=10.0, subs=(1.0, 2.0, 3.0, 4.0, 5.0, 7.0))
    )
    # Plain numbers (e.g., 300) instead of scientific notation (3x10^2)
    plain_formatter = ticker.ScalarFormatter()
    plain_formatter.set_scientific(False)
    ax2.xaxis.set_major_formatter(plain_formatter)

    # Tilt the tick labels so neighbouring labels do not overlap
    for tick_label in ax2.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_horizontalalignment('right')

    fig.tight_layout()

    # Render the figure into an in-memory PNG, then release the figure
    image_stream = io.BytesIO()
    fig.savefig(image_stream, format='png', dpi=300)
    plt.close(fig)
    image_stream.seek(0)

    # New blank slide with the picture placed 1.5" from the left edge, at the top, 7.5" tall
    slide = prs.slides.add_slide(blank_slide_layout)
    slide.shapes.add_picture(image_stream, Inches(1.5), Inches(0), height=Inches(7.5))

# Step 6 - write the presentation into the output folder
ppt_filename = os.path.join(output_dir, "Daily_Distributions_Presentation.pptx")
prs.save(ppt_filename)

print(f"Presentation generated successfully. Saved to: {ppt_filename}")

Presentation generated successfully! Saved ONLY to: C:\Users\GeorgiaRg\Documents\ASCENT\OPC\Data-20260220T185711Z-1-001\Data\Output\Daily_Distributions_Presentation.pptx
