In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.util import Inches, Pt
import os

In [None]:
# Destination folder for every plot written by the cells further down.
output_dir = r'C:\Users\GeorgiaRg\Documents\ASCENT\OPC\Data-20260220T185711Z-1-001\Data\Output'

# Input: CSV export of the OPC data (one timestamp column plus one column per size bin).
path = r'C:\Users\GeorgiaRg\Documents\ASCENT\OPC\Data-20260220T185711Z-1-001\Data\OPCdNcc_unit2.csv'

In [None]:
# Load the OPC export and parse the timestamp column into datetimes in a
# single chained expression. `assign` overwrites the existing column, so the
# column order of the file is preserved.
df = (
    pd.read_csv(path)
    .assign(local_time_pst=lambda frame: pd.to_datetime(frame['local_time_pst']))
)

In [None]:
# 2. Extract bin boundaries (Dp) from the column headers (300, 350, 400...).
# Every column except the timestamp is treated as a size bin whose header is
# the bin's lower-edge diameter. Selecting by name rather than by position
# keeps this correct even if the timestamp is not the first column.
bin_columns = df.columns.drop('local_time_pst')
dp_boundaries = np.array([float(col) for col in bin_columns])

# The log-spaced maths below needs at least two positive, strictly increasing
# edges; otherwise dlogDp is undefined, zero or negative and every
# distribution silently becomes NaN/inf.
if (
    dp_boundaries.size < 2
    or dp_boundaries[0] <= 0
    or np.any(np.diff(dp_boundaries) <= 0)
):
    raise ValueError(
        "Expected at least two positive, strictly increasing bin boundaries "
        f"in the column headers; got {dp_boundaries.tolist()}"
    )

# The last bin has no explicit upper edge in the file, so estimate it by
# assuming the last step ratio equals the second-to-last one.
last_ratio = dp_boundaries[-1] / dp_boundaries[-2]
dp_upper = np.append(dp_boundaries[1:], dp_boundaries[-1] * last_ratio)
dp_lower = dp_boundaries

# 3. Per-bin constants
dlogDp = np.log10(dp_upper) - np.log10(dp_lower)  # bin width in log10 space
dp_geo_mean = np.sqrt(dp_lower * dp_upper)        # geometric mean diameter

# 4. Concentration grid: rows are timestamps, columns are size bins.
raw_counts = df[bin_columns].values

# 5. Distributions
# dN/dlogDp: per-bin concentration normalised by the bin's log10 width
# (broadcast across rows).
dn_dlogdp = raw_counts / dlogDp

# dV/dlogDp, assuming Dp is in nm and the input is in #/cm^3 -> um^3/cm^3.
# Sphere volume = (pi/6) * D^3; 1 um^3 = 1e9 nm^3, hence the 1e-9 factor.
dv_dlogdp = dn_dlogdp * (np.pi / 6) * (dp_geo_mean**3) * 1e-9

# dM/dlogDp in ug/m^3. With the volume in um^3/cm^3 and the density in
# g/cm^3 the product is already in ug/m^3:
#   1 um^3/cm^3 * 1 g/cm^3 = 1e-12 g/cm^3 = 1 ug/m^3
# so no extra conversion factor is needed (note that 1 g/cm^3 is NOT
# 1 ug/um^3; it is 1e-6 ug/um^3).
particle_density = 1.0  # g/cm^3 (unit density assumed)
dm_dlogdp = dv_dlogdp * particle_density

# 6. Wrap the arrays in DataFrames (timestamp index, one column per bin)
# for easy resampling and plotting.
df_dn = pd.DataFrame(dn_dlogdp, columns=bin_columns, index=df['local_time_pst'])
df_dv = pd.DataFrame(dv_dlogdp, columns=bin_columns, index=df['local_time_pst'])
df_dm = pd.DataFrame(dm_dlogdp, columns=bin_columns, index=df['local_time_pst'])

print("Distributions calculated successfully!")

In [None]:
# 1. Prep the data (df_dn and df_dm come from the distribution step).
# Resampling requires a DatetimeIndex; pd.to_datetime leaves an index that is
# already datetime-like unchanged, so re-running this cell is safe.
df_dn.index = pd.to_datetime(df_dn.index)
df_dm.index = pd.to_datetime(df_dm.index)

# 2. Resample to monthly averages.
# An offset object is used instead of the 'M' string alias: the alias was
# deprecated in pandas 2.2 in favour of 'ME', whereas MonthEnd() produces the
# same month-end bins regardless of pandas version.
month_end = pd.offsets.MonthEnd()
monthly_dn = df_dn.resample(month_end).mean()
monthly_dm = df_dm.resample(month_end).mean()

# Make sure the destination folder exists before writing any figure into it.
os.makedirs(output_dir, exist_ok=True)

# 3. Plotting loop
# Each iteration creates one "slide" (figure): number distribution on top,
# mass distribution below, sharing the diameter axis.
for month in monthly_dn.index:
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 10), sharex=True)

    month_str = month.strftime('%B %Y')

    # --- Upper plot: number distribution ---
    ax1.plot(dp_geo_mean, monthly_dn.loc[month], color='blue', lw=2, marker='o', markersize=4)
    ax1.set_ylabel(r'$dN/d\log D_p$ (#/cm$^3$)', fontsize=12)
    ax1.set_title(f'Aerosol Size Distribution: {month_str}', fontsize=14, fontweight='bold')
    ax1.grid(True, which="both", ls="-", alpha=0.3)
    ax1.set_xscale('log')

    # --- Lower plot: mass distribution ---
    ax2.plot(dp_geo_mean, monthly_dm.loc[month], color='red', lw=2, marker='s', markersize=4)
    ax2.set_ylabel(r'$dM/d\log D_p$ ($\mu$g/m$^3$)', fontsize=12)
    ax2.set_xlabel('Particle Diameter $D_p$ (nm)', fontsize=12)
    ax2.grid(True, which="both", ls="-", alpha=0.3)
    ax2.set_xscale('log')

    fig.tight_layout()

    # 4. Save for PowerPoint
    # One PNG per month, written into output_dir rather than the current
    # working directory. Saving through `fig` avoids relying on pyplot's
    # implicit "current figure".
    file_name = f"distribution_{month.strftime('%Y_%m')}.png"
    file_path = os.path.join(output_dir, file_name)
    fig.savefig(file_path, dpi=300)

    # Render the figure in the notebook, then release it: without the close,
    # every month's figure stays alive in memory until the kernel restarts.
    plt.show()
    plt.close(fig)