In [1]:
# ==============
# Import modules
# ==============
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io
import os
import seaborn as sns
from matplotlib.colors import ListedColormap

# Changing the CWD
# NOTE(review): presumably done so that the project-local helper modules imported
# right below can be found -- confirm. As a side effect, every relative path used
# later in this notebook is resolved against this directory.
os.chdir('/glade/work/lixujin/PYTHON/SciProj/Box_modeling_analysis/F0AM_helper')
# NOTE(review): the star imports hide which helper names the notebook relies on and
# may shadow the names imported above; prefer explicit imports once the used names are known.
from F0AM_reader_MASTER import *
from data_processing import *
from Flight_transect_csv_reader import *
from Plotting_helper import *
# Silences every warning for the rest of the session, including deprecation
# warnings raised by pandas in the cells below.
warnings.filterwarnings('ignore')

#### Ozone budget

In [2]:
# Read, for every flight, the Ox production/loss rate tables and the modeled O3
# concentration table, and summarize each model run (one column per model) by its mean.
Flight_IDs   = ['P-3B', 'RF03', 'RF07', 'RF09', 'FN19']

# Production and loss rates file directory (the same for every flight)
file_prefix_budget = '/glade/work/lixujin/PYTHON/SciProj/Box_modeling_analysis/PL_budget/output/Ox/'

# Campaign sub-directory that holds each flight's concentration files.
# A dict lookup (instead of independent `if` statements) makes an unknown flight
# fail with a KeyError rather than silently reusing the previous flight's directory.
campaign_by_flight = {
    'RF03': 'WE-CAN',
    'RF07': 'WE-CAN',
    'RF09': 'WE-CAN',
    'FN19': 'FIREX-AQ',
    'P-3B': 'P-3B',
}

# Model runs (columns) left out of the summary whenever a file contains them
excluded_conc_models   = ['GEOS-Chem + FUR', 'MCM + GEOS-Chem VOCs + FUR']
excluded_budget_models = ['MCM + GEOS-Chem VOCs + FUR']

# One record per flight; the DataFrame is built once after the loop instead of
# being re-concatenated on every iteration.
summary_records = []

for Flight_ID in Flight_IDs:
    # Concentration file directory
    file_prefix_conc = (
        '/glade/work/lixujin/PYTHON/SciProj/Box_modeling_analysis/F0AM_analysis_TS/'
        f"{campaign_by_flight[Flight_ID]}/Dataprocess/analysis_bycompound/"
    )

    # Read data
    df_prod = pd.read_csv(f"{file_prefix_budget}{Flight_ID}_Production_rates_evolution.csv", index_col=0)
    df_loss = pd.read_csv(f"{file_prefix_budget}{Flight_ID}_Loss_rates_evolution.csv", index_col=0)
    df_conc = pd.read_csv(f"{file_prefix_conc}O3/{Flight_ID}_mod_smk.csv", index_col=0)

    # errors='ignore' drops the columns only where they exist, so a file that
    # lacks one of them no longer raises a KeyError.
    df_prod = df_prod.drop(columns=excluded_budget_models, errors='ignore')
    df_loss = df_loss.drop(columns=excluded_budget_models, errors='ignore')
    df_conc = df_conc.drop(columns=excluded_conc_models, errors='ignore')

    # The means are stored below as plain lists and are later labelled by position
    # with df_prod.columns, so every Series is put into that column order first.
    # A model missing from a file shows up as NaN instead of shifting the others.
    model_order = df_prod.columns

    # Calculate the mean of production and loss rates
    mean_prod     = df_prod.mean()
    # Sign flipped so the loss is reported as a positive number
    # (assumes the loss file stores negative rates -- TODO confirm).
    mean_loss     = (-df_loss.mean()).reindex(model_order)
    mean_df_conc  = df_conc.mean().reindex(model_order)
    # Lifetime = mean concentration / mean loss rate, per model
    mean_lifetime = mean_df_conc / mean_loss

    summary_records.append({
        'Flight_ID': Flight_ID,
        'Production Rate': mean_prod.values.tolist(),
        'Loss Rate': mean_loss.values.tolist(),
        'Ozone concentration': mean_df_conc.values.tolist(),
        'Lifetime': mean_lifetime.values.tolist(),
    })

# One row per flight; each metric cell holds a list with one value per model
summary_df = pd.DataFrame(
    summary_records,
    columns=['Flight_ID', 'Production Rate', 'Loss Rate', 'Ozone concentration', 'Lifetime'],
)



In [3]:
# Model names: the columns of the production-rate table that is still in scope
# from the last flight read in the loading loop.
models = df_prod.columns

# Metrics stored in summary_df as one list per flight (one entry per model)
metric_columns = ['Production Rate', 'Loss Rate', 'Ozone concentration', 'Lifetime']

# Unpack every per-flight list into one record per (flight, model) pair.
# The entry at a given position is assumed to belong to the model at the same
# position in `models`.
expanded_rows = [
    {
        'Flight_ID': flight_summary['Flight_ID'],
        'Model': model_name,
        **{metric: flight_summary[metric][position] for metric in metric_columns},
    }
    for _, flight_summary in summary_df.iterrows()
    for position, model_name in enumerate(models)
]

# Long-format table: one row per flight and model
expanded_df = pd.DataFrame(expanded_rows)

expanded_df

Unnamed: 0,Flight_ID,Model,Production Rate,Loss Rate,Ozone concentration,Lifetime
0,P-3B,GEOS-Chem (base),10.493819,1.475334,36.998804,25.078258
1,P-3B,MCM + FUR,11.666518,2.215793,37.25451,16.813169
2,P-3B,MCM + GEOS-Chem VOCs,11.119712,1.466489,37.194557,25.362999
3,RF03,GEOS-Chem (base),93.894087,5.198733,121.397268,23.35132
4,RF03,MCM + FUR,109.35748,9.125058,129.576276,14.200049
5,RF03,MCM + GEOS-Chem VOCs,94.784289,5.227841,122.584709,23.448438
6,RF07,GEOS-Chem (base),21.783483,4.180858,142.262018,34.026988
7,RF07,MCM + FUR,23.796221,5.108044,141.762471,27.752787
8,RF07,MCM + GEOS-Chem VOCs,22.40245,3.858707,143.976362,37.312079
9,RF09,GEOS-Chem (base),8.068415,1.131546,67.309146,59.484216


In [4]:
# Classify Flight_IDs into 'VOC-rich' or 'NOx-rich'
flight_type = {
    'P-3B': 'VOC-rich',
    'RF07': 'VOC-rich',
    'RF09': 'VOC-rich',
    'RF03': 'NOx-rich',
    'FN19': 'NOx-rich'
}

# Flights missing from `flight_type` get a NaN group and are left out by groupby.
expanded_df['Group'] = expanded_df['Flight_ID'].map(flight_type)

# Numeric columns to aggregate
value_columns = ['Production Rate', 'Loss Rate', 'Ozone concentration', 'Lifetime']

# Select the numeric columns explicitly: aggregating the whole frame would also
# ask for the mean/std of the string column 'Flight_ID', which recent pandas
# versions reject with a TypeError instead of silently skipping it.
aggregated_results = (
    expanded_df
    .groupby(['Group', 'Model'])[value_columns]
    .agg(['mean', 'std'])
    .reset_index()
)


def _rounded_text(values):
    """Return `values` rounded to whole numbers as strings; NaN becomes 'n/a'.

    A NaN standard deviation occurs when a group holds a single flight; casting
    it straight to int would raise.
    """
    return values.round().map(lambda v: 'n/a' if pd.isna(v) else str(int(v))).astype(str)


# Process and format mean±std; Group and Model come first, then the metrics
formatted_results = pd.DataFrame({
    'Group': aggregated_results['Group'],
    'Model': aggregated_results['Model'],
    **{
        col: _rounded_text(aggregated_results[col]['mean'])
             + '±'
             + _rounded_text(aggregated_results[col]['std'])
        for col in value_columns
    },
})

formatted_results


Unnamed: 0,Group,Model,Production Rate,Loss Rate,Ozone concentration,Lifetime
0,NOx-rich,GEOS-Chem (base),77±23,6±1,104±25,18±8
1,NOx-rich,MCM + FUR,97±18,12±4,114±22,11±5
2,NOx-rich,MCM + GEOS-Chem VOCs,80±22,6±2,106±24,18±8
3,VOC-rich,GEOS-Chem (base),13±7,2±2,82±54,40±18
4,VOC-rich,MCM + FUR,15±8,3±2,82±54,27±9
5,VOC-rich,MCM + GEOS-Chem VOCs,14±8,2±2,83±55,43±21


In [5]:
# Reshape `formatted_results` (one row per Group and Model, metrics as 'mean±std'
# strings) into the publication layout: one row per parameter and model, with the
# VOC-rich and NOx-rich values side by side, then save it as CSV.

# Define a mapping for models to custom labels
model_labels = {
    'MCM + FUR': 'MCMBBVOC',
    'MCM + GEOS-Chem VOCs': 'MCMGC',
    'GEOS-Chem (base)': 'GC'
}

# Apply model label mapping (models without a label get NaN and match no row below)
formatted_results['Model Label'] = formatted_results['Model'].map(model_labels)

# Custom parameter labels, in the order they appear in the final table
custom_params = {
    'Production Rate': 'Production Rate (ppb h-1)',
    'Loss Rate': 'Loss Rate (ppb h-1)',
    'Ozone concentration': 'Averaged ozone concentration (ppb)',
    'Lifetime': 'Lifetime (h)'
}

# Collect the rows first and build the DataFrame once: DataFrame.append no longer
# exists in pandas 2.x, and appending row by row copies the frame every time.
final_rows = []
for param, custom_label in custom_params.items():
    for label in model_labels.values():
        is_model = formatted_results['Model Label'] == label
        voc_row = formatted_results.loc[is_model & (formatted_results['Group'] == 'VOC-rich'), param].values
        nox_row = formatted_results.loc[is_model & (formatted_results['Group'] == 'NOx-rich'), param].values

        final_rows.append({
            'Parameter': custom_label,
            'Model': label,
            # An empty string marks a group/model combination without data
            'VOC-rich': voc_row[0] if len(voc_row) > 0 else '',
            'NOx-rich': nox_row[0] if len(nox_row) > 0 else ''
        })

final_structure = pd.DataFrame(final_rows, columns=['Parameter', 'Model', 'VOC-rich', 'NOx-rich'])

# Row indices are written as the first CSV column; set index=False to leave them out
final_structure.to_csv('/glade/work/lixujin/PYTHON/SciProj/Box_modeling_analysis/VOC_OH_exposure/output/ozone_pltc.csv', index=True)


In [6]:
# Display the final table: one row per parameter and model, with a 'VOC-rich' and a 'NOx-rich' column
final_structure

Unnamed: 0,Parameter,Model,VOC-rich,NOx-rich
0,Production Rate (ppb h-1),MCMBBVOC,15±8,97±18
1,Production Rate (ppb h-1),MCMGC,14±8,80±22
2,Production Rate (ppb h-1),GC,13±7,77±23
3,Loss Rate (ppb h-1),MCMBBVOC,3±2,12±4
4,Loss Rate (ppb h-1),MCMGC,2±2,6±2
5,Loss Rate (ppb h-1),GC,2±2,6±1
6,Averaged ozone concentration (ppb),MCMBBVOC,82±54,114±22
7,Averaged ozone concentration (ppb),MCMGC,83±55,106±24
8,Averaged ozone concentration (ppb),GC,82±54,104±25
9,Lifetime (h),MCMBBVOC,27±9,11±5


In [7]:
from docx import Document
from docx.shared import Pt

# Columns of final_structure in the left-to-right order of the Word table;
# the same names serve as the header texts.
table_columns = ['Parameter', 'Model', 'VOC-rich', 'NOx-rich']

# New document with a title
doc = Document()
doc.add_heading('Aggregated Results', level=1)

# Table starts with a single (header) row; data rows are added below
table = doc.add_table(rows=1, cols=4)
table.style = 'Table Grid'

# Header row
for header_cell, title in zip(table.rows[0].cells, table_columns):
    header_cell.text = title

# One table row per row of final_structure
for _, record in final_structure.iterrows():
    new_cells = table.add_row().cells
    for cell, column in zip(new_cells, table_columns):
        cell.text = record[column]

        # Font size is set after the text so that the cell's runs exist
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                run.font.size = Pt(10)

# Write the document to disk and report where it went
doc_path = '/glade/work/lixujin/PYTHON/SciProj/Box_modeling_analysis/VOC_OH_exposure/output/ozone_pltc.docx'
doc.save(doc_path)

print(f"Document saved to {doc_path}")


Document saved to /glade/work/lixujin/PYTHON/SciProj/Box_modeling_analysis/VOC_OH_exposure/output/ozone_pltc.docx
