In [14]:
import pandas as pd
import numpy as np

In [15]:
# Load the merged table; 'Time_Mid' is parsed into datetimes so the .dt
# accessors used further down work on it.
d = pd.read_csv(
    '../tables/2024_03_11_merged_03.csv',
    parse_dates=['Time_Mid'],
)

In [16]:
# Derived columns:
#   N_tot = nSMPS + nLAS_AmmSO4
#   OMF   = Org_Ave_IsoK_STP / ams_tot
# (presumably total number concentration and organic mass fraction — confirm)
d = d.assign(
    N_tot=d['nSMPS'] + d['nLAS_AmmSO4'],
    OMF=d['Org_Ave_IsoK_STP'] / d['ams_tot'],
)

In [17]:
# Keep only the rows in which every quantity summarised below is present.
d = d[
    d[['N_CCN_stdPT', 'N_tot', 'D_a', 'k_obs', 'OMF']]
    .notna()
    .all(axis=1)
]

In [18]:
# Tally how many rows occur at each distinct 'CCN_SS' value.
value_counts = d['CCN_SS'].value_counts()

# Turn the tally into a two-column table ordered by the value itself rather
# than by frequency; the row labels still reflect the frequency ranking.
value_counts_df = (
    value_counts
    .rename_axis('Unique Value')
    .reset_index(name='Count')
    .sort_values(by='Unique Value')
)
value_counts_df

Unnamed: 0,Unique Value,Count
66,0.160,9
92,0.165,6
23,0.170,21
90,0.175,6
40,0.180,14
...,...,...
102,0.695,5
48,0.700,12
97,0.705,6
22,0.710,22


In [19]:
# Restrict to rows whose 'CCN_SS' lies in the closed interval [0.37, 0.43].
d = d[d['CCN_SS'].between(0.37, 0.43)]

In [20]:
# Calendar dates listed as transit flights; rows on these dates are left out
# of every subset below.
transit_flights = [
    pd.Timestamp(day).date()
    for day in ('2022-03-22', '2022-05-18', '2022-05-21', '2022-05-31', '2022-06-18')
]

# Masks shared by several subsets.
flight_time = d['Time_Mid']
flight_month = flight_time.dt.month
not_transit = ~flight_time.dt.date.isin(transit_flights)
june_2022 = flight_month.isin([6]) & flight_time.dt.year.isin([2022])

# Subsets by calendar month. June 2022 is carved out of "spring" and forms
# the separate "bermuda" subset. Months 7 and 10 fall into no subset.
df_winter = d[flight_month.isin([11, 12, 1, 2, 3, 4]) & not_transit]
df_spring = d[flight_month.isin([5, 6]) & ~june_2022 & not_transit]
df_summer = d[flight_month.isin([8, 9]) & not_transit]
df_bermuda = d[june_2022 & not_transit]

In [21]:
# Subsets in the order used for the rows of the summary table.
ds = [
    df_winter,
    df_spring,
    df_summer,
    df_bermuda,
]

In [29]:
# Print the earliest and latest 'Time_Mid' in each subset.
for subset in ds:
    mid_times = subset['Time_Mid']
    print(mid_times.min(), mid_times.max())

2020-02-14 20:03:31.500000 2022-03-26 21:23:47.500000
2021-05-13 17:05:52.500000 2021-06-30 20:33:07.500000
2020-08-17 14:30:39.500000 2020-08-28 19:53:18.500000
2022-06-03 14:46:23.500000 2022-06-17 16:56:17.500000


In [22]:
# Empty results table: a row-count column plus one column per summarised
# quantity. Rows are filled in further down.
d_out = pd.DataFrame(
    columns=[
        'N',
        'N_CCN',
        'N_tot',
        'D_c',
        'k_obs',
        'OMF',
    ]
)

In [23]:
def print_cell(d, dgts=0):
    """Format a series as the string ``"median (Q1, Q3)"``.

    Parameters
    ----------
    d : object providing ``median()`` and ``quantile(q)``, e.g. a pandas Series
        Values to summarise.
    dgts : int, default 0
        Number of decimal places used for all three numbers.

    Returns
    -------
    str
        The median followed by the 0.25 and 0.75 quantiles in parentheses.
    """
    spec = f".{dgts}f"
    centre, lower, upper = (
        format(stat, spec)
        for stat in (d.median(), d.quantile(.25), d.quantile(.75))
    )
    return f"{centre} ({lower}, {upper})"

In [24]:
# Build one summary row per subset in `ds`. Each statistic cell is the
# "median (Q1, Q3)" string returned by print_cell.
# Iterating `ds` directly keeps the row count tied to the list instead of a
# hard-coded 4. Building the frame once from records avoids enlarging it
# cell-by-cell and makes this cell safe to re-run.
summary_rows = []
for subset in ds:
    summary_rows.append({
        'N': subset.shape[0],  # number of rows in the subset
        'N_CCN': print_cell(subset['N_CCN_stdPT']),
        'N_tot': print_cell(subset['N_tot']),
        # NOTE(review): the output column is labelled 'D_c' but is computed
        # from the 'D_a' input column — confirm the label is intended.
        'D_c': print_cell(subset['D_a'], 1),
        'k_obs': print_cell(subset['k_obs'], 2),
        'OMF': print_cell(subset['OMF'], 2),
    })

d_out = pd.DataFrame(
    summary_rows,
    columns=['N', 'N_CCN', 'N_tot', 'D_c', 'k_obs', 'OMF'],
)

In [25]:
# Display the assembled summary table (one row per subset in `ds`).
d_out

Unnamed: 0,N,N_CCN,N_tot,D_c,k_obs,OMF
0,4969,"460 (224, 817)","1600 (834, 3300)","75.0 (64.6, 88.7)","0.29 (0.17, 0.47)","0.46 (0.35, 0.59)"
1,3411,"487 (272, 906)","1035 (564, 2148)","78.0 (65.0, 86.4)","0.27 (0.20, 0.47)","0.58 (0.33, 0.73)"
2,34,"641 (236, 1321)","1542 (919, 2332)","75.2 (64.2, 138.2)","0.27 (0.05, 0.47)","0.64 (0.59, 0.74)"
3,806,"306 (243, 418)","496 (390, 658)","46.2 (41.4, 52.7)","1.31 (0.88, 1.83)","0.27 (0.17, 0.45)"


In [26]:
# Save the summary table as an Excel workbook, omitting the row index.
d_out.to_excel(
    '../tables/summary_stats.xlsx',
    index=False,
)