# LOAD LIBRARIES

In [1]:
import numpy as np
import pandas as pd 

# READ DATA

In [2]:
path_sources = '../data/raw/n2o_mixing_endmembers.csv' # mixing end members from Yu et al. 2020 + stdev of analyzer based on measurements on samples from Iris Feigenwinter 
path_isotopes = '../data/processed/SOURCES_isotopes.csv' # measured data
sources = pd.read_csv(path_sources)
iso_data = pd.read_csv(path_isotopes)

print(sources.head())
print()
print(iso_data.head())


  source delta   min   max  stdev
0     bD    SP  -7.5   3.7    2.8
1     nD    SP -13.6   1.9    4.9
2     fD    SP  27.2  39.9    2.5
3     Ni    SP  32.0  38.7    2.0
4     bD  d18O  16.7  23.3    1.4

         date  chamber          SP       d15N        d18O  diff_n2o_ppb  \
0  2023-02-24        1 -101.499579 -45.115100   27.147584     10.012610   
1  2023-02-28        1 -304.728830  53.841004  150.925490      1.719556   
2  2023-03-01        1   18.213941 -29.436341   17.273349   4779.619396   
3  2023-03-03        1    5.489893 -23.678948   18.781280   4496.289482   
4  2023-03-06        1   -0.199149 -27.943262   20.818652   1843.023597   

   flux_nmol  d18O_water  
0   0.027814   -9.043810  
1   0.005069   -8.902047  
2  14.025316   -8.335530  
3  12.993894  -10.864481  
4   5.272311   -9.530335  


First of all let's filter out data with a too low difference in N2O concentration -> unreliable data

In [3]:
threshold = 30
iso_data = iso_data[iso_data['diff_n2o_ppb'] > threshold].dropna().reset_index()
iso_data

Unnamed: 0,index,date,chamber,SP,d15N,d18O,diff_n2o_ppb,flux_nmol,d18O_water
0,2,2023-03-01,1,18.213941,-29.436341,17.273349,4779.619396,14.025316,-8.33553
1,3,2023-03-03,1,5.489893,-23.678948,18.78128,4496.289482,12.993894,-10.864481
2,4,2023-03-06,1,-0.199149,-27.943262,20.818652,1843.023597,5.272311,-9.530335
3,12,2023-03-06,2,2.289056,-26.599452,33.210678,47.108978,0.134764,-9.89399
4,18,2023-03-01,3,16.211476,-25.740525,18.961443,5296.327715,15.541545,-8.74182
5,19,2023-03-03,3,-0.130329,-21.078954,19.088668,2643.390218,7.639173,-10.314688
6,20,2023-03-06,3,-4.179072,-22.605849,19.538232,389.707596,1.114831,-10.056549
7,21,2023-03-17,3,-2.373949,-13.48928,28.669395,303.798338,0.848329,-10.256006
8,26,2023-03-01,4,-1.378079,-25.445839,22.037094,926.302519,2.718142,-8.840135
9,27,2023-03-03,4,-3.755073,-27.393237,24.454761,127.826654,0.369408,-10.226242


# FORMATTING

## SOURCES

Normalize endmembers (except for Ni) of d18O-N2O in relation to soil water

In [4]:
d18O_soil_water = np.mean(iso_data['d18O_water'])
print(f'Average soil water d18O is {d18O_soil_water}')

processes = ['bD', 'fD', 'nD']
sources_norm = sources.copy()
for p in processes:
    sources_norm.loc[(sources_norm['delta'] == 'd18O') & (sources_norm['source'] == p), 'min'] = sources.loc[(sources['delta'] == 'd18O') & (sources['source'] == p), 'min'] + d18O_soil_water
    sources_norm.loc[(sources_norm['delta'] == 'd18O') & (sources_norm['source'] == p), 'max'] = sources.loc[(sources['delta'] == 'd18O') & (sources['source'] == p), 'max'] + d18O_soil_water

sources_norm

Average soil water d18O is -9.241605815533335


Unnamed: 0,source,delta,min,max,stdev
0,bD,SP,-7.5,3.7,2.8
1,nD,SP,-13.6,1.9,4.9
2,fD,SP,27.2,39.9,2.5
3,Ni,SP,32.0,38.7,2.0
4,bD,d18O,7.458394,14.058394,1.4
5,nD,d18O,3.158394,10.158394,1.5
6,fD,d18O,32.758394,45.858394,3.8
7,Ni,d18O,20.5,26.5,2.1
8,bD,d15N,-52.8,2.3,5.2
9,nD,d15N,-60.7,-53.1,1.0


* Mean of the range reported by Yu et al. (2020)
* Standard deviation from stdev of measurements from the same analyzer in previous measurements for Iris Feigenwinter  
(during the measurements of these samples, the stdev of the instrument could not be assessed by Matti)

In [5]:
# Calculate the mean of endmembers range from min max reported by Yu et al. (2020)
sources_norm['mean'] = (sources_norm['min'] + sources_norm['max'])/2
#sources_norm['spread'] = np.abs((sources_norm['max'] - sources_norm['min'])/2)
sources_norm['spread'] = 0 # use stddev instead of spread --> set spread to zero


# Pivot the DataFrame 
pivot_mean = sources_norm.pivot_table(index='source', columns='delta', values='mean').reset_index()
pivot_stdev = sources_norm.pivot_table(index='source', columns='delta', values='stdev').reset_index()
pivot_spread = sources_norm.pivot_table(index='source', columns='delta', values='spread').reset_index()

# Create a new DataFrame for the required FRAME source input format
sources_df = pivot_mean.copy()
sources_df.columns.name = None

# Add the 'spread' columns to `sources_df`
for col in pivot_spread.columns[1:]:  # Skip the 'source' column
    sources_df[f'spread({col})'] = pivot_spread[col]

# Add 'stdev' columns 
for col in pivot_stdev.columns[1:]:  # Skip the 'source' column
    sources_df[f'stdev({col})'] = pivot_spread[col]

# Optionally reorder columns to match your desired format
ordered_columns = ['source']
for delta in sources['delta'].unique():
    ordered_columns += [delta, f'stdev({delta})', f'spread({delta})']

sources_df = sources_df[ordered_columns]

sources_df

Unnamed: 0,source,SP,stdev(SP),spread(SP),d18O,stdev(d18O),spread(d18O),d15N,stdev(d15N),spread(d15N)
0,Ni,35.35,0.0,0.0,23.5,0.0,0.0,-55.5,0.0,0.0
1,bD,-1.9,0.0,0.0,10.758394,0.0,0.0,-25.25,0.0,0.0
2,fD,33.55,0.0,0.0,39.308394,0.0,0.0,-38.5,0.0,0.0
3,nD,-5.85,0.0,0.0,6.658394,0.0,0.0,-56.9,0.0,0.0


## SAMPLES

We are not aggregating the samples --> stdev=0

In [6]:
n2o_samples = iso_data.copy()
for i in range(len(n2o_samples)):
    n2o_samples.loc[i, 'label'] = f"C{iso_data.loc[i, 'chamber']}_{iso_data.loc[i, 'date']}" # label column as a combination of chamber number and date
    n2o_samples.loc[i, 'group'] = i + 1 # group column

n2o_samples['group'] = n2o_samples['group'].astype(int) # change group column to integer data type

# Add columns of stdev
stdev = 1
n2o_samples['stdev(SP)'] = stdev
n2o_samples['stdev(d18O)'] = stdev
n2o_samples['stdev(d15N)'] = stdev

# Sort columns properly
custom_order = ['label', 'group', 'SP', 'stdev(SP)', 'd18O', 'stdev(d18O)', 'd15N', 'stdev(d15N)']
n2o_samples = n2o_samples[custom_order]

n2o_samples

Unnamed: 0,label,group,SP,stdev(SP),d18O,stdev(d18O),d15N,stdev(d15N)
0,C1_2023-03-01,1,18.213941,1,17.273349,1,-29.436341,1
1,C1_2023-03-03,2,5.489893,1,18.78128,1,-23.678948,1
2,C1_2023-03-06,3,-0.199149,1,20.818652,1,-27.943262,1
3,C2_2023-03-06,4,2.289056,1,33.210678,1,-26.599452,1
4,C3_2023-03-01,5,16.211476,1,18.961443,1,-25.740525,1
5,C3_2023-03-03,6,-0.130329,1,19.088668,1,-21.078954,1
6,C3_2023-03-06,7,-4.179072,1,19.538232,1,-22.605849,1
7,C3_2023-03-17,8,-2.373949,1,28.669395,1,-13.48928,1
8,C4_2023-03-01,9,-1.378079,1,22.037094,1,-25.445839,1
9,C4_2023-03-03,10,-3.755073,1,24.454761,1,-27.393237,1


## FRACTIONATION

According to Yu et al. (2020)

In [7]:
# Define the file name
file_name = "data/N2O_fractionation_FRAME.csv"

# Define the data using a dictionary
data = {
    "name": ["E"],
    "SP": [-5.9],
    "stdev(SP)": [1.4],
    "spread(SP)": [0],
    "d18O": [-15.4],
    "stdev(d18O)": [4.7],
    "spread(d18O)": [0],
    "d15N": [-7.1],
    "stdev(d15N)": [2.1],
    "spread(d15N)": [0]
}

# Convert dictionary to DataFrame
df_frac = pd.DataFrame(data)
df_frac

Unnamed: 0,name,SP,stdev(SP),spread(SP),d18O,stdev(d18O),spread(d18O),d15N,stdev(d15N),spread(d15N)
0,E,-5.9,1.4,0,-15.4,4.7,0,-7.1,2.1,0


# EXPORT DATA

In [9]:
sources_df.to_csv('../data/processed/FRAME/N2O_sources_FRAME.csv', index=False)
n2o_samples.to_csv('../data/processed/FRAME/N2O_samples_FRAME.csv', index=False)

# Rayleigh fractionation for N2O reduction from a mixed source:
# M0[i] = initial isotope value after mixing of sources,
# E[i] = enrichment (fractionation) factor for N2O reduction,
# r = remaining N2O fraction (0â€“1); as r decreases, residual N2O becomes isotope-enriched
with open('../data/processed/FRAME/N2O_fractionation_FRAME.csv', "w") as file:
    file.write("M0[i] + E[i] * log(r),,,,,,,,,\n") 
    df_frac.to_csv(file, index=False)