In this notebook we calculate Stored Carbon for the ISCN available here (registration/login necessary): https://iscn.fluxdata.org/data/access-data/database-reports/

I've used the 'ISCN_SOC-DATA_LAYER_1-1' dataset for everything but Bulk Density values, that were instead taken from 'ISCNTemplate_NRCS_BD_predictions'. Stored organic carbon has been calculated only for non-organic horizons, so the value 'be_pred_2' has been used.

In [47]:
using DataFrames, CSV

Let's import the layer samplings data:

In [48]:
df = CSV.read("ISCN_SOC-DATA_LAYER_1-1.csv", DataFrame)

└ @ CSV /Users/Daniele/.julia/packages/CSV/la2cd/src/file.jl:603


Unnamed: 0_level_0,dataset_name_sub,dataset_name_soc,lat (dec. deg)
Unnamed: 0_level_1,String,String,String?
1,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
2,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
3,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
4,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
5,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
6,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
7,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
8,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
9,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219
10,AK DSC Project SOC stock computation,AK DSC Project SOC stock computation,605219


Marking columns of interest:

In [49]:
cols_of_interest = ["dataset_name_sub",
                "site_name",
                "profile_name",
                "layer_name",
                "lat (dec. deg)",
                "long (dec. deg)",
                "observation_date (YYYY-MM-DD)",
                "hzn",
                "layer_top (cm)",
                "layer_bot (cm)",
                #"bd_samp (g cm-3)",
                #"bd_tot (g cm-3)",
                #"bd_whole (g cm-3)",
                #"bd_other (g cm-3)",
                "oc (percent)",
                "soc (g cm-2)"]

12-element Array{String,1}:
 "dataset_name_sub"
 "site_name"
 "profile_name"
 "layer_name"
 "lat (dec. deg)"
 "long (dec. deg)"
 "observation_date (YYYY-MM-DD)"
 "hzn"
 "layer_top (cm)"
 "layer_bot (cm)"
 "oc (percent)"
 "soc (g cm-2)"

In [50]:
df_iscn = df[:,cols_of_interest]

dropmissing!(df_iscn,["layer_top (cm)","layer_bot (cm)"])	


Unnamed: 0_level_0,dataset_name_sub,site_name,profile_name,layer_name,lat (dec. deg)
Unnamed: 0_level_1,String,String,String,String,String?
1,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05963,605219
2,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05964,605219
3,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05965,605219
4,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05966,605219
5,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05967,605219
6,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05968,605219
7,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05969,605219
8,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05970,605219
9,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05971,605219
10,AK DSC Project SOC stock computation,S2001AK122001,01N1159,01N05972,605219


NOTE: Some layers/profiles are duplicated in some datasets, example below

In [51]:
df_iscn[df_iscn.profile_name .== "BF-61-1",:]

Unnamed: 0_level_0,dataset_name_sub,site_name,profile_name,layer_name,lat (dec. deg)
Unnamed: 0_level_1,String,String,String,String,String?
1,AK DSC Project SOC stock computation,BF-61,BF-61-1,BF61-1-F,6534931
2,AK DSC Project SOC stock computation,BF-61,BF-61-1,BF61-1-H,6534931
3,AK DSC Project SOC stock computation,BF-61,BF-61-1,BF61-1-M1,6534931
4,AK DSC Project SOC stock computation,BF-61,BF-61-1,BF61-1-M2,6534931
5,Boby_Mack,BF-61,BF-61-1,BF61-1-F,6534931
6,Boby_Mack,BF-61,BF-61-1,BF61-1-H,6534931
7,Boby_Mack,BF-61,BF-61-1,BF61-1-M1,6534931
8,Boby_Mack,BF-61,BF-61-1,BF61-1-M2,6534931


Importing dataset with BD value estimates:

In [52]:
df_bd_pred = CSV.read("ISCNTemplate_NRCS_BD_predictions.csv", DataFrame)

└ @ CSV /Users/Daniele/.julia/packages/CSV/la2cd/src/file.jl:603


Unnamed: 0_level_0,dataset_name,site_name,profile_name,layer_name,layer_top,layer_bot,hzn_desgn
Unnamed: 0_level_1,String,String,String,String,String,String,String?
1,Boby_Mack,BF-61,BF-61-1,BF61-1-F,0,4,unknown
2,Boby_Mack,BF-61,BF-61-1,BF61-1-H,4,6,unknown
3,Boby_Mack,BF-61,BF-61-1,BF61-1-M1,6,11,unknown
4,Boby_Mack,BF-61,BF-61-1,BF61-1-M2,11,16,unknown
5,Boby_Mack,BF-61,BF-61-2,BF61-2-DM,0,5,o
6,Boby_Mack,BF-61,BF-61-2,BF61-2-F,5,19,unknown
7,Boby_Mack,BF-61,BF-61-2,BF61-2-H,19,22,unknown
8,Boby_Mack,BF-61,BF-61-2,BF61-2-H:354,22,25,unknown
9,Boby_Mack,BF-61,BF-61-2,BF61-2-M1,25,30,unknown
10,Boby_Mack,BF-61,BF-61-2,BF61-2-M2,30,35,unknown


In [53]:
cols_interest_bd = ["dataset_name",
                "site_name",
                "profile_name",
                "layer_name",
                "bd_pred_2"]
                #"bd_pred_1"]

5-element Array{String,1}:
 "dataset_name"
 "site_name"
 "profile_name"
 "layer_name"
 "bd_pred_2"

NOTE: ‘bd_pred_1’ is to be used for organic horizons (hzn = O), ‘be_pred_2’ for mineral soil horizons (hzn != O).

In [54]:
df_iscn_BDpred = df_bd_pred[:,cols_interest_bd]

Unnamed: 0_level_0,dataset_name,site_name,profile_name,layer_name,bd_pred_2
Unnamed: 0_level_1,String,String,String,String,String
1,Boby_Mack,BF-61,BF-61-1,BF61-1-F,073
2,Boby_Mack,BF-61,BF-61-1,BF61-1-H,074
3,Boby_Mack,BF-61,BF-61-1,BF61-1-M1,103
4,Boby_Mack,BF-61,BF-61-1,BF61-1-M2,113
5,Boby_Mack,BF-61,BF-61-2,BF61-2-DM,036
6,Boby_Mack,BF-61,BF-61-2,BF61-2-F,067
7,Boby_Mack,BF-61,BF-61-2,BF61-2-H,077
8,Boby_Mack,BF-61,BF-61-2,BF61-2-H:354,067
9,Boby_Mack,BF-61,BF-61-2,BF61-2-M1,096
10,Boby_Mack,BF-61,BF-61-2,BF61-2-M2,101


In [55]:
df_merged = innerjoin(df_iscn,df_iscn_BDpred, on = [:dataset_name_sub => :dataset_name,:site_name,:profile_name,:layer_name])

Unnamed: 0_level_0,dataset_name_sub,site_name,profile_name,layer_name,lat (dec. deg),long (dec. deg)
Unnamed: 0_level_1,String,String,String,String,String?,String?
1,Boby_Mack,BF-79,BF-79-1,BF79-1-H,6515089,-14747617
2,Boby_Mack,BF-79,BF-79-1,BF79-1-M1,6515089,-14747617
3,Boby_Mack,BF-84,BF-84-2,BF84-2-DM,6515299,-1474782
4,Boby_Mack,BF-84,BF-84-2,BF84-2-DM:433,6515299,-1474782
5,Boby_Mack,BF-84,BF-84-2,BF84-2-F,6515299,-1474782
6,Boby_Mack,BF-84,BF-84-2,BF84-2-H,6515299,-1474782
7,Boby_Mack,BF-84,BF-84-2,BF84-2-H:436,6515299,-1474782
8,Boby_Mack,BF-84,BF-84-2,BF84-2-M1,6515299,-1474782
9,Boby_Mack,BF-84,BF-84-2,BF84-2-M2,6515299,-1474782
10,Boby_Mack,DC-31,DC-31-1,DC31-1-H,6631533,-15039722


NOTE: In the dataset, OC seems to be the % of organic carbon, while SOC is the already-computed oc-density (?).

Declaring a function for parsing values:

In [56]:
valsParse(vals) = tryparse.(Float64,replace.(vals,','=>'.'))

valsParse (generic function with 1 method)

Convenience renames:

In [57]:
names!(df_merged, [:dataset_name, 
                :site_name, 
                :profile_name,
                :layer_name,
                :lat_dd,
                :long_dd,
                :obs_date,
                :hzn,
                :layer_top,
                :layer_bot,
                :oc_perc,
                :soc_gCM2,
                :bd_pred_2]);

In [58]:
#remove non-usable OC and BD values before proceeding
dropmissing!(df_merged,:oc_perc)
dropmissing!(df_merged,:bd_pred_2)

#parsing to floats
df_merged.oc_perc = valsParse(df_merged.oc_perc)
df_merged.bd_pred_2 = valsParse(df_merged.bd_pred_2)

df_merged

Unnamed: 0_level_0,dataset_name,site_name,profile_name,layer_name,lat_dd,long_dd,obs_date
Unnamed: 0_level_1,String,String,String,String,String?,String?,String?
1,NRCS Sept/2014,50ND075005,40A0001,40A00001,4880497,-10173472,19/09/1950
2,NRCS Sept/2014,50ND075005,40A0001,40A00002,4880497,-10173472,19/09/1950
3,NRCS Sept/2014,50ND075005,40A0001,40A00003,4880497,-10173472,19/09/1950
4,NRCS Sept/2014,50ND075005,40A0001,40A00004,4880497,-10173472,19/09/1950
5,NRCS Sept/2014,50ND075005,40A0001,40A00005,4880497,-10173472,19/09/1950
6,NRCS Sept/2014,50ND075005,40A0001,40A00006,4880497,-10173472,19/09/1950
7,NRCS Sept/2014,50ND075005,40A0001,40A00007,4880497,-10173472,19/09/1950
8,NRCS Sept/2014,50ND075003,40A0013,40A00094,4893664,-10158499,18/09/1950
9,NRCS Sept/2014,50ND075003,40A0013,40A00095,4893664,-10158499,18/09/1950
10,NRCS Sept/2014,50ND075003,40A0013,40A00096,4893664,-10158499,18/09/1950


In [59]:
#remove organic layers
df_no_O_layers = df_merged[occursin.(["O"],df_merged.hzn) .== false, :]

#remove layers for which bot < top	
df_no_above_layers = df_no_O_layers[df_no_O_layers.layer_bot .> df_no_O_layers.layer_top, :]

#remove negative values of oc
df_workable = df_no_above_layers[df_no_above_layers.oc_perc .>= 0, :]

Unnamed: 0_level_0,dataset_name,site_name,profile_name,layer_name,lat_dd,long_dd,obs_date
Unnamed: 0_level_1,String,String,String,String,String?,String?,String?
1,NRCS Sept/2014,50ND075005,40A0001,40A00001,4880497,-10173472,19/09/1950
2,NRCS Sept/2014,50ND075005,40A0001,40A00002,4880497,-10173472,19/09/1950
3,NRCS Sept/2014,50ND075005,40A0001,40A00003,4880497,-10173472,19/09/1950
4,NRCS Sept/2014,50ND075005,40A0001,40A00004,4880497,-10173472,19/09/1950
5,NRCS Sept/2014,50ND075005,40A0001,40A00005,4880497,-10173472,19/09/1950
6,NRCS Sept/2014,50ND075005,40A0001,40A00006,4880497,-10173472,19/09/1950
7,NRCS Sept/2014,50ND075005,40A0001,40A00007,4880497,-10173472,19/09/1950
8,NRCS Sept/2014,50ND075003,40A0013,40A00094,4893664,-10158499,18/09/1950
9,NRCS Sept/2014,50ND075003,40A0013,40A00095,4893664,-10158499,18/09/1950
10,NRCS Sept/2014,50ND075003,40A0013,40A00096,4893664,-10158499,18/09/1950


OrgC storage calc function:

In [60]:
function orgc_storage(profile) 

    top = profile.layer_top
    bot = profile.layer_bot

    blk = profile.bd_pred_2 #[g/cm³]
    orgc = profile.oc_perc #[%] 

    layers_heights = (bot.-top)

    mass=0.0
    for layer in 1:length(layers_heights)
        #calculation for 1m² of surface, for all layers: volume x carbon/volume
        mass += (100*100*layers_heights[layer])*(blk[layer]*orgc[layer]/100)/1000 #result in [Kg]
    end

    return mass
end


orgc_storage (generic function with 1 method)

In [61]:
#profile names alone won't make an unique key, I'm using (site, profile)

sp_key = unique([(row.site_name, row.profile_name) for row in eachrow(df_workable)])

19196-element Array{Tuple{String,String},1}:
 ("50ND075005", "40A0001")
 ("50ND075003", "40A0013")
 ("50ND075007", "40A0067")
 ("59ND045001", "40A0102")
 ("50ND075006", "40A0107")
 ("50ND075008", "40A0111")
 ("50ND075009", "40A0130")
 ("50ND075010", "40A0131")
 ("S1949CO049005", "40A0138")
 ("S1949CO049007", "40A0140")
 ("S1964TN157003", "40A0158")
 ("S1958WV025003", "40A0162")
 ("67FN220000", "40A0165")
 ⋮
 ("TY23 JP", "7003023")
 ("TY24 JP", "7003024")
 ("TY25 JP", "7003025")
 ("TY26 JP", "7003026")
 ("TY28 JP", "7003028")
 ("TY29 JP", "7003029")
 ("TY30 JP", "7003030")
 ("TY31 JP", "7003031")
 ("TY32 JP", "7003032")
 ("TY33 JP", "7003033")
 ("TY34 JP", "7003034")
 ("TY35 JP", "7003035")

In [62]:
#computing orgC storage

orgc_mass = Dict()

for (s,p) in sp_key
profile_calc = df_workable[(df_workable.site_name .== s) .& (df_workable.profile_name .== p), :]
orgc_mass[(s,p)] = orgc_storage(profile_calc)
end

#storing results in DF
df_orgc = DataFrame(site_name = [s for (s,_) in sp_key], 
                   profile_name = [p for (_,p) in sp_key], 
                   stored_orgc = Float64.(values(orgc_mass)))


Unnamed: 0_level_0,site_name,profile_name,stored_orgc
Unnamed: 0_level_1,String,String,Float64
1,50ND075005,40A0001,3.81415
2,50ND075003,40A0013,8.54735
3,50ND075007,40A0067,3.94668
4,59ND045001,40A0102,18.5023
5,50ND075006,40A0107,6.55938
6,50ND075008,40A0111,15.7195
7,50ND075009,40A0130,5.94671
8,50ND075010,40A0131,10.0302
9,S1949CO049005,40A0138,12.5975
10,S1949CO049007,40A0140,8.11315


In [63]:
#retrieving profile start (min_layer) and depth

min_tops = Dict()
sol_deps = Dict()

for (s,p) in sp_key
    prf = df_workable[(df_workable.site_name .== s) .& (df_workable.profile_name .== p), [:layer_top, :layer_bot]]
    tops = prf.layer_top
    bots = prf.layer_bot

    min_tops[(s,p)] = minimum(tops)
    sol_deps[(s,p)] = maximum(bots)
end

#storing results in DF	
df_tops_bots = DataFrame(site_name = [s for (s,_) in sp_key], 
                     profile_name = [p for (_,p) in sp_key], 
                     min_layer = Float64.(values(min_tops)),
                     profile_depth = Float64.(values(sol_deps)))



Unnamed: 0_level_0,site_name,profile_name,min_layer,profile_depth
Unnamed: 0_level_1,String,String,Float64,Float64
1,50ND075005,40A0001,0.0,152.0
2,50ND075003,40A0013,0.0,46.0
3,50ND075007,40A0067,0.0,152.0
4,59ND045001,40A0102,0.0,213.0
5,50ND075006,40A0107,0.0,152.0
6,50ND075008,40A0111,0.0,153.0
7,50ND075009,40A0130,0.0,196.0
8,50ND075010,40A0131,0.0,53.0
9,S1949CO049005,40A0138,0.0,150.0
10,S1949CO049007,40A0140,0.0,208.0


In [64]:
#retrieving extra information to be joined later
df_lat_long_date = unique(df_workable[:,[:dataset_name,:site_name,:profile_name,:lat_dd,:long_dd,:obs_date]])

Unnamed: 0_level_0,dataset_name,site_name,profile_name,lat_dd,long_dd,obs_date
Unnamed: 0_level_1,String,String,String,String?,String?,String?
1,NRCS Sept/2014,50ND075005,40A0001,4880497,-10173472,19/09/1950
2,NRCS Sept/2014,50ND075003,40A0013,4893664,-10158499,18/09/1950
3,NRCS Sept/2014,50ND075007,40A0067,488083,-1017075,19/09/1950
4,NRCS Sept/2014,59ND045001,40A0102,4644944,-9837,21/08/1959
5,NRCS Sept/2014,50ND075006,40A0107,4882111,-10174722,19/09/1950
6,NRCS Sept/2014,50ND075008,40A0111,4889914,-10155444,20/09/1950
7,NRCS Sept/2014,50ND075009,40A0130,4894275,-10158694,20/09/1950
8,NRCS Sept/2014,50ND075010,40A0131,4891528,-10160583,20/09/1950
9,NRCS Sept/2014,S1949CO049005,40A0138,missing,missing,02/09/1949
10,NRCS Sept/2014,S1949CO049007,40A0140,missing,missing,31/08/1949


In [65]:
#first join
df_join_first = innerjoin(df_lat_long_date,df_orgc, on = [:site_name,:profile_name]);

In [66]:
#final results
df_final = innerjoin(df_join_first, df_tops_bots, on = [:site_name,:profile_name])

Unnamed: 0_level_0,dataset_name,site_name,profile_name,lat_dd,long_dd,obs_date
Unnamed: 0_level_1,String,String,String,String?,String?,String?
1,NRCS Sept/2014,50ND075005,40A0001,4880497,-10173472,19/09/1950
2,NRCS Sept/2014,50ND075003,40A0013,4893664,-10158499,18/09/1950
3,NRCS Sept/2014,50ND075007,40A0067,488083,-1017075,19/09/1950
4,NRCS Sept/2014,59ND045001,40A0102,4644944,-9837,21/08/1959
5,NRCS Sept/2014,50ND075006,40A0107,4882111,-10174722,19/09/1950
6,NRCS Sept/2014,50ND075008,40A0111,4889914,-10155444,20/09/1950
7,NRCS Sept/2014,50ND075009,40A0130,4894275,-10158694,20/09/1950
8,NRCS Sept/2014,50ND075010,40A0131,4891528,-10160583,20/09/1950
9,NRCS Sept/2014,S1949CO049005,40A0138,missing,missing,02/09/1949
10,NRCS Sept/2014,S1949CO049007,40A0140,missing,missing,31/08/1949


In [67]:
CSV.write("stored_orgc_ISCN_jup.csv", df_final)

"stored_orgc_ISCN_jup.csv"