In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Directory prefix (relative to the notebook) of the input files read in the cells below.
path = "initial-data/"

### explanation of the columns
- Source: citation (author(s) + year)
- Source_abbrev
- Latitude
- Longitude
- Depth
- Material: Calcite or Aragonite or Mg-calcite
- Sample: more detailed description of sample (such as foram type)
- Device (True or False): did they use a device that cycled the water around the sample 
- Biogenic (True or False): is the sample from biogenic origin (e.g. forams)
- Organics (True or False): False if biogenic sample was bleached or if sample is inorganic
- Dissrate (with units, either % d-1 or mg cm-2 d-1), also extra column with Error if given 
- Fragmentation: Fragmentation of sample pieces in %
- Year: year the expedition took place (if unknown it is the year of the publication)
- Month: month the expedition took place (most are over several months, so I take a month in the middle)
- Mesh: mesh size in um
- Size: size of sample pieces in XS to XL (description of size is in comments in um)
- Deployment_d: how long the sample was in the ocean
- Comments: additional info
- Rate_mass: % d-1, mass normalized
- Rate_sa: mg cm-2 d-1, surface area normalized


#### Sizes
- XXXS: < 10 um 
- XXS: 10 - 53 um
- XS: 62 - 125 um
- S: 125 - 177 um
- M: 177 - 250 um
- L: 250 - 420 um
- XL: >420 um
- XXL: 0.7 - 1mm, >831 um

### add Peterson

Choose either the averaged or the individual data; to switch, change which Peterson frame (`p66_calc_avg` or `p66_calc_indiv`) is merged into `df` at the end of the Berger 1967 (b67) cell.

In [2]:
# Peterson (1966): averaged calcite dissolution rates, one row per depth.
p66_calc_avg = (
    pd.read_csv(path + 'Peterson_1966_calcitedissrates_avg.csv')
    .sort_values("Depth")  # keep the profile ordered by depth
    .assign(
        Source="Peterson, 1966",
        Source_abbrev="P66",
        Latitude=18.8,
        Longitude=-168.5,
        Sample="Calcite crystals",
        Material="Calcite",
        # NOTE(review): /365 presumably converts a per-year rate to per-day -- confirm CSV units
        Rate_sa=lambda frame: frame["Rate"] / 365,
        Rate_error_sa=0.012 / 365,
        Organics=False,
        Device=False,
        Deployment_d=125,
        Biogenic=False,
        Year=1966,  # no data given for deployment time
        Comments="averages over 5 adjacent samples",
    )
    .drop(columns="Rate")  # the raw rate column is not needed any more
)

In [3]:
# Peterson (1966): individual (non-averaged) calcite dissolution rates.
p66_calc_indiv = (
    pd.read_csv(path + 'Peterson_1966_calcitedissrates_indiv.csv')
    .sort_values("Depth")  # keep the profile ordered by depth
    .assign(
        Source="Peterson, 1966",
        Source_abbrev="P66",
        Latitude=18.8,
        Longitude=-168.5,
        Sample="Calcite crystals",
        Material="Calcite",
        # NOTE(review): /365 presumably converts a per-year rate to per-day -- confirm CSV units
        Rate_sa=lambda frame: frame["Rate"] / 365,
        Rate_error_sa=0.012 / 365,
        Organics=False,
        Device=False,
        Deployment_d=125,
        Biogenic=False,
        Year=1966,  # no data given for deployment time
        Comments="individual results",
    )
    .drop(columns="Rate")  # the raw rate column is not needed any more
)



### add Berger 1967

In [4]:
# Berger 1967
# One batch was treated with H2O2 to get rid of organic matter, the other was left untreated.
# Units in the CSV files: % weight loss.
B67_DEPLOYMENT_D = 121  # the samples were in the water for 4 months (121 days)


def _prepare_b67(raw, organics, comment):
    """Return a depth-sorted Berger (1967) frame with metadata and a per-day rate.

    Parameters
    ----------
    raw : pandas.DataFrame
        Table as read from one of the Berger CSV files; must contain "Depth".
    organics : bool
        Value stored in the "Organics" column (False for the H2O2-treated batch).
    comment : str
        Text stored in the "Comments" column.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``raw`` itself is not modified.
    """
    frame = raw.sort_values("Depth").assign(
        Source="Berger, 1967",
        Source_abbrev="B67",
        Latitude=18.82,
        Longitude=-168.51,
        Sample="Foraminifera",
        Material="Calcite",
        Organics=organics,
        Mesh=62,
        Device=False,
        Deployment_d=B67_DEPLOYMENT_D,
        Biogenic=True,
        # "Size" is deliberately left unset (a 'mixed' label was considered earlier)
        Year=1967,  # no data given for deployment time
    )

    # Convert % weight loss over the deployment into % per day.
    # The explicit column check replaces a bare ``except:`` that hid *every*
    # error; a missing "Rate" column is still tolerated, anything else
    # (e.g. non-numeric rates) now surfaces instead of being silently skipped.
    if "Rate" in frame.columns:
        frame = frame.assign(Rate_mass=frame["Rate"] / B67_DEPLOYMENT_D).drop(columns="Rate")

    return frame.assign(Comments=comment)


b67_foram = _prepare_b67(
    pd.read_csv(path + 'Berger_1967_H2O2treated_foramsdissrates.csv'),
    organics=False,
    comment="sample from sediments, bleached",
)
b67_foram_o = _prepare_b67(
    pd.read_csv(path + 'Berger_1967_untreated_foramsdissrates.csv'),
    organics=True,
    comment="sample from sediments, untreated",
)

# Combine both batches. Without ``on=`` the merge keys are all shared columns;
# the batches differ in Organics/Comments, so no rows are matched with each other.
b67 = pd.merge(b67_foram, b67_foram_o, how="outer")

# Start the compiled table: Peterson (individual results) + Berger.
df = pd.merge(p66_calc_indiv, b67, how="outer")

### add Milliman 1977
(Milliman said that his 1975 data are bad, so I will not add them to the list.)

In [5]:
def _load_m77_station(filename, latitude, longitude, deployment_d, month):
    """Read one Milliman (1977) station file and attach its station metadata.

    The first data row (index label 0) holds the X/Y sub-header, so it is dropped
    and the remaining values are cast from strings to floats. "Month" is added
    after the cast, exactly as before, so it is not converted to float.
    """
    station = pd.read_csv(path + filename)
    station["Latitude"] = latitude
    station["Longitude"] = longitude
    station["Deployment_d"] = deployment_d
    station["Year"] = 1974
    station = station.drop(0).astype("float")
    station["Month"] = month
    return station


m77_1 = _load_m77_station('Milliman_1977_Station1.csv', 23.3, -70.5, 121, 2)
m77_543 = _load_m77_station('Milliman_1977_Station543.csv', 23.1, -65.2, 182, 9)
m77_545 = _load_m77_station('Milliman_1977_Station545.csv', 22.8, -55.1, 182, 9)
m77_548 = _load_m77_station('Milliman_1977_Station548.csv', 25.9, -60.3, 182, 9)

m77_data = [m77_1, m77_543, m77_545, m77_548]

# One entry per material: (rate column, depth column, Sample, Material, Comments).
# The order (calcite, aragonite, Mg-calcite) is the order in which they are merged.
M77_MATERIALS = [
    ("Calcite", "Unnamed: 1", "Foraminifera", "Calcite",
     "Size: 250-500 um, Spherical Forams, orbulina universa and globigerinoides sacculifer"),
    ("Aragonite", "Unnamed: 3", "Aragonite ooids", "Aragonite",
     "Size: 250-500 um, "),
    ("Mg-calcite", "Unnamed: 5", "Mg-calcite ooids", "Mg-calcite",
     "Size: 250-500 um, 12 mole % MgCO3"),
]
M77_STATION_COLS = ["Latitude", "Longitude", "Deployment_d", "Year", "Month"]

m77 = pd.DataFrame(columns=["Latitude", "Longitude"])

for station in m77_data:
    for rate_col, depth_col, sample, material, comment in M77_MATERIALS:
        # separate this material, name the depth column correctly and sort by depth
        part = (
            station[[rate_col, depth_col] + M77_STATION_COLS]
            .rename(columns={depth_col: "Depth"})
            .sort_values("Depth")
        )

        # if no depth in row, delete; .copy() makes the filtered slice an independent
        # frame so the column assignments below do not raise SettingWithCopyWarning
        part = part[part["Depth"].notna()].copy()

        # change dissolution rate from % loss per year to % d-1
        part["Rate_mass"] = part[rate_col] / 365
        part = part.drop(rate_col, axis=1)

        part["Sample"] = sample
        part["Material"] = material
        part["Comments"] = comment

        m77 = pd.merge(m77, part, how="outer")


# add the metadata shared by all Milliman samples
m77["Organics"] = True  # NOTE(review): also True for the ooid rows -- confirm against the "Organics" column definition
m77["Source"] = "Milliman, 1977"
m77["Source_abbrev"] = "M77"
m77["Device"] = False
# Foraminifera are of biogenic origin (see the column explanation at the top);
# previously every Milliman row, including the forams, was flagged False.
m77["Biogenic"] = m77["Sample"].eq("Foraminifera")
m77["Size"] = "L"
m77["Mesh"] = 40

#merge
df = pd.merge(df, m77, how="outer")


### add Honjo and Erez 1978

In [6]:
# Honjo and Erez, 1978
# had many different samples (12 carbonate samples + diatoms), most of them 0 rate
# for 2 of the 3 depth layers
# units: % weight loss
he78_raw = pd.read_csv(path + 'HonjoErez_dissrates.csv')
he78_surf = pd.read_csv(path + 'HonjoErez_dissrates_surf.csv')

HE78_DEPLOYMENT_D = 79

# One entry per sample type: (CSV column, metadata columns in the order they are added).
# The list order is the order in which the frames are concatenated.
HE78_SAMPLES = [
    ("Diatom, Coscinnoidiscus", {
        "Sample": "Diatoms",
        "Organics": False,
        "Comments": "Diatoms, Coscinnoidiscus species, bleached with NaOCl, cultured in lab",
        "Biogenic": True,
        "Material": "Silica",
    }),
    ("Pteropod assemblage", {
        "Sample": "Pteropods",
        "Material": "Aragonite",
        "Organics": True,
        "Comments": "Pteropod assemblage (>5 species), size >831 um, from core (2293 m)",
        "Biogenic": True,
        "Size": "XXL",
    }),
    ("C neohelis", {
        "Sample": "C. neohelis (Coccoliths)",
        "Material": "Calcite",
        "Organics": True,
        "Comments": "Cruciplacolithus neohelis, cultured in lab",
        "Biogenic": True,
        "Size": "XXXS",
    }),
    ("Bleached E huxleyi", {
        "Sample": "E. huxleyi (Coccoliths)",
        "Material": "Calcite",
        "Organics": False,
        "Comments": "Emilliania huxleyi, cultured in lab, bleached with NaOCl",
        "Biogenic": True,
        "Size": "XXXS",
    }),
    ("E huxleyi", {
        "Sample": "E. huxleyi (Coccoliths)",
        "Material": "Calcite",
        "Organics": True,
        "Comments": "Emilliania huxleyi, cultured in lab",
        "Biogenic": True,
        "Size": "XXXS",
    }),
    ("G pachyderma", {
        "Sample": "G. pachyderma (Forams)",
        "Material": "Calcite",
        "Organics": True,
        "Comments": "Globigerina pachyderma, handpicked from core (2736 m), size: >149 um",
        "Biogenic": True,
        "Size": "M",
    }),
    ("G bulloides", {
        "Sample": "G. bulloides (Forams)",
        "Material": "Calcite",
        "Organics": True,
        "Comments": "Globigerina bulloides, handpicked from core (3744 m), size: >250 um",
        "Biogenic": True,
        "Size": "L",
    }),
    ("G sacculifera", {
        "Sample": "G. sacculifera (Forams)",
        "Material": "Calcite",
        "Organics": True,
        "Comments": "Globigerinoides sacculifer, handpicked from core (3711 m), size: >250 um",
        "Biogenic": True,
        "Size": "L",
    }),
    ("bleached foraminifera assemblage", {
        "Sample": "Foraminifera",
        "Material": "Calcite",
        "Organics": False,
        "Comments": "bleached with NaOCl, Foraminifera assemblage (>20 species), core (2190 m), size: 63-1000 um",
        "Biogenic": True,
    }),
    ("Foraminifera assemblage", {
        "Sample": "Foraminifera",
        "Material": "Calcite",
        "Organics": True,
        "Comments": "Foraminifera assemblage (>20 species), core (2190 m), size: 63-1000 um",
        "Biogenic": True,
    }),
    ("Synthetic aragonite", {
        "Sample": "Synthetic Aragonite",
        "Material": "Aragonite",
        "Organics": False,
        "Comments": "prepared in lab, size: 10-53 um",
        "Biogenic": False,
        "Size": "XXS",
    }),
    ("Large calcite crystals", {
        "Sample": "Calcite crystal",
        "Material": "Calcite",
        "Organics": False,
        "Comments": "large crystals, size: 0.7-1 mm",
        "Biogenic": False,
        "Size": "XXL",
    }),
    ("Reagent calcite", {
        "Sample": "Reagent Calcite",
        "Material": "Calcite",
        "Organics": False,
        "Comments": "precipitated crystals, size: 10-53 um",
        "Biogenic": False,
        "Size": "XXS",
    }),
]


def _he78_sample(column, metadata):
    """Build the long-format frame for one Honjo & Erez sample type.

    Parameters
    ----------
    column : str
        Name of the sample's column in both ``he78_raw`` and ``he78_surf``.
    metadata : dict
        Constant columns to attach, added in dict order. Sample types without a
        "Size" entry get NaN in that column once the frames are concatenated.

    Returns
    -------
    pandas.DataFrame
        Columns "Depth", "Rate_mass", "Rate_sa" followed by the metadata columns.
    """
    frame = he78_raw[["Depth", column]].copy()
    # presumably % weight loss over the 79-day deployment -> % d-1; TODO confirm units
    frame["Rate_mass"] = frame[column] / HE78_DEPLOYMENT_D
    # Aligned on the row index: assumes both CSVs list the depths in the same row
    # order -- TODO confirm. NOTE(review): /365 presumably converts per-year to per-day.
    frame["Rate_sa"] = he78_surf[column] / 365
    frame = frame.drop(column, axis=1)
    for name, value in metadata.items():
        frame[name] = value
    return frame


he78_all = [_he78_sample(column, metadata) for column, metadata in HE78_SAMPLES]

he78 = pd.concat(he78_all, join='outer', axis=0)

he78["Depth"] = he78["Depth"].astype("float")
he78["Deployment_d"] = HE78_DEPLOYMENT_D
he78["Latitude"] = 32.37
he78["Longitude"] = -55.0
# Publication year is 1978 (cf. section title and abbreviation); was mistyped as 1987.
he78["Source"] = "Honjo and Erez (1978)"
he78["Source_abbrev"] = "HE78"
he78["Device"] = True
he78["Month"] = 11
he78["Year"] = 1976
he78["Mesh"] = 0.6

#merge
df = pd.merge(df, he78, how="outer")

### add Thunell 1981

In [7]:
# Thunell et al. (1981): foraminifera, rates given as % weight loss.
T81_DEPLOYMENT_D = 123  # the sample was in the water for 4 months (123 days)

t81_foram_o = (
    pd.read_csv(path + 'Thunell_1981_foramdissrates.csv')
    .sort_values("Depth")  # make sure it is sorted by depth
    .assign(
        Source="Thunell et al., 1981",
        Source_abbrev="T81",
        Latitude=4.0,
        Longitude=-82.0,
        Sample="Foraminifera",
        Material="Calcite",
        Organics=True,
        Device=False,
        Mesh=100,
        Size="L",
    )
    .rename(columns={"Fragmentation": "Fragmentation_pct"})
)

# Convert % weight loss over the deployment into % per day.
# The explicit column check replaces a bare ``except:`` that hid *every* error;
# a missing "Rate" column is still tolerated, anything else (e.g. non-numeric
# rates) now surfaces instead of being silently skipped.
if "Rate" in t81_foram_o.columns:
    t81_foram_o["Rate_mass"] = t81_foram_o["Rate"] / T81_DEPLOYMENT_D
    t81_foram_o = t81_foram_o.drop(columns="Rate")

t81_foram_o = t81_foram_o.assign(
    Comments="Size >250 um, sample from sediments (4140 m)",
    Deployment_d=T81_DEPLOYMENT_D,
    Biogenic=True,
    Year=1979,
    Month=8,
)

#merge
df = pd.merge(df, t81_foram_o, how="outer")



### add Metzler 1982

In [8]:
# Metzler 1982
# The samples were sorted into 5 size fractions; fraction 5 is the smallest.
# Units: % weight loss (all fractions), % fragmentation (fractions 3 and 5 only).
m82_rate_cols = ["Rate Frac1", "Rate Frac2", "Rate Frac3", "Rate Frac4", "Rate Frac5"]

m82_1_foram_o = pd.read_csv(path + 'Metzler_1982_foramdissrates_site1.csv').assign(
    Latitude=0.0,
    Longitude=-152.2,
    Deployment_d=123,
    Mesh=44,
    Comments="Station 1, samples from sediment",
)
# Station 2 gets no mesh size: it did not matter there because the samples were
# lost for other reasons (movement in the mooring).
m82_2_foram_o = pd.read_csv(path + 'Metzler_1982_foramdissrates_site2.csv').assign(
    Latitude=0.7,
    Longitude=-153.1,
    Deployment_d=118,
    Mesh=np.nan,
    Comments="Station 2, samples from sediment, mechanical loss of samples",
)

# % weight loss -> % per day, using each station's deployment length
m82_1_foram_o[m82_rate_cols] = m82_1_foram_o[m82_rate_cols] / 123
m82_2_foram_o[m82_rate_cols] = m82_2_foram_o[m82_rate_cols] / 118

# Station 2 is intentionally left out of the compilation.
m82_data = [m82_1_foram_o]

# (rate column, fragmentation column or None, Size, text appended to Comments),
# listed in merge order
m82_fractions = [
    ("Rate Frac1", None, "XL", ", XL: >420 um"),
    ("Rate Frac2", None, "L", ", L: 250-420 um"),
    ("Rate Frac3", "Fragments Frac3", "M", ", M: 177-250 um"),
    ("Rate Frac4", None, "S", ", S: 125-177 um"),
    ("Rate Frac5", "Fragments Frac5", "XS", ", XS: 62-125 um"),
]
m82_station_cols = ["Longitude", "Latitude", "Comments", "Deployment_d", "Mesh"]

m82 = pd.DataFrame(columns=["Latitude", "Longitude"])

for station in m82_data:
    station = station.sort_values("Depth")
    station["Depth"] = station["Depth"].astype("float")

    for rate_col, frag_col, size, size_note in m82_fractions:
        # pick this fraction's columns and give them their final names
        wanted = ["Depth", rate_col]
        new_names = {rate_col: "Rate_mass"}
        if frag_col is not None:
            wanted.append(frag_col)
            new_names[frag_col] = "Fragmentation_pct"
        fraction = station[wanted + m82_station_cols].rename(columns=new_names)

        # size class, plus its range spelled out in the comment
        fraction["Size"] = size
        fraction["Comments"] = fraction["Comments"] + size_note

        m82 = pd.merge(m82, fraction, how="outer")

# metadata shared by all fractions
m82["Source"] = "Metzler et al., 1982"
m82["Source_abbrev"] = "M82"
m82["Sample"] = "Foraminifera"
m82["Material"] = "Calcite"
m82["Organics"] = True
m82["Device"] = False
m82["Biogenic"] = True
m82["Year"] = 1982  # no data given for deployment time

#merge
df = pd.merge(df, m82, how="outer")



### add Troy 1997

In [9]:
# Troy 1997
# units: mg cm-2 yr-1 (calculated from AFM measurements)
t97_feb_calc = pd.read_csv(path + 'Troy_1997_Feb_calcitedissrates.csv').assign(
    Comments="based on surface roughness, cm big pieces, deployed in February",
    Month=2,
)
t97_jun_calc = pd.read_csv(path + 'Troy_1997_Jun_calcitedissrates.csv').assign(
    Comments="based on surface roughness, cm big pieces, deployed in June",
    Month=6,
)

# combine both deployments and order by depth
t97 = pd.merge(t97_feb_calc, t97_jun_calc, how="outer").sort_values("Depth")

t97 = (
    t97
    # per year -> per day
    .assign(Rate=t97["Rate"] / 365, Rate_error=t97["Rate_error"] / 365)
    .rename(columns={"Rate": "Rate_sa", "Rate_error": "Rate_error_sa"})
    .drop(columns="Rate_max")
    .assign(
        Source="Troy et al, 1997",
        Source_abbrev="T97",
        Latitude=22.75,
        Longitude=-158.0,
        Sample="Calcite crystal",
        Material="Calcite",
        Organics=False,
        Device=False,
        Deployment_d=3,
        Biogenic=False,
        Mesh=333,
        Year=1993,
    )
)

#merge
df = pd.merge(df, t97, how="outer")




### add Fukuhara 2008

In [10]:
# Fukuhara et al. (2008)
# units: %/day
# Each file is read and tagged with its sample description in one step.
f08_arag = pd.read_csv(path + 'Fukuhara_2008_aragonitedissrates.csv').assign(
    Sample="Aragonite crystal", Material="Aragonite", Organics=False, Biogenic=False,
)
f08_calc = pd.read_csv(path + 'Fukuhara_2008_calcitedissrates.csv').assign(
    Sample="Calcite crystal", Material="Calcite", Organics=False, Biogenic=False,
)
f08_sacc = pd.read_csv(path + 'Fukuhara_2008_sacculiferdissrates.csv').assign(
    Sample="G. sacculifer (Forams)", Material="Calcite", Organics=True, Biogenic=True,
)
f08_infl = pd.read_csv(path + 'Fukuhara_2008_inflatadissrates.csv').assign(
    Sample="G. inflata (Forams)", Material="Calcite", Organics=True, Biogenic=True,
)
f08_trunc = pd.read_csv(path + 'Fukuhara_2008_truncatulinoidesdissrates.csv').assign(
    Sample="G. truncatulinoides (Forams)", Material="Calcite", Organics=True, Biogenic=True,
)

# order matters: the frames are merged into the compilation in this sequence
f08_data = [f08_arag, f08_calc, f08_sacc, f08_infl, f08_trunc]


In [11]:
# Reshape every Fukuhara table from wide (one rate column per size/mesh/device
# combination) to long (one row per measurement).
# (rate column, Size, Mesh, Device, Comments), listed in merge order
f08_variants = [
    ("S-63", "S", 63, False, "S: 125-250 um"),
    ("M-63", "L", 63, False, "L: 250-425 um"),
    ("L-30", "XL", 30, False, "XL: 425-500 um"),
    ("L-63", "XL", 63, False, "XL: 425-500 um"),
    # NOTE(review): the column is called "Device L-36" but Mesh is recorded as 63 -- confirm
    ("Device L-36", "XL", 63, True, "XL: 425-500 um"),
]
f08_sample_cols = ["Sample", "Organics", "Material", "Biogenic"]

f08_final = pd.DataFrame(columns=["Depth"])

for sample_frame in f08_data:
    sample_frame = sample_frame.sort_values("Depth")
    sample_frame["Depth"] = sample_frame["Depth"].astype("float")

    for rate_col, size, mesh, device, comment in f08_variants:
        variant = (
            sample_frame[["Depth", rate_col] + f08_sample_cols]
            .rename(columns={rate_col: "Rate_mass"})
            .dropna(subset=["Rate_mass"])  # delete all rows with no dissolution rate
            .assign(Size=size, Mesh=mesh, Device=device, Comments=comment)
        )
        f08_final = pd.merge(f08_final, variant, how="outer")

# metadata shared by every Fukuhara measurement
f08_final = f08_final.assign(
    Source="Fukuhara et al., 2008",
    Source_abbrev="F08",
    Latitude=29.98,
    Longitude=175.0,
    Deployment_d=23,
    Month=9,
    Year=2001,
)

#merge
df = pd.merge(df, f08_final, how="outer")




### add Subhas 2022 (also includes the data for Dong 2019 and Naviaux 2019)

In [12]:
# Subhas data
# Not all of it is used: a few datapoints (e.g. benthicforams or planktic_assemblage) are left out.
s22 = (
    pd.read_csv(path + 'CDisk4_compiled_dissolution_data_all_updated.txt', delimiter="\t")
    # map the dataset's column names onto the names used in this compilation
    .rename(columns={
        "Lat": "Latitude",
        "Long": "Longitude",
        "Depthm": "Depth",
        "TempC": "Temp_CDisk4",
        "Rateggd": "Rate_mass",
        "Rategcm2d": "Rate_sa",
        "Rateerrorggd": "Rate_error_mass",
        'Rateerrorgcm2d': 'Rate_error_sa',
        "S": "Salinity_CDisk4",
        "Talkumolk": "TA_CDisk4",
        "DICAlkpH0": "DIC_CDisk4_calc13",
        "pHp0T25S": 'pH_CDisk4_T25',
        "Oca": "Oca_CDisk4_calc13",
        "Oar": 'Oar_CDisk4_calc13',
    })
    .assign(Device=True, Year=2017, Month=8, Deployment_d=1)
)

# g g-1 d-1 -> % d-1
for rate_col in ("Rate_mass", "Rate_error_mass"):
    s22[rate_col] = s22[rate_col] * 100

# g cm-2 d-1 -> mg cm-2 d-1
for rate_col in ("Rate_sa", "Rate_error_sa"):
    s22[rate_col] = s22[rate_col] * 1000

at_station_5 = s22["Station"].eq(5)
# give station 5 a single longitude so there are no duplicate Longitudes (nice for later)
s22["Longitude"] = np.where(at_station_5, -155.28, s22["Longitude"])
# The Naviaux post-storm measurements sometimes carry the latitude of station 2
# (27.750) for station 5; it should be 49.683.
s22["Latitude"] = np.where(at_station_5, 49.683, s22["Latitude"])

# Columns carried over for every sample type.
s22_columns = [
    "Depth", "Latitude", "Longitude", "Rate_mass", "Rate_sa",
    "Rate_error_mass", "Rate_error_sa", "Temp_CDisk4",
    'pH_CDisk4_T25',
    "Device", "Salinity_CDisk4", "TA_CDisk4", "DIC_CDisk4_calc13",
    "Year", "Month",
    'Oca_CDisk4_calc13', 'Oar_CDisk4_calc13',
    "Deployment_d",
]


def _s22_subset(sample_id):
    """Return an independent copy of the rows for ``sample_id``, restricted to ``s22_columns``."""
    return s22.loc[s22["sample_id"] == sample_id, s22_columns].copy()


# Split by sample type so that each subset can be attributed to its own paper.
s22_arag = _s22_subset("aragonite").assign(
    Material="Aragonite",
    Sample="Synthetic Aragonite",
    Comments="from Subhas Dataset, 250-495 um size",
    Omega_CDisk4_calc13=lambda frame: frame['Oar_CDisk4_calc13'],  # aragonite saturation state
    Source="Dong et al. (2019)",
    Source_abbrev="D19",
    Organics=False,
    Biogenic=False,
    Size="L",
    Mesh=8,
)

s22_ehux = _s22_subset("ehuxleyi").assign(
    Material="Calcite",
    Sample="E. huxleyi",
    Comments="bleached, specific surface area: 105000 cm2 g^-1, estimated size of lith: 5 - 10 um",
    Omega_CDisk4_calc13=lambda frame: frame['Oca_CDisk4_calc13'],  # calcite saturation state
    Source="Subhas et al. (2022)",
    Source_abbrev="S22",
    Organics=False,
    Biogenic=True,
    Size="XXXS",
    Mesh=0.8,
)

s22_calc = _s22_subset("calcite").assign(
    Material="Calcite",
    Sample="Synthetic Calcite",
    Comments="from Subhas Dataset, 20-53 um size",
    Omega_CDisk4_calc13=lambda frame: frame['Oca_CDisk4_calc13'],  # calcite saturation state
    Source="Naviaux et al. (2019)",
    Source_abbrev="N19",
    Organics=False,
    Biogenic=False,
    Size="XXS",
    Mesh=8,
)

# stack the three subsets again (calcite, aragonite, E. huxleyi)
s22_all = [s22_calc, s22_arag, s22_ehux]

s22_fin = pd.concat(s22_all, join='outer', axis=0)

#merge all
df = pd.merge(df, s22_fin, how="outer")

In [13]:
# Sanity check: list the size classes present in the compiled table (NaN = no size assigned).
df["Size"].unique()

array([nan, 'L', 'XXL', 'XXXS', 'M', 'XXS', 'XL', 'S', 'XS'], dtype=object)

In [14]:
# Inspect the final set of columns before exporting.
df.columns

Index(['Depth', 'Source', 'Source_abbrev', 'Latitude', 'Longitude', 'Sample',
       'Material', 'Rate_sa', 'Rate_error_sa', 'Organics', 'Device',
       'Deployment_d', 'Biogenic', 'Year', 'Comments', 'Mesh', 'Rate_mass',
       'Month', 'Size', 'Fragmentation_pct', 'Rate_error_mass', 'Temp_CDisk4',
       'pH_CDisk4_T25', 'Salinity_CDisk4', 'TA_CDisk4', 'DIC_CDisk4_calc13',
       'Oca_CDisk4_calc13', 'Oar_CDisk4_calc13', 'Omega_CDisk4_calc13'],
      dtype='object')

## Save as csv

In [15]:
# Write the compiled table to disk; index=False leaves the DataFrame index out of the file.
# NOTE(review): assumes the "data/" directory already exists -- to_csv does not create it.
df.to_csv("data/in_situ_rates_compiled.csv", index=False)