In [32]:
# import statements
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
from mpl_toolkits.axes_grid1.inset_locator import inset_axes  # for sed rate plot
from scipy.stats import chisquare
%matplotlib inline
plt.rcParams["font.family"] = "Avenir"

In [4]:
# read in data (loads all sheets and creates a dictionary of dataframes)
# looking at interpolated data here, but raw data is also available
# and would be useful if you want to do a downcore compilation plot
comps = pd.read_excel("data/comp_inter.xlsx", sheet_name=None)

In [47]:
# extract magnetite abundances
# ignore data outside min and max TOC at towuti
# keep towuti and non-towuti data separate
TOC_min = 0.5054
TOC_max = 2.991
towuti = []
others = []
for key in comps:
    data = comps[key][["Magnetite [ppm]", "TOC [wt. %]"]].dropna()
    # take only data where TOC is between min and max
    filtered = data[data["TOC [wt. %]"].between(TOC_min,TOC_max)]["Magnetite [ppm]"]

    if key=="IDLE TOW10-9B":
        towuti.extend(filtered)
    else:
        others.extend(filtered)

# define bin widths
# not sure if it matters whether bins are lin or log spaced
# obviously bins need to be the same for towuti and others, and should cover entire data range
max_magnetite = 12090
bins = np.linspace(0,max_magnetite)
# create histogram of non-towuti data as pdf
pdf = np.histogram(others, bins, density=True)
# create histogram of towuti data on same bins
towuti_freq = np.histogram(towuti,bins, density=True)
# calculate chi-square using the histogram values (first returned array from np.histogram)
chisq, p = chisquare(towuti_freq[0], pdf[0])
print(towuti_freq[0]*5000)
print(pdf[0]*5000)

[0.         0.         0.         0.         0.         0.
 0.         0.         2.38408018 0.         1.19204009 0.
 0.         1.19204009 3.57612027 0.         0.         2.38408018
 0.         2.38408018 0.         1.19204009 0.         0.
 0.         0.         0.         0.         0.         1.19204009
 1.19204009 1.19204009 0.         0.         0.         0.
 0.         0.         0.         1.19204009 0.         0.
 0.         0.         0.         0.         0.         0.
 1.19204009]
[6.8318727  6.8318727  0.73129905 0.78903318 0.4811178  0.28867068
 0.40413895 0.4811178  0.90450146 1.40486396 0.61583078 0.09622356
 0.03848942 0.05773414 0.03848942 0.13471298 0.03848942 0.
 0.         0.05773414 0.01924471 0.         0.01924471 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.        

  terms = (f_obs_float - f_exp)**2 / f_exp
  terms = (f_obs_float - f_exp)**2 / f_exp


In [None]:
# create a dictionary to match a unique color and unique marker to each sheet
# 20 sheets at the moment (most recently-added data is Towuti, last sheet in file is M0063 sarah data)
# ['HLY05-01 JPC-TC5', 'IODP 178-1096', 'SPR0901-04BC', 'IODP 349-1431', 'IODP 334-1378', 'IODP 334-1379', 'ODP 180-1109', 'ODP 180-1115', 'ODP 180-1118', 'NBP1203 JKC36', 'IODP 303-U1302-3', 'IODP 303-U1304', 'IODP 303-U1308', 'IODP 342-U1406', 'ODP 175-1075', 'IODP 347-M0063', 'IDLE TOW10-9B', 'NBP10-01 JPC-126', 'NBP10-01 JKC-55, JPC-127', 'IODP 347-M0063_Slotznick']
# sorted by size of dataset descending: ['IODP 178-1096', 'NBP10-01 JPC-126', 'NBP10-01 JKC-55, JPC-127', 'HLY05-01 JPC-TC5', 'NBP1203 JKC36', 'IODP 349-1431', 'ODP 180-1109', 'ODP 180-1118', 'ODP 180-1115', 'SPR0901-04BC', 'IODP 334-1378', 'IODP 347-M0063', 'IODP 334-1379', 'IODP 303-U1302-3', 'IODP 303-U1304', 'IODP 303-U1308', 'IODP 342-U1406', 'IDLE TOW10-9B', 'ODP 175-1075', 'IODP 347-M0063_Slotznick']

sorted_keys = list(comps.keys())
sorted_keys.sort()  # plotting in alphabetical order so it's not just random
#print(sorted_keys)
# sort instead by number of magnetite points
size_sort = list(comps.keys())


def sort_func(item):
    return comps[item]["Magnetite [ppm]"].dropna().size


size_sort.sort(reverse=True, key=sort_func)
#print(size_sort)

colors = ["r", "b", "g", "c", "m", "y", "mediumorchid", "orange", "royalblue", "darkviolet", "grey", "chartreuse",
          "darkcyan", "coral", "maroon", "yellowgreen", "springgreen", "orangered", "k", "slateblue"]
#markers = [".",".",".",".",".",".",".",".",".",".",".",".",".",".",".",".",".",".",".","."]
markers = [".", "v", "^", "<", ">", "8", "X", "p", "*", "h", "+", "H", "1", "2", "3", "4", "P", "D", "d", "x"]
color_dict = dict(zip(size_sort, colors))
marker_dict = dict(zip(size_sort, markers))