This notebook groups the data by Latitude category. It is used for exploration purposes.

In [1]:
from pathlib import Path
import pandas as pd
import plotly.graph_objects as go

from utils import fill_nans, apply_final_filtering

Set directories and read in source data

In [2]:
# Locations of the source spreadsheet and of the per-category CSV output.
input_path = Path("data", "input")
output_path = Path("data", "by_latitude")
# Create the output directory (and any missing parents) in a single idempotent
# call: re-running the cell never fails, and there is no window between
# checking for the directory and creating it.
output_path.mkdir(parents=True, exist_ok=True)

# Read the first sheet of the compilation workbook. skiprows=[1] drops the row
# directly beneath the header row (presumably a units/sub-header row -- TODO
# confirm against the spreadsheet).
df = pd.read_excel(
    input_path / "Table S2_SST compilation.xlsx", sheet_name=0, skiprows=[1]
)


Group by latitude category and visualise the data

In [3]:
# Split the compilation into one sub-frame per latitude category.
latitude_groups = df.groupby("Latitude category")

# plot
temperature_col = "SST Bayspar (°C)"
mode = "markers"
fig = go.Figure()
# Fixed colour per latitude category. A category that is missing from this
# mapping raises KeyError in the loop below, which surfaces unexpected labels.
colors = {
    "low latitude": "red",
    "mid latitude": "orange",
    "transition": "lightblue",
    "high latitude": "blue",
    "upwelling": "green",
}
# One scatter trace per category so each gets its own legend entry and colour.
for lat, df_lat in latitude_groups:
    color = colors[lat]
    fig.add_trace(
        go.Scatter(
            x=df_lat["Age (Ma)"],
            y=df_lat[temperature_col],
            # Use the `mode` setting defined above instead of repeating the
            # literal, so changing it in one place affects every trace.
            mode=mode,
            marker=dict(color=color, symbol="circle"),
            name=lat,
        )
    )
fig.update_layout(
    title_text=f"Sea surface temperature {temperature_col} by category",
    height=800,
    xaxis_title="Age (Ma)",
    yaxis_title="Temperature (C)",
)
fig.show()


Save each category to a separate file to allow for further exploration if required

In [4]:
# Write one CSV per latitude category into the output directory; the category
# label becomes part of the file name and the index is not written.
for category, category_df in latitude_groups:
    csv_path = output_path / f"categorised_{category}.csv"
    category_df.to_csv(csv_path, index=False)


Apply the final filtering (the methane index, GDGTRS and BIT/RINGSTETRA filters reported in the log output below) to the data, then group the filtered rows by latitude category again

In [5]:
# Run the NaN-handling helper from utils (presumably fills missing values --
# confirm in utils), then apply the final filtering. The filtering helper
# returns the filtered frame plus `postpend`, a suffix used in the file names
# of the filtered CSV exports.
df = fill_nans(df)
(filtered, postpend) = apply_final_filtering(df)
# Re-group the remaining rows by latitude category.
latitude_groups_filtered = filtered.groupby(by="Latitude category")


2022-07-15 12:33:36.701 | INFO     | utils:filter_mi:43 - Filtering on methane index < 0.4
2022-07-15 12:33:36.704 | INFO     | utils:filter_mi:44 - number of rows pre filter = 5170
2022-07-15 12:33:36.716 | INFO     | utils:filter_mi:46 - number of rows post filter = 5006
2022-07-15 12:33:36.718 | INFO     | utils:filter_gdgtrs:52 - Filtering on GDGTRS < 30
2022-07-15 12:33:36.720 | INFO     | utils:filter_gdgtrs:53 - number of rows pre filter = 5006
2022-07-15 12:33:36.731 | INFO     | utils:filter_gdgtrs:55 - number of rows post filter = 4800
2022-07-15 12:33:36.734 | INFO     | utils:filter_bit_ringstetra:70 - Filtering on BIT > 0.4 and RINGSTETRA < 0.7
2022-07-15 12:33:36.736 | INFO     | utils:filter_bit_ringstetra:73 - number of rows pre filter = 4800
2022-07-15 12:33:36.748 | INFO     | utils:filter_bit_ringstetra:77 - number of rows post filter = 4518


Repeat the same plot but with the filtered data

In [6]:
# plot
temperature_col = "SST Bayspar (°C)"
mode = "markers"
fig = go.Figure()

# Same category -> colour mapping as the unfiltered plot earlier in the
# notebook ("upwelling" is green there), so the two figures can be compared
# directly. A category missing from this mapping raises KeyError below.
colors = {
    "low latitude": "red",
    "mid latitude": "orange",
    "transition": "lightblue",
    "high latitude": "blue",
    "upwelling": "green",
}
# One scatter trace per category so each gets its own legend entry and colour.
for lat, df_lat in latitude_groups_filtered:
    color = colors[lat]
    fig.add_trace(
        go.Scatter(
            x=df_lat["Age (Ma)"],
            y=df_lat[temperature_col],
            # Use the `mode` setting defined above instead of repeating the
            # literal, so changing it in one place affects every trace.
            mode=mode,
            marker=dict(color=color, symbol="circle"),
            name=lat,
        )
    )
fig.update_layout(
    title_text=f"Sea surface temperature {temperature_col} by category",
    height=800,
    xaxis_title="Age (Ma)",
    yaxis_title="Temperature (C)",
)
fig.show()


Save the latitude categories for the filtered data into separate files for further
exploration if required

In [7]:
# Write one CSV per latitude category of the filtered data; `postpend` is
# appended to the file name so these files are distinguishable from the
# unfiltered exports. The index is not written.
for category, category_df in latitude_groups_filtered:
    csv_path = output_path / f"categorised_{category}{postpend}.csv"
    category_df.to_csv(csv_path, index=False)
