In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# are reloaded before each cell executes; edits to the project package are
# then picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from pathlib import Path
from sklearn.neighbors import NearestNeighbors
from bramm_data_analysis import preprocessing
from bramm_data_analysis.matching import Matcher
import matplotlib.pyplot as plt

# Load Data

### Load Moss Data

In [None]:
# Location of the moss workbook, relative to the notebook's working directory.
moss_data_path = Path("../data/Mines_2024.xlsx")

# Build the preprocessor first, then run its load + preprocess step.
moss_preprocessor = preprocessing.MossPreprocessor(moss_data_path)
df_moss = moss_preprocessor.load_preprocess()

### Load RMQS Data

In [None]:
# Location of the RMQS CSV export, relative to the notebook's working directory.
rmqs_data_path = Path("../data/RMQS.csv")

# Build the preprocessor first, then run its load + preprocess step.
rmqs_preprocessor = preprocessing.RMQSPreprocessor(rmqs_data_path)
df_rmqs = rmqs_preprocessor.load_preprocess()

In [None]:
# Histogram of the calendar year taken from each RMQS row's `date_complete`.
rmqs_years = df_rmqs["date_complete"].dt.year
rmqs_years.hist()

## Closest Points Matching

### Actual Matching

In [None]:
# Matcher configured with a 10 km distance threshold; its other settings
# (e.g. `year_threshold`, read in the summary cell) keep the class defaults.
data_matcher = Matcher(km_threshold=10)

# NOTE(review): radians=False presumably means the coordinates are passed in
# degrees -- confirm against Matcher.match_rmqs_to_moss.
matched_df = data_matcher.match_rmqs_to_moss(df_moss, df_rmqs, radians=False)

In [None]:
# Summary of the matching run: the two thresholds held by the matcher and how
# many rows of the matched table remain relative to the full moss table.
summary_parts = (
    f" Year Threshold : {data_matcher.year_threshold} \n",
    f"Distance Threshold : {data_matcher.km_threshold} km \n",
    f"Conserved : {matched_df.shape[0]} / {df_moss.shape[0]}",
)
print(*summary_parts)

### Matched Data Visualization

In [None]:
# Map of the sampling locations: every moss point in grey, overlaid with the
# moss (green) and RMQS (red) coordinates kept in the matched table.
match_fig, match_ax = plt.subplots()

# (x column, y column, source frame, legend label, alpha, colour) per layer,
# drawn in this order so the matched points sit on top of the grey backdrop.
scatter_layers = [
    ("longitude", "latitude", df_moss, "all moss", 0.4, "grey"),
    (
        f"longitude{data_matcher.moss_suffix}",
        f"latitude{data_matcher.moss_suffix}",
        matched_df,
        "Moss",
        0.5,
        "green",
    ),
    (
        f"longitude{data_matcher.rmqs_suffix}",
        f"latitude{data_matcher.rmqs_suffix}",
        matched_df,
        "RMQS",
        0.5,
        "red",
    ),
]
for lon_col, lat_col, frame, layer_label, layer_alpha, layer_color in scatter_layers:
    match_ax.scatter(
        frame[lon_col],
        frame[lat_col],
        label=layer_label,
        alpha=layer_alpha,
        color=layer_color,
    )

# Label the figure so it can be read on its own when the notebook is skimmed.
match_ax.set_xlabel("longitude")
match_ax.set_ylabel("latitude")
match_ax.set_title("Moss and RMQS sampling locations")
match_ax.legend()

# The original ended with an argument-less `plt.plot()`, which draws nothing
# and only leaves `[]` as the cell output; `plt.show()` renders the figure.
plt.show()

## Multivariate Analysis

In [None]:
import gstlearn as gl
import gstlearn.plot as gp
import gstlearn.document as gdoc
import gstools as gs

# Notebook-wide switches; both start enabled.
verbose = graphics = True

# gstlearn global option NTCOL set to 6.
# NOTE(review): presumably the number of columns shown in gstlearn's printed
# tables -- confirm in the gstlearn OptCst / ECst documentation.
gl.OptCst.define(gl.ECst.NTCOL, 6)

# gstlearn notebook helper; judging by its name it disables output
# scrolling -- TODO confirm.
gdoc.setNoScroll()

In [None]:
# RMQS column holding each metal's measurement, keyed by metal name.
# Insertion order is the order in which the metals are analysed below.
rmqs_column_by_metal = {
    "aluminium": "al_tot_hf",
    "calcium": "ca_tot_hf",
    "iron": "fe_tot_hf",
    "sodium": "na_tot_hf",
    "cadmium": "cd_tot_hf",
    "cobalt": "co_tot_hf",
    "copper": "cu_tot_hf",
    "nickel": "ni_tot_hf",
    "lead": "pb_tot_hf",
    "zinc": "zn_tot_hf",
    # "arsenic": "as_tot_hf",  # currently disabled
}

# For every metal the moss column carries the same name as the metal itself,
# so the moss/RMQS lookup table is derived from the map above.
columns_matching = {
    metal_name: {"moss": metal_name, "rmqs": rmqs_column}
    for metal_name, rmqs_column in rmqs_column_by_metal.items()
}

In [None]:
def plot_moss_rmqs_correlation(matched, metal, x_cols=("longitude", "latitude")):
    """Plot one metal's moss values against its RMQS values with a regression line.

    Replaces ten copy-pasted cells that differed only in the metal name.

    Parameters
    ----------
    matched : pandas.DataFrame
        Matched moss/RMQS table (``matched_df`` in this notebook).
    metal : str
        Key of ``columns_matching``; selects the moss and RMQS column names.
    x_cols : sequence of str, optional
        Column names registered as the coordinate (``gl.ELoc.X``) locators.

    Returns
    -------
    tuple
        ``(db, ax)``: the gstlearn Db built from the selected columns and the
        object returned by ``gp.correlation``.

    Raises
    ------
    KeyError
        If ``metal`` is not a key of ``columns_matching``.
    """
    moss_col = columns_matching[metal]["moss"]
    rmqs_col = columns_matching[metal]["rmqs"]

    # NOTE(review): DataFrame.filter silently skips labels that are absent.
    # The scatter cell reads suffixed coordinate columns
    # (f"longitude{data_matcher.moss_suffix}", ...), so confirm that the
    # unsuffixed names in `x_cols` really exist in `matched`; otherwise the
    # coordinates are dropped here without any error.
    metal_df = matched.filter([*x_cols, moss_col, rmqs_col]).astype("float")

    metal_db = gl.Db_fromPanda(metal_df)
    metal_db.setLocators(list(x_cols), gl.ELoc.X)
    metal_db.setLocator(moss_col, gl.ELoc.Z)

    corr_ax = gp.correlation(
        metal_db,
        namex=moss_col,
        namey=rmqs_col,
        asPoint=True,
        regrLine=True,
    )
    corr_ax.decoration(title=f"Correlation between {moss_col} and {rmqs_col}")
    return metal_db, corr_ax


# One correlation figure per metal, in the insertion order of `columns_matching`
# (the same order as the original per-metal cells).
x1 = "longitude"
x2 = "latitude"
for metal in columns_matching:
    # `mydb` / `ax` keep their original names, so after the loop they hold the
    # last metal's objects exactly as the last copy-pasted cell left them.
    mydb, ax = plot_moss_rmqs_correlation(matched_df, metal, x_cols=(x1, x2))
    # Render and close the current figure before the next iteration so each
    # metal gets its own plot (the original argument-less `ax.plot()` drew
    # nothing).
    plt.show()