In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bramm_data_analysis import preprocessing
import gstools as gs

In [None]:
# Column names of the moss dataset, grouped by theme. Each key maps to the list
# of column labels belonging to that group, so a whole group can be selected or
# dropped in a single call.
columns = {
    # Where and when each sample was taken, plus identifiers.
    "metadata": [
        "site_code",
        "site_insee_code",
        "department_code",
        "latitude",
        "longitude",
        "x_lambert",
        "y_lambert",
        "altitude",
        "date",
        "sample_code",
        "sample_send_europe",
    ],
    # Descriptors of the sample itself.
    "sample": [
        "strand_size",
        "visible_dust_particles",
        "visible_pollen_particles",
        "mineral_type",
    ],
    # Per-site sample counts (`*_nb`).
    "site": [
        "samples_nb",
        "fern_samples_nb",
        "tree_samples_nb",
        "herb_samples_nb",
        "litter_samples_nb",
        "humus_samples_nb",
        "soi_samples_nb",
        "sand_samples_nb",
        "hard_strain_coniferous_samples_nb",
        "hard_strain_hardwood_samples_nb",
        "hard_strain_unknown_samples_nb",
        "decomposed_strain_coniferous_samples_nb",
        "decomposed_strain_hardwood_samples_nb",
        "decomposed_strain_unknown_samples_nb",
        "bark_coniferous_nb",
        "bark_hardwood_samples_nb",
        "bark_unknown_samples_nb",
        "hard_coniferous_samples_nb",
        "hard_hardwood_samples_nb",
        "hard_unknown_samples_nb",
        "decomposed_coniferous_samples_nb",
        "decomposed_hardwood_samples_nb",
        "decomposed_unknown_samples_nb",
    ],
    # Flags for complementary studies (`cs_*`).
    "studies": [
        "sample_outside_complementary_study",
        "cs_3_species_comparison",
        "cs_2_species_comparison",
        "cs_repeated_sampling",
        "cs_repeated_analysis",
    ],
    # One uncertainty column per chemical element.
    "incertitudes": [
        "aluminium_incertitude",
        "arsenic_incertitude",
        "calcium_incertitude",
        "cadmium_incertitude",
        "cobalt_incertitude",
        "chromium_incertitude",
        "copper_incertitude",
        "iron_incertitude",
        "mercury_incertitude",
        "nitrogen_incertitude",
        "sodium_incertitude",
        "nickel_incertitude",
        "lead_incertitude",
        "sulfur_incertitude",
        "zinc_incertitude",
    ],
    # Non-numeric descriptors.
    "categorical": [
        "weather",
        "tree_layer",
        "tree_layer_complement",
        "tree_cover",
        "species",
    ],
}

In [None]:
# Location of the raw moss workbook (relative path).
moss_data_path = Path("../data/Mines_2024.xlsx")

# Build the project preprocessor for that file, then load and preprocess it
# into the working dataframe `df`.
preprocessor = preprocessing.MossPreprocessor(moss_data_path)
df = preprocessor.load_preprocess()

In [None]:
# Remove Columns with nans
# The cell is written so that it can be re-run on an already cleaned `df`.

# Replace missing `tree_layer_complement` values with the "unspecified" label;
# otherwise this column would be removed by the NaN-column drop below.
df.loc[
    df["tree_layer_complement"].isna(), "tree_layer_complement"
] = "unspecified"

# Drop the uncertainty columns. `errors="ignore"` keeps the cell re-runnable:
# on a second execution these columns are already gone and a plain drop would
# raise KeyError.
df = df.drop(columns=columns["incertitudes"], errors="ignore")

# Compute the set of NaN-containing columns once, so that the list printed and
# the list dropped are guaranteed to be the same.
nan_columns = df.columns[df.isna().any(axis=0)]
print("The following columns contains NaN and will be removed :")
print(nan_columns)
df = df.drop(columns=nan_columns)

In [None]:
# Display the column labels currently present in `df`.
df.columns

In [None]:
# Names of the two coordinate columns (x1, x2) and of the studied variable (z1).
x1, x2, z1 = "longitude", "latitude", "lead"

# Restrict the dataframe to those three columns. `filter` selects by column
# label and silently skips labels that are not present in `df`.
final_df = df.filter(items=[x1, x2, z1])

In [None]:
# Keep only the rows whose `z1` value is at most 2000.
# NOTE(review): the 2000 cut-off is unexplained here — presumably an outlier
# threshold; confirm the value and its unit.
within_threshold = final_df[z1] <= 2000
slice_df = final_df.loc[within_threshold]

# Coordinates (x, y) and measured values (field) of the retained rows.
x, y, field = slice_df[x1], slice_df[x2], slice_df[z1]

In [None]:
# Bin edges 0, 1, ..., 9 for the lag distance, expressed in the units of the
# (x, y) coordinates.
# NOTE(review): x/y are taken from the longitude/latitude columns and no
# `latlon` option is passed, so distances are presumably plain Euclidean
# distances in degrees — confirm this is intended.
bins = np.arange(0, 10)

# Empirical variogram of `field`: bin centres and the estimated value per bin.
bin_center, gamma = gs.vario_estimate(pos=(x, y), field=field, bin_edges=bins)

In [None]:
# Candidate covariance models to compare. Every key is the name of the gstools
# class it maps to, so the mapping is built by looking the names up on `gs`.
model_names = (
    "Gaussian",
    "Exponential",
    "Matern",
    "Stable",
    "Rational",
    "Circular",
    "Spherical",
    "SuperSpherical",
    "JBessel",
)
models = {name: getattr(gs, name) for name in model_names}

# Empty mapping to be filled with one score per model name.
scores = dict()

In [None]:
# Draw the empirical variogram as black points and grab the axes so that every
# fitted model curve is drawn on the same figure.
plt.scatter(bin_center, gamma, color="k", label="data")
ax = plt.gca()

# Fit each candidate model (2D) to the empirical variogram, overlay its curve
# and store the r2 value returned by the fit under the model's name.
# NOTE(review): x_max=40 draws the curves far beyond the last bin edge (9) —
# confirm this plotting range is intended.
for model, model_cls in models.items():
    fit_model = model_cls(dim=2)
    para, pcov, r2 = fit_model.fit_variogram(bin_center, gamma, return_r2=True)
    fit_model.plot(x_max=40, ax=ax)
    scores[model] = r2

In [None]:
# Summary statistics of the studied variable after thresholding.
field.describe()