Analyzing k-fold errors for MGWR


In [None]:
import numpy as np
import pandas as pd
from mgwr.mgwr.gwr import MGWR, GWR
from mgwr.mgwr.sel_bw import Sel_BW
from shapely.geometry import Point
import geopandas as gpd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pickle
import plotly.express as px
import plotly.graph_objects as go
import rasterio as rs


In [None]:

# The pickle file holds two objects written back-to-back: first the R² scores
# per model, then the grain-size predictions per model. They must be read in
# that same order.
# NOTE: unpickling executes arbitrary code; only load files you produced.
with open('../../k_fold_investigate_errors.pkl', 'rb') as pkl_file:
    loaded_r2_scores_dict, loaded_grain_pred = (
        pickle.load(pkl_file),
        pickle.load(pkl_file),
    )

# Training samples (with their features) that the predictions are compared to.
train_set = pd.read_csv('../../data/train_set_with_feat_cleanCorr.csv')







In [None]:
results = loaded_r2_scores_dict

# Number of best-scoring feature combinations shown in the figure.
TOP_N = 15


def _mean_r2(item):
    """Return the NaN-ignoring mean of a (key, scores) pair, or -inf if undefined.

    An entry whose scores are all NaN has a NaN mean; NaN compares False with
    everything and would make the sort order unreliable, so it is ranked last.
    """
    mean = np.nanmean(item[1])
    return -np.inf if np.isnan(mean) else mean


# Rank the feature combinations by mean R² (best first) and keep the top ones.
sorted_r2_scores = sorted(loaded_r2_scores_dict.items(), key=_mean_r2, reverse=True)
sorted_r2_scores = sorted_r2_scores[:TOP_N]

# Create boxplots for each feature combination sorted by mean R² score
plt.figure(figsize=(10, 6))

for position, (_, scores) in enumerate(sorted_r2_scores):
    values = np.asarray(scores, dtype=float)
    # The ranking ignores NaNs, but boxplot does not filter them: a single NaN
    # turns the box statistics into NaN. Drop them so the box matches the mean
    # used for sorting.
    plt.boxplot(values[~np.isnan(values)], positions=[position], widths=0.6)

plt.xlabel('Feature Combination Index')
plt.ylabel('R² Scores')
plt.title('Boxplots of R² Scores for  Feature Combinations (Sorted by Mean)')
# Label every box with the dictionary key of its feature combination.
plt.xticks(range(len(sorted_r2_scores)), [key for key, _ in sorted_r2_scores])
plt.grid(True)
plt.show()




In [None]:
## Compare, for every model, the median prediction against the observed data.

# Step 1: collapse each model's predictions to a NaN-ignoring median along
# axis 0 (presumably the repeated k-fold runs -- TODO confirm).
med_grain_pred = {
    model_key: np.nanmedian(predictions, axis=0)
    for model_key, predictions in loaded_grain_pred.items()
}

# Step 2: residual per sample = observed 'mean_gs' minus the median prediction.
diff_grain_size = {
    model_key: train_set['mean_gs'] - median_pred
    for model_key, median_pred in med_grain_pred.items()
}


In [None]:
# Function to calculate slope and aspect for non-square cells
def calculate_slope_aspect(dem, cell_size_x, cell_size_y):
    """Compute slope and aspect grids from a 2-D elevation grid.

    Gradients are central differences (two cells apart) along each axis,
    scaled by the cell size of that axis. The first/last column of the
    x-gradient and the first/last row of the y-gradient replicate their
    nearest interior value, so both outputs have the same shape as ``dem``.

    Parameters
    ----------
    dem : array_like
        2-D elevation grid, indexed as (row, column). Needs at least three
        rows and three columns for the central differences to exist.
    cell_size_x : float
        Cell spacing along the column axis, in the same length unit as the
        elevations.
    cell_size_y : float
        Cell spacing along the row axis, in the same length unit as the
        elevations.

    Returns
    -------
    slope : ndarray
        Slope angle in degrees, ``arctan`` of the gradient magnitude.
    aspect : ndarray
        ``arctan2(dzdy, -dzdx)`` in degrees, shifted into the range [0, 360).
    """
    dem = np.asanyarray(dem)
    if not np.issubdtype(dem.dtype, np.floating):
        # Integer rasters (unsigned ones in particular) wrap around when
        # neighbouring cells are subtracted; do the arithmetic in float.
        dem = dem.astype(np.float64)

    dzdx = (dem[:, 2:] - dem[:, :-2]) / (2 * cell_size_x)
    dzdy = (dem[2:, :] - dem[:-2, :]) / (2 * cell_size_y)

    # Central differences lose one cell on each side of their own axis;
    # pad them back to the DEM shape by repeating the edge values.
    dzdx = np.pad(dzdx, ((0, 0), (1, 1)), mode='edge')
    dzdy = np.pad(dzdy, ((1, 1), (0, 0)), mode='edge')

    slope = np.arctan(np.sqrt(dzdx**2 + dzdy**2)) * (180 / np.pi)
    aspect = np.arctan2(dzdy, -dzdx) * (180 / np.pi)
    # arctan2 yields (-180, 180]; move negative angles into [0, 360).
    aspect = np.where(aspect < 0, 360 + aspect, aspect)

    return slope, aspect


# Load the first band of the GEBCO raster and derive slope/aspect from it.
# Forward slashes keep the relative path valid on both Windows and POSIX
# (a backslash is only a path separator on Windows). The context manager
# closes the dataset once the band is in memory; `gebco` stays bound to the
# closed dataset afterwards.
with rs.open('../../data/auxiliary_data/gebco.tif') as gebco:
    depth = gebco.read(1)

# Cell sizes along x and y are hard-coded (presumably metres per cell for
# this raster -- TODO confirm against the raster resolution).
slope, aspect = calculate_slope_aspect(depth, 263, 463)

In [None]:

# Key of the model (entry of diff_grain_size) whose residuals are inspected.
key_to_watch = 20

# Background image is the slope grid, so the title names slope, not depth.
# The image's own colour scale is hidden, leaving only the marker colour bar.
fig = px.imshow(slope, color_continuous_scale='temps', title='Slope with Data Points')
fig.update_layout(coloraxis_showscale=False)

# Add scatter plot for the data points, coloured by the residual
# (observed minus median-predicted grain size) of the watched model.
# 'x_im' / 'y_im' are presumably image column/row coordinates -- TODO confirm.
fig.add_trace(go.Scatter(
    x=train_set['x_im'],
    y=train_set['y_im'],
    mode='markers',
    marker=dict(
        size=1,
        color=diff_grain_size[key_to_watch],
        colorscale='magma',
        colorbar=dict(title='Grain size residual'),
    ),
    name='Data Points'
))

# Update layout
fig.update_layout(
    xaxis_title='Column',
    yaxis_title='Row',
    yaxis=dict(autorange='reversed')  # Reverse the y-axis to match the image orientation
)

# Show the plot
fig.show()

In [None]:
# Split the training samples by the absolute residual of the watched model:
# rows at or above the threshold count as errors, all others as correct.
# NaN residuals compare False against the threshold, so they land in the
# "correct" group.
error_thres = 5
abs_residual = np.abs(diff_grain_size[key_to_watch])
idx_error = abs_residual >= error_thres

idx_Corr = ~idx_error
correct_dataframe = train_set.loc[idx_Corr]
error_dataframe = train_set.loc[idx_error]


In [None]:
# Summary statistics of the samples whose residual reaches the threshold.
error_summary = error_dataframe.describe()
print(error_summary)


In [None]:
# Summary statistics of the samples whose residual stays below the threshold.
correct_summary = correct_dataframe.describe()
print(correct_summary)

In [None]:
## Could we split the datasets? 

