In [1]:
from src.data.datasets.glc_po import GLCPODataset
from src.data.predictors.point_bioclim import PointwiseBioclimEuropePredictor
from src.data.predictors.loc import LocationPredictor
import numpy as np
import torch
import pandas as pd
from src.data.predictors.point_bioclim import PointwiseBioclimEuropePredictor
from src.data.predictors.loc import LocationPredictor
import torch

In [2]:
# Locations of the scaled bioclim+elevation raster and the presence-only training table.
bioclim_path = "/shares/wegner.ics.uzh/glc23_data/bioclim+elev/bioclim_elevation_scaled_europe.npy"
po_dataset_path = "/shares/wegner.ics.uzh/glc23_data/Presences_only_train.csv"

# Instantiate one predictor per input modality.
bioclim = PointwiseBioclimEuropePredictor(bioclim_path)
loc = LocationPredictor()

# Name -> predictor mapping; insertion order is location first, then bioclim.
predictors = dict(
    loc_cyclical_europe=loc,
    bioclim_pointwise_europe=bioclim,
)

In [3]:
# Mimics what abstract_dataset.py does: query every predictor at one fixed location.
sample = {}
for predictor_name in predictors:
    predictor = predictors[predictor_name]
    print(predictor_name, predictor)
    sample[predictor_name] = predictor.sample_location(lon=30, lat=40, time=None)

# Perturb the bioclim features in place with standard-normal noise (std dev = 1).
noise = torch.randn_like(sample["bioclim_pointwise_europe"])
sample["bioclim_pointwise_europe"].add_(noise)

loc_cyclical_europe Location embedder with mode default and bounds {'north': 90, 'south': -90, 'west': -180, 'east': 180} (strict: False)
bioclim_pointwise_europe Bioclimatic embedder for Europe with resolution 1km and bounds {'north': 72, 'south': 34, 'west': -11, 'east': 35} (strict: True)


In [20]:
# For each bioclimatic variable (one size-1 slice along axis 2 of the raster),
# compute the NaN-aware max, min, standard deviation and max-min range,
# persist the ranges to disk, and show everything as a summary table.

context_feats = np.load(bioclim_path).astype(np.float32)
raster_raw = torch.from_numpy(context_feats)
# Split along axis 2 into single-variable slices, then flatten each slice to 1-D.
matrix_list = torch.split(raster_raw, 1, dim=2)
flattened_matrices = [matrix.view(-1) for matrix in matrix_list]

max_values = []
min_values = []
std_values = []
ranges = []

for flattened_matrix in flattened_matrices:
    # NumPy's nan* reductions ignore NaN cells in the raster.
    flattened_array = flattened_matrix.numpy()

    max_val = np.nanmax(flattened_array)
    max_values.append(max_val)

    min_val = np.nanmin(flattened_array)
    min_values.append(min_val)

    std_val = np.nanstd(flattened_array)
    std_values.append(std_val)

    # Append directly instead of binding the result to a variable called `range`:
    # that name would shadow the builtin for every later cell in this kernel.
    ranges.append(max_val - min_val)

# save the max-min range of the variables to use it later
torch.save(torch.tensor(ranges), "/shares/wegner.ics.uzh/glc23_data/bioclim+elev/bioclim_var_range.pt")

# One row per variable, columns: max, min, range, std.
raster_summary = pd.DataFrame([max_values, min_values, ranges, std_values], index=["max", "min", "range", "std"]).T
raster_summary

Unnamed: 0,max,min,range,std
0,2.509159,-5.674059,8.183218,1.014119
1,9.268866,-1.48209,10.750956,1.017262
2,3.976711,-5.276648,9.253359,1.008278
3,2.774734,-2.627551,5.402285,1.01043
4,7.367673,-1.953472,9.321145,1.005921
5,2.508163,-2.570136,5.0783,1.010508
6,10.10914,-2.2942,12.40334,1.018531
7,3.905723,-4.424264,8.329987,1.006047
8,11.770404,-2.847799,14.618202,1.010044
9,5.48212,-2.30166,7.78378,1.000815


In [25]:
# Write the per-variable max-min ranges to disk as a tensor.
torch.save(
    obj=torch.tensor(ranges),
    f="/shares/wegner.ics.uzh/glc23_data/bioclim+elev/bioclim_var_range.pt",
)

In [25]:
# Recompute each variable's spread from the extrema stored in the summary table.
raster_summary['range'] = raster_summary['max'].sub(raster_summary['min'])

# Share of a variable's range used as its noise standard deviation (tunable).
noise_fraction = 0.1

# Per-variable noise standard deviation, kept both as a column and as a plain list.
raster_summary['std_dev'] = noise_fraction * raster_summary['range']
std_deviations = raster_summary['std_dev'].to_list()

# Work on a copy of the raster in which every NaN cell is replaced by zero.
raster = raster_raw.clone()
raster.masked_fill_(torch.isnan(raster), 0.0)

In [27]:
# All-zero noise tensor with the same shape and dtype as the bioclim sample.
noise = torch.zeros_like(input=sample["bioclim_pointwise_europe"])

In [27]:
# Noise standard deviation per variable = this fraction of the variable's max-min range.
fraction = 0.1
noise_stdev = fraction * torch.tensor(ranges)

# Standard-normal draw shaped like the bioclim sample, scaled per variable.
noise = noise_stdev * torch.randn_like(sample["bioclim_pointwise_europe"])

In [29]:
# Inspect the per-variable noise standard deviations (range * fraction).
noise_stdev

tensor([0.8183, 1.0751, 0.9253, 0.5402, 0.9321, 0.5078, 1.2403, 0.8330, 1.4618,
        0.7784, 0.4701, 0.7877, 0.9346, 0.7122, 0.6940, 0.2020, 0.9134, 1.2151,
        1.2200, 0.9465])

In [28]:
# Inspect the current noise tensor.
noise

tensor([-0.9598, -0.5885,  0.8878, -0.8077,  1.7131, -0.7112, -1.6376,  0.6313,
        -1.7887,  0.0258,  0.1389,  1.0446, -0.8078,  1.2019,  1.4991,  0.2133,
        -0.0404,  0.2499, -1.3320, -0.9017])

In [None]:
# Perturb the bioclim sample with Gaussian noise whose per-variable standard
# deviation is a fixed fraction of that variable's max-min range.
fraction = 0.1

# `variable_range` was not defined anywhere in the notebook (NameError on run);
# build it from the per-variable `ranges` list computed in the statistics cell.
variable_range = torch.tensor(ranges)
noise_stdev = variable_range*fraction

noise = torch.randn_like(sample["bioclim_pointwise_europe"])*noise_stdev
# Add the noise to the sample (in place, so re-running this cell adds noise again)
sample["bioclim_pointwise_europe"] += noise