In [1]:
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import rasterio
from tqdm.notebook import tqdm
import pandas as pd
import rasterio
import random
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support

import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as v2
import torchvision.models as models
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchinfo import summary

from terratorch.models.pixel_wise_model import freeze_module
from huggingface_hub import hf_hub_download
from terratorch.models.backbones.prithvi_mae import PrithviViT

In [2]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch. When CUDA is available, all CUDA devices are seeded as well and
    cuDNN is switched to deterministic mode with benchmarking disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)  # Python's built-in RNG (was documented but never actually seeded)
    np.random.seed(seed)  # NumPy global RNG
    torch.manual_seed(seed)  # PyTorch RNG
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        # Ask cuDNN for deterministic kernels and disable its auto-tuner.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [3]:
# --- Run configuration ---
batch_size = 32
num_workers = 6
# Species with fewer occurrences than this are merged into a single "-1" class.
pa_presence_threshold = 1
num_classes_total = 11255
landsat_year_len = 18
bioclim_month_len = 12 * landsat_year_len - 1
# Fraction of surveys sampled into the validation split.
validation_prop = 0.1

# --- Statistics handed to Normalize for the Landsat inputs ---
mean_landsat = np.array(
    [15.0698, 16.0923, 7.9312, 68.9794, 47.9505, 24.8804, 7089.4349, 2830.6658]
)
std_landsat = np.array(
    [11.7218, 10.2417, 9.6499, 18.7112, 13.1681, 9.2436, 3332.3618, 56.7270]
)

# --- Statistics handed to Normalize for the Sentinel inputs ---
mean_sentinel = np.array([624.8547, 684.7646, 456.7674, 2924.1753])
std_sentinel = np.array([416.0408, 351.1005, 315.8956, 943.6141])

class HorizontalCycleTransform(torch.nn.Module):
    """Random cyclic shift of a tensor along its last dimension.

    A start offset is drawn uniformly from ``[0, img.shape[-1])`` and the
    tensor is rotated so that the element at that offset becomes the first
    one; elements shifted off the front wrap around to the end.
    """

    def forward(self, img):
        """Return ``img`` cyclically shifted along its last axis.

        Args:
            img: Tensor with at least one dimension. Any number of leading
                dimensions is accepted (the earlier implementation indexed
                ``[:, :, ...]`` and therefore only handled 3-D input).

        Returns:
            A new tensor with the same shape as ``img``.
        """
        width = img.shape[-1]
        # Same random draw as before, so seeded runs pick the same offsets.
        start = int(torch.randint(width, (1,))[0])
        # Rolling by -start equals slicing [start:start+width] out of cat([img, img], -1).
        return torch.roll(img, shifts=-start, dims=-1)

# class HorizontalCycleTransform(torch.nn.Module):
#     def forward(self, img):
#         new_img = img[:,:,torch.randperm(img.shape[-1])]
#         return new_img

# Landsat: normalise, then (training only) apply a random cyclic shift along
# the last axis. A plain RandomHorizontalFlip(p=0.5) was tried here and is disabled.
transform_landsat = v2.Compose(
    [
        v2.Normalize(mean=mean_landsat, std=std_landsat),
        HorizontalCycleTransform(),
    ]
)
transform_landsat_test = v2.Compose(
    [
        v2.Normalize(mean=mean_landsat, std=std_landsat),
    ]
)

# Sentinel: normalise, then (training only) random flips, a rotation of up to
# +/-180 degrees and a random resized crop to 64 pixels.
transform_sentinel = v2.Compose(
    [
        v2.Normalize(mean=mean_sentinel, std=std_sentinel),
        v2.RandomHorizontalFlip(p=0.5),
        v2.RandomVerticalFlip(p=0.5),
        v2.RandomRotation(degrees=180),
        v2.RandomResizedCrop(size=64, scale=(0.25, 1.0)),
    ]
)
transform_sentinel_test = v2.Compose(
    [
        v2.Normalize(mean=mean_sentinel, std=std_sentinel),
    ]
)

In [4]:
set_seed(42)

# Root of the GeoLifeCLEF 2025 data. Defaults to the original location; set the
# GLC25_DATA_DIR environment variable to run the notebook on another machine.
path_data = os.environ.get("GLC25_DATA_DIR", "/home/gt/DATA/geolifeclef-2025")
train_path_sentinel = os.path.join(path_data, "SatelitePatches/PA-train")
train_path_landsat = os.path.join(path_data, "SateliteTimeSeries-Landsat/cubes/PA-train")
train_path_bioclim = os.path.join(path_data, "BioclimTimeSeries/cubes/PA-train")

# Occurrence-level table; rows without a species id are dropped.
train_metadata = (
    pd.read_csv(os.path.join(path_data, "GLC25_PA_metadata_train.csv"))
    .dropna(subset="speciesId")
    .reset_index(drop=True)
)
train_metadata["speciesId"] = train_metadata["speciesId"].astype(int)
train_metadata["speciesIdOrig"] = train_metadata["speciesId"]  # keep the raw id

# Species seen fewer than `pa_presence_threshold` times are pooled into id -1.
species_counts = train_metadata["speciesId"].value_counts()
frequent_species = species_counts[species_counts >= pa_presence_threshold].index
train_metadata.loc[~train_metadata["speciesId"].isin(frequent_species), "speciesId"] = -1

# Re-encode the remaining ids as consecutive category codes.
sp_categorical = train_metadata["speciesId"].astype("category").values
num_classes = len(sp_categorical.categories)
train_metadata["speciesId"] = sp_categorical.codes

def _mean_finite_area(areas):
    """Mean of the finite entries of ``areas``; 1.0 when every entry is infinite."""
    infinite = np.isinf(areas)
    if infinite.all():
        return 1.0
    return np.mean(areas, where=~infinite)


# Collapse the occurrence-level table to one row per survey; the species codes
# of a survey are collected into a list, the distinct reported areas likewise.
per_survey = train_metadata.groupby("surveyId").agg(
    {
        "surveyId": "first",
        "lat": "first",
        "lon": "first",
        "areaInM2": lambda x: list(x.unique()),
        "region": "first",
        "country": "first",
        "speciesId": list,
    }
)
train_label_series = per_survey.set_index("surveyId").speciesId
train_metadata = per_survey.drop(columns=["speciesId"]).set_index("surveyId", drop=False)

train_metadata["area"] = train_metadata["areaInM2"].apply(_mean_finite_area)
train_metadata["areaLog"] = np.log10(train_metadata["area"])

# Fill missing values by assignment. The previous `df[col].fillna(..., inplace=True)`
# acts on a temporary under pandas Copy-on-Write and would leave the NaNs in place
# (silently, because warnings are suppressed at the top of the notebook).
train_metadata["area"] = train_metadata["area"].fillna(train_metadata["area"].mean())
train_metadata["areaLog"] = train_metadata["areaLog"].fillna(train_metadata["areaLog"].mean())

# One boolean flag per explicitly named country, plus a catch-all for the rest.
named_countries = {
    "conFra": "France",
    "conDen": "Denmark",
    "conNet": "Netherlands",
    "conIta": "Italy",
    "conAus": "Austria",
}
for flag_column, country_name in named_countries.items():
    train_metadata[flag_column] = train_metadata["country"] == country_name
train_metadata["conOther"] = ~train_metadata["country"].isin(list(named_countries.values()))
# Elevation: missing values are replaced by the column mean. Assignment is used
# instead of `Series.fillna(..., inplace=True)` on a column selection, which does
# not write back to the frame under pandas Copy-on-Write.
train_elevation = pd.read_csv(os.path.join(path_data, "EnvironmentalValues", "Elevation", "GLC25-PA-train-elevation.csv"), index_col=0)
train_elevation["Elevation"] = train_elevation["Elevation"].fillna(train_elevation["Elevation"].mean())

# SoilGrids: every column is mean-imputed independently.
train_soil = pd.read_csv(os.path.join(path_data, "EnvironmentalValues", "SoilGrids", "GLC25-PA-train-soilgrids.csv"), index_col=0)
for column in train_soil.columns:
    train_soil[column] = train_soil[column].fillna(train_soil[column].mean())

# WorldCover class -> one-hot columns "wc_<class>"; classes 70 and 100 are dropped.
train_worldcover = pd.read_csv(os.path.join(path_data, "worldcover", "s2_pa_train_survey_points_with_worldcover.csv"), index_col=0)
train_wcdummy = pd.get_dummies(train_worldcover["class"], prefix="wc").drop(columns=["wc_70", "wc_100"])

# LandCover: keep only the columns at these positions.
train_landcover = pd.read_csv(os.path.join(path_data, "EnvironmentalValues", "LandCover", "GLC25-PA-train-landcover.csv"), index_col=0)
landcover_col_ind = [0,2,3,5,8,11,12]
train_landcover = train_landcover.iloc[:, landcover_col_ind]

# Sanity check: every covariate table must have exactly the same index (same
# surveyIds, same order) as train_metadata before they are concatenated side by
# side. The result is only printed, not enforced.
print("All rows match: ", (train_metadata.index==train_elevation.index).all() and (train_metadata.index==train_soil.index).all() \
     and (train_metadata.index==train_worldcover.index).all() and (train_metadata.index==train_landcover.index).all())
# Names of all columns used as model covariates.
cov_columns = ["areaLog", "Elevation", "conFra", "conDen", "conNet", "conIta", "conAus", "conOther"] + list(train_soil.columns) + list(train_wcdummy.columns) + list(train_landcover.columns)
train_combined = pd.concat([train_metadata, train_elevation.Elevation, train_soil, train_wcdummy, train_landcover], axis=1)
# Per-column mean and std computed on the training set; the same coefficients
# are reused later to normalise the test covariates.
cov_norm_coef = train_combined.loc[:,cov_columns].agg(['mean', 'std'])
# Indicator columns get mean 0 / std 1 so that the standardisation below leaves
# their 0/1 values unchanged.
dummy_columns = ["conFra","conDen","conNet","conIta","conAus","conOther"] + list(train_wcdummy.columns)
cov_norm_coef.loc["mean",dummy_columns] = 0
cov_norm_coef.loc["std",dummy_columns] = 1
# Standardise all covariates: (value - mean) / std.
train_combined.loc[:,cov_columns] = (train_combined.loc[:,cov_columns] - cov_norm_coef.loc["mean"]) / cov_norm_coef.loc["std"]

# Hold out a random `validation_prop` fraction of the surveys for validation.
# `sample` is called without `random_state`, so the draw comes from NumPy's
# global RNG (seeded by set_seed).
val_ind = np.sort(train_combined.surveyId.sample(frac=validation_prop).values)

# Explicit boolean masks instead of unpacking a two-group groupby: this does not
# depend on group ordering and still works when one side of the split is empty.
is_validation = train_combined.surveyId.isin(val_ind)
train_data = train_combined.loc[~is_validation].reset_index(drop=True)
val_data = train_combined.loc[is_validation].reset_index(drop=True)

# surveyId -> list of species codes, for each split.
train_label_dict = train_label_series.loc[train_data.surveyId].to_dict()
val_label_dict = train_label_series.loc[val_data.surveyId].to_dict()


All rows match:  True


In [5]:
# Load Test metadata
test_path_sentinel = os.path.join(path_data, "SatelitePatches/PA-test")
test_path_landsat = os.path.join(path_data, "SateliteTimeSeries-Landsat/cubes/PA-test")
test_path_bioclim = os.path.join(path_data, "BioclimTimeSeries/cubes/PA-test")

# One row per test survey, indexed and sorted by surveyId (the column is kept too).
test_metadata = (
    pd.read_csv(os.path.join(path_data, "GLC25_PA_metadata_test.csv"))
    .set_index("surveyId", drop=False)
    .sort_index()
    .rename(columns={"areaInM2": "area"})
)

# Infinite areas become 1.0, mirroring the training-side handling.
test_metadata["area"] = test_metadata["area"].replace([np.inf, -np.inf], 1.0)
test_metadata["areaLog"] = np.log10(test_metadata["area"])

# Mean-impute by assignment. `df[col].fillna(..., inplace=True)` operates on a
# temporary under pandas Copy-on-Write and would silently leave NaNs behind.
test_metadata["area"] = test_metadata["area"].fillna(test_metadata["area"].mean())
test_metadata["areaLog"] = test_metadata["areaLog"].fillna(test_metadata["areaLog"].mean())

# Same country indicator columns as built for the training table.
named_countries = {
    "conFra": "France",
    "conDen": "Denmark",
    "conNet": "Netherlands",
    "conIta": "Italy",
    "conAus": "Austria",
}
for flag_column, country_name in named_countries.items():
    test_metadata[flag_column] = test_metadata["country"] == country_name
test_metadata["conOther"] = ~test_metadata["country"].isin(list(named_countries.values()))
# Each covariate table is sorted by surveyId and restricted to the surveys that
# appear in test_metadata. Missing values are mean-imputed by assignment; the
# previous `frame[col].fillna(..., inplace=True)` on these filtered frames does
# not write back under pandas Copy-on-Write.
test_elevation = pd.read_csv(os.path.join(path_data, "EnvironmentalValues", "Elevation", "GLC25-PA-test-elevation.csv"), index_col=0).sort_index()
test_elevation = test_elevation.loc[test_elevation.index.isin(test_metadata.index)].copy()
test_elevation["Elevation"] = test_elevation["Elevation"].fillna(test_elevation["Elevation"].mean())

test_soil = pd.read_csv(os.path.join(path_data, "EnvironmentalValues", "SoilGrids", "GLC25-PA-test-soilgrids.csv"), index_col=0).sort_index()
test_soil = test_soil.loc[test_soil.index.isin(test_metadata.index)].copy()
for column in test_soil.columns:
    test_soil[column] = test_soil[column].fillna(test_soil[column].mean())

# WorldCover one-hot columns. Only "wc_100" is dropped here (training also drops
# "wc_70"). NOTE(review): the disabled insert below suggests class 70 is absent
# from the test table — confirm the columns match train_wcdummy.
test_worldcover = pd.read_csv(os.path.join(path_data, "worldcover", "pa_test_survey_points_with_worldcover.csv"), index_col=0).sort_index()
test_wcdummy = pd.get_dummies(test_worldcover["class"], prefix="wc").drop(columns="wc_100")
# test_wcdummy.insert(6, "wc_70", False)

# LandCover: same positional column selection as for the training table.
test_landcover = pd.read_csv(os.path.join(path_data, "EnvironmentalValues", "LandCover", "GLC25-PA-test-landcover.csv"), index_col=0).sort_index()
test_landcover = test_landcover.loc[test_landcover.index.isin(test_metadata.index)]
test_landcover = test_landcover.iloc[:, landcover_col_ind]

# Sanity check: all test covariate tables must share test_metadata's index
# (same surveyIds in the same order). The result is only printed, not enforced.
print("All surveyId match: ", (test_metadata.index==test_elevation.index).all() and (test_metadata.index==test_soil.index).all() \
     and (test_metadata.index==test_worldcover.index).all() and (test_metadata.index==test_landcover.index).all())
test_combined = pd.concat([test_metadata, test_elevation.Elevation, test_soil, test_wcdummy, test_landcover], axis=1)
# Normalise with the coefficients computed on the training set (cov_norm_coef),
# not with statistics of the test set itself.
test_combined.loc[:,cov_columns] = (test_combined.loc[:,cov_columns] - cov_norm_coef.loc["mean"]) / cov_norm_coef.loc["std"]
# Replace the surveyId index with a plain 0..n-1 index; surveyId stays available as a column.
test_combined.reset_index(drop=True, inplace=True)


All surveyId match:  True


In [7]:
# Attach a saved prediction file to the test covariates (joined on surveyId) and
# split the surveys by whether the `predictions` field is present.
pred = pd.concat(
    [
        test_combined.set_index("surveyId"),
        pd.read_csv("/home/gt/gdrive/geolifeclef/prithvi/0424_225841_addlandcover_swapDOorder_testfail/0425_012850_e075_vloss0.008629_vf0.3079.csv", index_col=0),
    ],
    axis=1,
)
has_prediction = pred["predictions"].notna()
pred_ok = pred.loc[has_prediction]
pred_na = pred.loc[~has_prediction]

In [8]:
# Size of the summary table for surveys whose prediction is missing.
na_summary = pred_na.describe()
na_summary.shape

(8, 37)

In [9]:
# Stack the summaries of surveys without and with predictions, separated by an
# all-zero row, and show every column.
na_summary = pred_na.describe()
ok_summary = pred_ok.describe()
separator_row = 0 * na_summary.iloc[:1]
stacked_summary = pd.concat([na_summary, separator_row, ok_summary])
with pd.option_context("display.max_rows", 20, "display.max_columns", None):
    display(stacked_summary)

Unnamed: 0,lon,lat,year,geoUncertaintyInM,area,areaLog,conFra,conDen,conNet,conIta,conAus,conOther,Elevation,Soilgrid-bdod,Soilgrid-cec,Soilgrid-cfvo,Soilgrid-clay,Soilgrid-nitrogen,Soilgrid-phh2o,Soilgrid-sand,Soilgrid-silt,Soilgrid-soc,wc_10,wc_20,wc_30,wc_40,wc_50,wc_60,wc_80,wc_90,LandCover-1,LandCover-3,LandCover-4,LandCover-6,LandCover-9,LandCover-12,LandCover-13
count,8607.0,8607.0,8607.0,7990.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0,8607.0
mean,20.304084,46.447843,2018.377019,45.267835,46.672182,-0.700792,0.009062,0.0,0.0,0.012316,0.002091,0.976531,1.209683,0.638797,0.991303,0.337557,1.521158,0.054171,0.811955,-1.257532,0.953712,-0.552832,0.253166,0.004764,0.441733,0.151853,0.0682,0.036947,0.010457,0.032183,0.182285,0.124407,0.674193,0.073148,0.4345,0.083815,0.115633
std,11.199327,3.792022,1.255135,308.221535,102.046194,0.916881,0.09477,0.0,0.0,0.110296,0.045686,0.151397,1.924348,1.599696,1.041423,0.944642,0.873844,1.377375,0.973238,0.623431,0.547055,1.108656,0.434851,0.068858,0.496622,0.358899,0.252104,0.188642,0.101727,0.176496,0.626324,1.381657,0.890214,1.052188,0.812897,1.229791,0.722952
min,-6.81673,35.828664,2017.0,0.0,1.0,-2.553002,0.0,0.0,0.0,0.0,0.0,0.0,-0.411543,-4.639726,-1.739516,-2.429179,-1.621039,-1.983258,-2.797776,-2.975847,-1.394923,-2.013711,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.731409,-1.518093,-1.938199,-9.440161,-2.869573,-0.355863,-4.218206
25%,8.888392,42.85423,2017.0,0.0,10.0,-1.015741,0.0,0.0,0.0,0.0,0.0,1.0,-0.161046,-0.687453,0.431651,-0.308162,1.101739,-0.983778,0.43403,-1.739935,0.711753,-1.430371,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.35374,-1.081651,0.018899,-0.541754,-0.098826,-0.355863,0.236219
50%,23.71203,46.562233,2018.0,1.0,16.0,-0.701955,0.0,0.0,0.0,0.0,0.0,1.0,0.249489,0.629971,0.837254,0.391011,1.503308,-0.155925,0.613575,-1.222188,0.858304,-0.741475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.056531,-0.208768,0.997448,0.411647,1.088638,-0.355863,0.236219
75%,30.352688,48.221248,2019.0,5.0,25.0,-0.404003,0.0,0.0,0.0,0.0,0.0,1.0,1.86032,1.83761,1.600741,0.959276,2.178167,0.944513,1.511299,-0.932001,1.316277,0.102978,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.537886,0.227673,0.997448,0.835381,1.088638,-0.355863,0.236219
max,34.961744,70.410833,2021.0,10000.0,1000.0,2.058781,1.0,0.0,0.0,1.0,1.0,1.0,8.557618,4.252887,5.966933,4.94265,5.015074,7.032257,3.665836,1.444486,3.82597,9.819745,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.023929,2.846322,1.975997,1.047248,2.671922,5.103373,0.236219
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
count,6177.0,6177.0,6177.0,5954.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0,6177.0


In [10]:
# Summary statistics of the normalised training table, all columns visible.
train_summary = train_combined.describe()
with pd.option_context("display.max_rows", 10, "display.max_columns", None):
    display(train_summary)

Unnamed: 0,surveyId,lat,lon,area,areaLog,conFra,conDen,conNet,conIta,conAus,conOther,Elevation,Soilgrid-bdod,Soilgrid-cec,Soilgrid-cfvo,Soilgrid-clay,Soilgrid-nitrogen,Soilgrid-phh2o,Soilgrid-sand,Soilgrid-silt,Soilgrid-soc,wc_10,wc_20,wc_30,wc_40,wc_50,wc_60,wc_80,wc_90,LandCover-1,LandCover-3,LandCover-4,LandCover-6,LandCover-9,LandCover-12,LandCover-13
count,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0
mean,1963014.0,52.777935,8.542301,118.648592,-9.326238000000001e-17,0.151719,0.554508,0.168834,0.02924,0.014171,0.081529,-9.022816000000001e-18,1.503177e-15,4.618404e-16,3.138023e-16,-1.44365e-16,-4.611218e-17,1.021415e-15,2.529582e-16,3.00867e-16,9.258367000000001e-17,0.386281,0.00781,0.482924,0.030757,0.009676,0.026599,0.011193,0.044209,-1.866046e-16,-1.453232e-17,-5.042397e-17,4.931408e-16,2.250115e-16,1.421293e-17,-2.135932e-16
std,1134918.0,4.653078,3.908661,190.802747,1.0,0.35875,0.497023,0.374607,0.16848,0.118195,0.273647,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.486899,0.08803,0.499711,0.17266,0.097888,0.16091,0.105202,0.20556,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,212.0,35.11705,-9.018346,0.04,-4.702,0.0,0.0,0.0,0.0,0.0,0.0,-0.4115426,-4.31037,-3.0279,-1.782527,-1.671028,-1.902492,-3.156865,-2.775429,-2.356667,-2.041489,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.028618,-1.518093,-1.938199,-9.440161,-3.001514,-0.3558634,-8.672632
25%,974945.5,51.12027,5.918149,25.0,-0.4040032,0.0,0.0,0.0,0.0,0.0,0.0,-0.3802305,-0.6874534,-0.6658617,-0.6444205,-0.7212268,-0.6203302,-0.6432385,-0.7044415,-0.6072095,-0.6803635,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.35374,-1.081651,-0.9596502,-0.4358202,-0.2307659,-0.3558634,0.2362192
50%,1961189.0,55.21734,9.13135,79.0,0.3641463,0.0,1.0,0.0,0.0,0.0,0.0,-0.3419603,-0.02874128,6.781128e-16,-0.2564295,-0.2088339,-0.1155421,-0.1046042,0.1751714,-0.1034391,-0.04146806,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.05653134,0.227673,0.01889893,0.1997803,-0.09882554,-0.3558634,0.2362192
75%,2951540.0,56.257105,10.20494,79.0,0.3641463,0.0,1.0,0.0,0.0,0.0,0.0,-0.2027956,0.5201855,0.6940998,0.2091597,0.5660041,0.3286714,0.43403,0.6483809,0.5377232,0.4640927,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.537886,0.227673,0.997448,0.7294474,1.088638,-0.3558634,0.2362192
max,3919655.0,59.893,29.154211,8000.0,3.447066,1.0,1.0,1.0,1.0,1.0,1.0,10.91646,3.813746,4.63083,6.98607,4.427697,9.596581,3.665836,2.134815,4.073275,8.586399,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.023929,2.846322,1.975997,1.047248,2.671922,5.103373,0.2362192


In [11]:
# Summary statistics of the selected test land-cover columns (raw values; the
# normalised copy lives in test_combined).
test_landcover.describe()

Unnamed: 0,LandCover-1,LandCover-3,LandCover-4,LandCover-6,LandCover-9,LandCover-12,LandCover-13
count,14784.0,14784.0,14784.0,14784.0,14784.0,14784.0,14784.0
mean,9.922213,3.995586,4.663538,89.626488,23.426052,0.725176,1.957589
std,3.224896,2.893699,2.062316,9.826064,7.532894,2.00366,0.201866
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,8.0,1.0,4.0,84.0,21.0,0.0,2.0
50%,10.0,4.0,4.5,93.0,22.0,0.0,2.0
75%,12.0,6.0,6.0,97.0,31.0,0.0,2.0
max,17.0,10.0,8.0,99.0,43.0,9.0,2.0


In [12]:
# Summary statistics of the selected training land-cover columns (raw values; the
# normalised copy lives in train_combined).
train_landcover.describe()

Unnamed: 0,LandCover-1,LandCover-3,LandCover-4,LandCover-6,LandCover-9,LandCover-12,LandCover-13
count,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0,88987.0
mean,10.190208,3.478342,3.961374,89.114095,22.749017,0.58667,1.94697
std,3.364639,2.291258,2.043842,9.439892,7.579182,1.648582,0.224496
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,9.0,1.0,2.0,85.0,21.0,0.0,2.0
50%,10.0,4.0,4.0,91.0,22.0,0.0,2.0
75%,12.0,4.0,6.0,96.0,31.0,0.0,2.0
max,17.0,10.0,8.0,99.0,43.0,9.0,2.0


In [14]:
# Display the test land-cover table (pandas truncates the middle rows).
test_landcover

Unnamed: 0_level_0,LandCover-1,LandCover-3,LandCover-4,LandCover-6,LandCover-9,LandCover-12,LandCover-13
surveyId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
642,5.0,6.00,4.00,97.0,15.0,0.00,2.0
1792,10.0,1.00,6.00,87.0,31.0,2.25,2.0
3256,10.0,1.00,6.00,95.0,31.0,0.00,2.0
3855,10.0,1.00,6.00,96.0,32.0,0.00,2.0
4889,9.0,4.00,1.00,91.0,22.0,0.00,2.0
...,...,...,...,...,...,...,...
5010108,12.0,3.00,5.00,87.0,31.0,0.00,2.0
5010109,17.0,2.25,1.75,99.0,3.0,2.00,1.0
5010110,17.0,2.25,1.75,99.0,3.0,2.00,1.0
5010111,17.0,2.25,1.75,99.0,3.0,2.00,1.0


In [15]:
# Last 30 rows, i.e. the highest surveyIds, since the table was sorted by index on load.
test_landcover.tail(30)

Unnamed: 0_level_0,LandCover-1,LandCover-3,LandCover-4,LandCover-6,LandCover-9,LandCover-12,LandCover-13
surveyId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5010083,5.0,7.0,1.0,96.0,15.0,0.0,2.0
5010084,8.0,4.0,1.0,79.0,21.0,0.0,2.0
5010085,5.0,7.0,1.0,96.0,15.0,0.0,2.0
5010086,5.0,7.0,1.0,96.0,15.0,0.0,2.0
5010087,1.0,7.0,1.0,95.0,11.0,0.0,2.0
5010088,12.0,3.0,5.0,99.0,31.0,0.0,2.0
5010089,5.0,6.0,4.0,99.0,15.0,0.0,2.0
5010090,5.0,6.0,4.0,97.0,15.0,0.0,2.0
5010091,1.0,7.0,1.0,97.0,11.0,0.0,2.0
5010092,5.0,7.0,1.0,82.0,15.0,0.0,2.0


In [16]:
# Rows whose surveyId is at least 5,000,000.
# NOTE(review): the meaning of this id range is not shown in the notebook — confirm.
test_landcover.loc[test_landcover.index >= 5_000_000]

Unnamed: 0_level_0,LandCover-1,LandCover-3,LandCover-4,LandCover-6,LandCover-9,LandCover-12,LandCover-13
surveyId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5000000,8.0,4.00,1.00,87.0,21.0,0.0,2.0
5000001,8.0,4.00,1.00,75.0,21.0,0.0,2.0
5000002,8.0,4.00,1.00,69.0,21.0,0.0,2.0
5000003,8.0,4.00,1.00,77.0,21.0,0.0,2.0
5000004,9.0,4.00,1.00,85.0,22.0,0.0,2.0
...,...,...,...,...,...,...,...
5010108,12.0,3.00,5.00,87.0,31.0,0.0,2.0
5010109,17.0,2.25,1.75,99.0,3.0,2.0,1.0
5010110,17.0,2.25,1.75,99.0,3.0,2.0,1.0
5010111,17.0,2.25,1.75,99.0,3.0,2.0,1.0


In [17]:
# Reload the land-cover table with all of its columns, restricted to the surveys
# in test_metadata. This overwrites the column-subset `test_landcover` built earlier.
test_landcover = (
    pd.read_csv(
        os.path.join(path_data, "EnvironmentalValues", "LandCover", "GLC25-PA-test-landcover.csv"),
        index_col=0,
    )
    .sort_index()
    .loc[lambda frame: frame.index.isin(test_metadata.index)]
)
# Rows in which every column holds the same value (at most one distinct value).
test_landcover.loc[test_landcover.nunique(axis=1) <= 1]

Unnamed: 0_level_0,LandCover-1,LandCover-2,LandCover-3,LandCover-4,LandCover-5,LandCover-6,LandCover-7,LandCover-8,LandCover-9,LandCover-10,LandCover-11,LandCover-12,LandCover-13
surveyId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
920308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
# Same check on the complete file, without restricting to test_metadata's surveys.
# This again overwrites `test_landcover`.
test_landcover = pd.read_csv(
    os.path.join(path_data, "EnvironmentalValues", "LandCover", "GLC25-PA-test-landcover.csv"),
    index_col=0,
).sort_index()
# Rows in which every column holds the same value (at most one distinct value).
test_landcover.loc[test_landcover.nunique(axis=1) <= 1]

Unnamed: 0_level_0,LandCover-1,LandCover-2,LandCover-3,LandCover-4,LandCover-5,LandCover-6,LandCover-7,LandCover-8,LandCover-9,LandCover-10,LandCover-11,LandCover-12,LandCover-13
surveyId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
920308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Reload the training land-cover table with all columns (overwriting the
# column-subset `train_landcover`) and list the surveyIds whose row holds at
# most one distinct value.
train_landcover = pd.read_csv(
    os.path.join(path_data, "EnvironmentalValues", "LandCover", "GLC25-PA-train-landcover.csv"),
    index_col=0,
)
train_landcover.loc[train_landcover.nunique(axis=1) <= 1].index

Index([277331, 797067, 1295919, 2081447, 2436311, 2820287, 3373101, 3496885], dtype='int64', name='surveyId')

In [None]:
# Display test_landcover as left by the most recent reload above (cell not executed).
test_landcover

In [None]:
# NOTE(review): unexplained scratch ratio — neither number is derived in the
# visible cells; confirm what it measures or remove the cell.
10114 / 14829