### 1 import required packages

In [20]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

### 2 import plot

In [21]:
#### load the data from : /mnt/poseidon/remotesensing/arctic/data/training
import os
cwd = '/mnt/poseidon/remotesensing/arctic/data/training/Test_03/'
os.chdir(cwd)
os.getcwd


<function posix.getcwd()>

#### load the fractional cover info (response)

### akava

In [76]:
#### load the plot of interest
akava = pd.read_csv('AKAVA_pft_fcover_01.csv')

#### only plot with very small sizes needs to be processed, the other remains the same, discarding all plots earlier than 2010
akava_small = akava[(akava['Releve area (m2)']<10) &  (akava['year']>=2010)]

In [78]:
selected_columns = ['Releve number','year','Latitude (decimal degrees)', 'Longitude (decimal degrees)']
akava_small = akava_small[selected_columns]
# akava_small = akava.fillna(0)
akava_small.head()

In [81]:
### transform geographic to utm so that distance caculation is more intuitive
from pyproj import Transformer
src_crs = "EPSG:4326"
target_crs = "EPSG:32606"
transformer = Transformer.from_crs(src_crs, target_crs)

lon = akava_small['Longitude (decimal degrees)'].to_numpy()
lat = akava_small['Latitude (decimal degrees)'].to_numpy()

#### store the projected coords
projcoords = []   
for i in range(0,akava_small.shape[0]):
    xcoord, ycoord = lon[i],lat[i]
    projcoords.append(transformer.transform(ycoord,xcoord))

In [85]:
#### calculate the distance matrix of all small plots (for examination)
from scipy.spatial.distance import cdist
coord = np.array(projcoords)
dist_mat = cdist(coord, coord, 'euclidean')
# dist_mat[:3,:] <= 60

In [92]:
### group pixels/plots based on their euclidean distance
def group_pixels_by_distance(pixel_data, distance_threshold):
    """
        pixel_data: ndarray of coordinate pair: n by 2, default is utm projection
        distance_threshold: threshold used for grouping, default is 60m
        
        return:
        a list of alues indicating the group id of each pixel
    
    """
    cluster_id = 0
    pixel_clusters = {}
   
    def expand_cluster(pixel, cluster_id):
        if pixel_clusters.get(cluster_id) is None:
            pixel_clusters[cluster_id] = []
       
        pixel_clusters[cluster_id].append(pixel)

    cluster_array = np.full(len(pixel_data), -1)  # Initialize with -1 (unassigned)
   
    for i, pixel in enumerate(pixel_data):
        assigned = False
       
        for c_id, cluster_pixels in pixel_clusters.items():
            cluster_pixels = np.array(cluster_pixels)
            distances = np.linalg.norm(cluster_pixels - pixel, axis=1)
            if np.any(distances <= distance_threshold):
                expand_cluster(pixel, c_id)
                assigned = True
                cluster_array[i] = c_id
                break
       
        if not assigned:
            expand_cluster(pixel, cluster_id)
            cluster_array[i] = cluster_id
            cluster_id += 1

    return cluster_array

dist_thres = 100
coord = np.array(projcoords)  ## UTM coords, unit is meter
group = group_pixels_by_distance(coord, dist_thres)

In [93]:
### add the group id to df for aggregation
akava_small['group_id'] = group
akava_small.head()

Unnamed: 0,Releve number,year,Latitude (decimal degrees),Longitude (decimal degrees),group
156,11591,2010,71.294681,-156.686699,0
157,11592,2010,71.294297,-156.686482,0
158,11593,2010,71.289531,-156.689706,1
159,11594,2010,71.288841,-156.68089,2
160,11595,2010,71.289509,-156.686935,1


### remaining to do
1. group baby plots to mother one based on group_id
2. use the mean coord of all baby plots as the coord of the mother plot and create new id (releve number) for mother, say "mother_1"of 
3. use the mean fcover for all PFTs as the fcover of the mother plot
4. may add the releve number of all plots to a single column ("baby plots") for each mother as a reference


In [71]:
# def group_pixels_by_distance(coord, dist_thres):
#     grouped_pixels = []
#     remaining_pixels = list(coord)

#     while remaining_pixels:
#         current_pixel = remaining_pixels[0]
#         current_group = [current_pixel]
#         remaining_pixels.remove(current_pixel)

#         i = 0
#         while i < len(remaining_pixels):
#             pixel = remaining_pixels[i]
#             distance = np.linalg.norm(np.array(current_pixel) - np.array(pixel))

#             if distance <= dist_thres:
#                 current_group.append(pixel)
#                 remaining_pixels.pop(i)
#             else:
#                 i += 1

#         grouped_pixels.append(current_group)

#     return grouped_pixels

In [8]:
akveg = pd.read_csv('AKVEG_pft_fcover_'+ver+'.csv')
akveg = akveg[['Site Code', 'Initial Project', 'Plot Dimensions', 'Year',
               'Latitude', 'Longitude', 'Uncertainty',
               'bryophyte cover (%)',
       'deciduous dwarf shrub cover (%)',
       'deciduous dwarf to low shrub cover (%)',
       'deciduous dwarf to tall shrub cover (%)', 
       'evergreen dwarf shrub cover (%)',
       'evergreen dwarf to low shrub cover (%)',
       'forb cover (%)', 'graminoid cover (%)', 
               'lichen cover (%)','soil cover (%)', 
               'rock cover (%)', 'fungus cover (%)',
       'water cover (%)','source']]
akveg = akveg.fillna(0)

akveg_fcover = akveg[['bryophyte cover (%)',
       'deciduous dwarf shrub cover (%)',
       'deciduous dwarf to low shrub cover (%)',
       'deciduous dwarf to tall shrub cover (%)', 
       'evergreen dwarf shrub cover (%)',
       'evergreen dwarf to low shrub cover (%)',
       'forb cover (%)', 'graminoid cover (%)', 
               'lichen cover (%)','soil cover (%)', 'rock cover (%)','fungus cover (%)',
       'water cover (%)']]

akveg_fcover['deciduous shrub'] = akveg_fcover[['deciduous dwarf shrub cover (%)',
       'deciduous dwarf to low shrub cover (%)',
       'deciduous dwarf to tall shrub cover (%)']].sum(axis = 1)
akveg_fcover['evergreen shrub'] = akveg_fcover[['evergreen dwarf shrub cover (%)',
       'evergreen dwarf to low shrub cover (%)']].sum(axis = 1)
akveg_fcover['non-vascular'] = akveg_fcover[['bryophyte cover (%)','lichen cover (%)']].sum(axis = 1)
akveg_fcover['bare soil'] = akveg_fcover[['soil cover (%)', 'rock cover (%)']].sum(axis = 1)

akveg_fcover = akveg_fcover[['non-vascular', 
       'forb cover (%)', 'graminoid cover (%)', 
               'deciduous shrub','evergreen shrub','bare soil',
       'water cover (%)','fungus cover (%)']]
akveg_fcover = akveg_fcover.rename(columns = { 'forb cover (%)':  'forb', 
                                              'graminoid cover (%)': 'graminoid',
                                              'water cover (%)': 'water'})
akveg_fcover['total veg'] = 100 - akveg_fcover[['bare soil','water','fungus cover (%)']].sum(axis = 1)
display(akveg_fcover.head())

# normalize the PFT's fcover to top cover (divide by the sum and scaled by the total veg)
akveg_fcover[['non-vascular', 'forb', 'graminoid', 'deciduous shrub','evergreen shrub']] = akveg_fcover[['non-vascular', 
       'forb', 'graminoid', 'deciduous shrub','evergreen shrub']].divide(akveg_fcover[['non-vascular', 
       'forb', 'graminoid', 'deciduous shrub','evergreen shrub']].sum(axis = 1), axis = 0).multiply(akveg_fcover['total veg'], axis=0)

akveg_fcover = akveg_fcover[akveg_fcover.columns[:7]]/100
               
akveg_aux = akveg[['Site Code', 'Latitude', 'Longitude', 'Plot Dimensions','Uncertainty','Year','source']]
akveg_aux = akveg_aux.rename(columns = { 'Site Code':  'UID', 
                                              'Latitude': 'lat',
                                              'Longitude': 'lon',
                                       'Plot Dimensions':'plot size',
                                        'Uncertainty':'geoloc error'})

display(akveg_fcover.head())
display(akveg_aux.head())


Unnamed: 0,non-vascular,forb,graminoid,deciduous shrub,evergreen shrub,bare soil,water,fungus cover (%),total veg
0,12.7,1.4,0.7,0.0,0.0,20.666667,0.0,0.0,79.333333
1,16.8,2.7,6.7,14.7,22.0,14.0,0.0,0.0,86.0
2,10.1,4.2,3.3,10.0,18.7,14.0,0.0,0.0,86.0
3,67.8,5.5,6.0,19.3,52.6,14.666667,0.0,0.0,85.333333
4,15.6,27.5,7.4,14.7,30.0,27.333333,0.0,0.0,72.666667


Unnamed: 0,non-vascular,forb,graminoid,deciduous shrub,evergreen shrub,bare soil,water
0,0.680766,0.075045,0.037523,0.0,0.0,0.206667,0.0
1,0.229698,0.036916,0.091606,0.200986,0.300795,0.14,0.0
2,0.187603,0.078013,0.061296,0.185745,0.347343,0.14,0.0
3,0.382646,0.031041,0.033862,0.108924,0.296861,0.146667,0.0
4,0.119076,0.209909,0.056485,0.112206,0.228992,0.273333,0.0


Unnamed: 0,UID,lat,lon,plot size,geoloc error,Year,source
0,AB-1B,68.294218,-156.035416,55 radius,3.0,2012,AKVEG
1,AB-5B,68.328156,-156.216436,55 radius,3.0,2012,AKVEG
2,ADST-2,68.242462,-156.117486,30 radius,3.0,2012,AKVEG
3,ADST-21,68.721845,-156.470093,30 radius,3.0,2013,AKVEG
4,ADST-33,68.707473,-156.588315,30 radius,3.0,2013,AKVEG


In [9]:
# display(akveg_aux.Year)

In [10]:
akveg_fcover.to_csv(os.path.join(result_path, f'fcover_AkVEG.csv'))
akveg_aux.to_csv(os.path.join(result_path, f'aux_AkVEG.csv'))