In [35]:
import ast
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder


In [36]:
# Read the merged GeoJSON export (band patch columns plus label columns) into a GeoDataFrame.
merged_data = gpd.read_file(
    "data/merged_data_our3months_6bands_3x3.geojson"
)

In [37]:
# Build a one-hot species label table: one row per sample, one column per species.
# Columns follow the order in which each species first appears in 'l3_species'.
unique_species = merged_data['l3_species'].unique()

# Sequential sample id (0..n-1). Other cells read merged_data['id'], so it is still written here.
merged_data['id'] = range(len(merged_data))

# Resolve every row's species to its column position in a single vectorized lookup
# instead of writing one cell at a time through iterrows()/.loc.
species_positions = pd.Index(unique_species).get_indexer(merged_data['l3_species'])
species_one_hot = np.zeros((len(merged_data), len(unique_species)), dtype=np.int64)
matched = species_positions >= 0  # get_indexer reports -1 for a value it could not match
species_one_hot[np.flatnonzero(matched), species_positions[matched]] = 1

# Default 0..n-1 row index with the species names as columns.
labels = pd.DataFrame(species_one_hot, columns=unique_species)

# Show the resulting DataFrame
print(labels)

       european beech  cherry  european ash  linden  sycamore maple  \
0                   1       0             0       0               0   
1                   1       0             0       0               0   
2                   1       0             0       0               0   
3                   1       0             0       0               0   
4                   1       0             0       0               0   
...               ...     ...           ...     ...             ...   
14954               0       0             0       0               0   
14955               0       0             0       0               0   
14956               0       0             0       0               0   
14957               0       0             0       0               0   
14958               0       0             0       0               0   

       english oak  red oak  sessile oak  alder  birch  poplar  douglas fir  \
0                0        0            0      0      0       0      

In [38]:
# Convert the one-hot label table to a plain NumPy array.
# np.asarray accepts a DataFrame as well as an ndarray, so re-running this cell
# after `labels` has already been converted does not raise AttributeError.
labels = np.asarray(labels)

print(labels)

[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]]


In [39]:
# Sanity checks: the first ten label rows, the container type of `labels`,
# and a preview of the source table.
print(labels[:10, :])
print(type(labels))
print(merged_data.head())

[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
<class 'numpy.ndarray'>
   id                                              B11_1  \
0   0  [ [ 0.31589999794960022, 0.31589999794960022, ...   
1   1  [ [ 0.20379999279975891, 0.20379999279975891, ...   
2   2  [ [ 0.2531999945640564, 0.2531999945640564, 0....   
3   3  [ [ 0.3093000054359436, 0.3093000054359436, 0....   
4   4  [ [ 0.30430001020431519, 0.30430001020431519, ...   

                                               B11_3  \
0  [ [ 0.34424999356269836, 0.34424999356269836, ...   
1  [ [ 0.25734999775886536, 0.25734999775886536, ...   
2  [ [ 0.26635000109672546, 0.266350

In [51]:
# Keep every column except the last four.
# NOTE(review): presumably the trailing four columns are label/geometry fields — confirm
# against the GeoJSON schema before relying on this positional cut.
# .copy() gives band_data its own storage: later cells assign parsed columns into it, and
# doing that on a bare slice of merged_data triggers SettingWithCopyWarning.
band_data = merged_data.iloc[:, :-4].copy()
print(band_data.head())

   id                                              B11_1  \
0   0  [ [ 0.31589999794960022, 0.31589999794960022, ...   
1   1  [ [ 0.20379999279975891, 0.20379999279975891, ...   
2   2  [ [ 0.2531999945640564, 0.2531999945640564, 0....   
3   3  [ [ 0.3093000054359436, 0.3093000054359436, 0....   
4   4  [ [ 0.30430001020431519, 0.30430001020431519, ...   

                                               B11_3  \
0  [ [ 0.34424999356269836, 0.34424999356269836, ...   
1  [ [ 0.25734999775886536, 0.25734999775886536, ...   
2  [ [ 0.26635000109672546, 0.26635000109672546, ...   
3  [ [ 0.26510000228881836, 0.26510000228881836, ...   
4  [ [ 0.27709999680519104, 0.27709999680519104, ...   

                                               B11_7  \
0  [ [ 0.28600001335144043, 0.28600001335144043, ...   
1  [ [ 0.16795000433921814, 0.16795000433921814, ...   
2  [ [ 0.23000000417232513, 0.23000000417232513, ...   
3  [ [ 0.24770000576972961, 0.24770000576972961, ...   
4  [ [ 0.2660000026226

In [52]:
# Parse every band patch column from its string form into real nested lists.
# Columns are named '<band>_<suffix>' for each band and suffix listed below.
BAND_PREFIXES = ('B3', 'B4', 'B6', 'B8', 'B11', 'NDVI')
TIME_SUFFIXES = ('1', '3', '7')  # presumably the three acquisition months — TODO confirm


def _parse_patch(cell):
    """Return `cell` parsed with ast.literal_eval if it is a string, otherwise unchanged.

    The string check makes the cell safe to re-run: values that were already
    converted to lists on a previous run are passed through instead of raising.
    """
    return ast.literal_eval(cell) if isinstance(cell, str) else cell


for prefix in BAND_PREFIXES:
    for suffix in TIME_SUFFIXES:
        column = f'{prefix}_{suffix}'
        band_data[column] = band_data[column].apply(_parse_patch)

In [53]:
# Assemble the 4D array X with shape (rows, 3, 3, channels): one 3x3 patch per
# sample and one channel per band column.
# Channel order: B3, B4, B6, B8, B11, NDVI, each with suffixes _1, _3, _7.
channel_columns = [
    f'{band}_{suffix}'
    for band in ('B3', 'B4', 'B6', 'B8', 'B11', 'NDVI')
    for suffix in ('1', '3', '7')
]

rows = len(band_data)
X = np.zeros((rows, 3, 3, len(channel_columns)), dtype=np.float32)

for channel, column in enumerate(channel_columns):
    # .tolist() walks the column positionally, so the fill does not depend on
    # band_data carrying 0..rows-1 index labels (label lookup `band_data[col][i]` did).
    patches = np.array(band_data[column].tolist(), dtype=np.float32)
    # reshape checks that every sample really holds 3x3 values and also handles rows == 0.
    X[:, :, :, channel] = patches.reshape(rows, 3, 3)

In [54]:
# Report the dimensions of the assembled array.
print(np.shape(X))

(14959, 3, 3, 18)


In [55]:
# Persist the assembled array X to disk in NumPy's .npy format.
np.save(file='data/3x3_data_nparray.npy', arr=X)

In [31]:
# Build a one-hot leaf-type label array: one row per sample, one column per leaf type.
# Columns follow the order in which each leaf type first appears in 'l1_leaf_types'.
unique_leaf = merged_data['l1_leaf_types'].unique()

# Resolve every row's leaf type to its column position in one vectorized lookup.
# This needs only 'l1_leaf_types', so the cell no longer depends on merged_data['id']
# having been created by another cell, and it avoids the row-by-row iterrows()/.loc writes.
leaf_positions = pd.Index(unique_leaf).get_indexer(merged_data['l1_leaf_types'])
leaf_one_hot = np.zeros((len(merged_data), len(unique_leaf)), dtype=np.int64)
leaf_matched = leaf_positions >= 0  # get_indexer reports -1 for a value it could not match
leaf_one_hot[np.flatnonzero(leaf_matched), leaf_positions[leaf_matched]] = 1

# Default 0..n-1 row index with the leaf types as columns.
leaf_labels = pd.DataFrame(leaf_one_hot, columns=unique_leaf)

# Show the resulting DataFrame
print(leaf_labels)

# Downstream code works with the plain NumPy array.
leaf_labels = leaf_labels.values

       broadleaf  needleleaf
0              1           0
1              1           0
2              1           0
3              1           0
4              1           0
...          ...         ...
86440          0           1
86441          0           1
86442          0           1
86443          0           1
86444          0           1

[86445 rows x 2 columns]


In [32]:
# Persist the one-hot leaf-type labels to disk in NumPy's .npy format.
np.save(file='data/leaf_labels_nparray.npy', arr=leaf_labels)