In [1]:
import geopandas as gpd
import pandas as pd
import os
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score

In [2]:
from torch import nn
from torchvision import models
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch

In [3]:
merged_data = gpd.read_file("merged_data.geojson")

In [4]:
# Assuming your data is loaded into df
# Get unique species
unique_species = merged_data['l3_species'].unique()

# Create a new label DataFrame with rows = images and columns = species, initialized to 0
labels = pd.DataFrame(0, index=merged_data['id'], columns=unique_species)

# Populate the label DataFrame
for idx, row in merged_data.iterrows():
    species = row['l3_species']
    labels.loc[row['id'], species] = 1

# Reset the index for better readability (optional)
labels.reset_index(inplace=True)
labels.rename(columns={'index': 'id'}, inplace=True)
labels = labels.iloc[:, 1:]

# Show the resulting DataFrame
print(labels)

       european beech  cherry  european ash  linden  sycamore maple  \
0                   1       0             0       0               0   
1                   1       0             0       0               0   
2                   1       0             0       0               0   
3                   1       0             0       0               0   
4                   1       0             0       0               0   
...               ...     ...           ...     ...             ...   
37896               0       0             0       0               0   
37897               0       0             0       0               0   
37898               0       0             0       0               0   
37899               0       0             0       0               0   
37900               0       0             0       0               0   

       english oak  red oak  sessile oak  alder  birch  poplar  douglas fir  \
0                0        0            0      0      0       0      

In [5]:
labels = labels.values

print(labels)

[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]]


In [6]:
#labels_np = np.array(labels)
print(labels[0:10, :])
print(type(labels))

[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
<class 'numpy.ndarray'>


In [7]:
band_data = merged_data.iloc[:, 1:-4]

In [8]:
# Convert each element in the 'B2_3', 'B4_3', 'B8_3', 'NDVI_3' columns into actual lists
import ast

band_data['B2_3'] = band_data['B2_3'].apply(ast.literal_eval)
band_data['B4_3'] = band_data['B4_3'].apply(ast.literal_eval)
band_data['B8_3'] = band_data['B8_3'].apply(ast.literal_eval)
band_data['NDVI_3'] = band_data['NDVI_3'].apply(ast.literal_eval)

In [9]:
# Initialize the 4D array to store the reshaped data
rows = len(band_data)
X = np.zeros((rows, 11, 11, 4), dtype=np.float32)

# Loop through the rows and fill the 4D array with the band data
for i in range(rows):
    # Convert the list of lists into a NumPy array for each band
    band_b2 = np.array(band_data['B2_3'][i], dtype=np.float32)  # Shape (11, 11)
    band_b4 = np.array(band_data['B4_3'][i], dtype=np.float32)  # Shape (11, 11)
    band_b8 = np.array(band_data['B8_3'][i], dtype=np.float32)  # Shape (11, 11)
    band_ndvi = np.array(band_data['NDVI_3'][i], dtype=np.float32)  # Shape (11, 11)

    # Stack the bands together into a 3D array (11, 11, 4)
    # This will create a 3D array where each band is along the 3rd axis (depth)
    X[i, :, :, 2] = band_b2
    X[i, :, :, 0] = band_b4
    X[i, :, :, 1] = band_b8
    X[i, :, :, 3] = band_ndvi

In [10]:
number_samples = X.shape[0]
#X_res = np.reshape(X, (number_samples, 11*11*4))
#y_res = np.argmax(labels, axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=24, shuffle=True)

In [11]:
model = models.resnet50(pretrained=True)

#model.features[0][0] = nn.Conv2d(4, model.features[0][0].out_channels, kernel_size=3, stride=2, padding=1, bias=False)

original_conv1 = model.conv1
model.conv1 = nn.Conv2d(
    in_channels=4,  # Change the input channels to 4
    out_channels=original_conv1.out_channels,
    kernel_size=original_conv1.kernel_size,
    stride=original_conv1.stride,
    padding=original_conv1.padding,
    bias=original_conv1.bias
)

with torch.no_grad():
    model.conv1.weight[:, :3, :, :] = original_conv1.weight  # Copy the weights of the first three channels
    model.conv1.weight[:, 3, :, :] = original_conv1.weight[:, 0, :, :] 



In [75]:
num_classes = 19 
#model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

NameError: name 'long' is not defined

In [71]:
X_tensor = torch.from_numpy(X_train).float().permute(0, 3, 1, 2)
y_tensor = torch.from_numpy(y_train).long()
print(y_tensor)

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 1],
        [0, 0, 0,  ..., 0, 0, 1],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0]])


In [72]:
dataset = TensorDataset(X_tensor, y_tensor)

# Crea un DataLoader
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [73]:
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    

    for inputs, labels in dataloader:

        #inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(inputs)  
        loss = criterion(outputs, labels)  
        
        loss.backward()  
        optimizer.step()  
        
        running_loss += loss.item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader)}')

# Salva il modello
torch.save(model.state_dict(), 'resnet50_finetuned_fl.pth')

RuntimeError: Index tensor must have the same number of dimensions as input tensor

In [None]:
model.eval()

X_torch = torch.from_numpy(X_test).permute(0, 3, 1, 2)

y_pred = model(X_torch)

In [None]:
y_pred_np = y_pred.detach().cpu().numpy()
y_pred_labels = np.argmax(y_pred_np, axis=1)

#y_test = np.argmax(y_test, axis=1)

ConfusionMatrixDisplay.from_predictions(y_test, y_pred_labels)
plt.show()

acc = accuracy_score(y_test, y_pred_labels)
print("Accuracy EfficientNetb0: ", acc)