In [1]:
import geopandas as gpd
import pandas as pd
import os
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score

In [2]:
from torch import nn
from torchvision import models
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch

In [3]:
from augmentation import augment_image

In [4]:
# Load the pre-extracted samples and their labels from disk.
# NOTE(review): the labels are treated as one-hot rows further down (argmax
# over axis 1) — confirm against whatever produced these files.
X, y = (
    np.load(path)
    for path in ('data/3x3_data_nparray.npy', 'data/3x3_labels_nparray.npy')
)

In [5]:
# Total number of samples available before splitting.
number_samples = len(X)

# Hold out 20% of the samples for evaluation. The fixed seed keeps the
# shuffled split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=24,
)

In [6]:
# Wrap the training arrays as tensors.
X_tensor = torch.from_numpy(X_train)
# Reduce each label row to a class index by taking the argmax along dim 1.
y_tensor = torch.from_numpy(y_train).argmax(dim=1)

In [7]:
# Classes with fewer training samples than this are selected for augmentation.
MINORITY_THRESHOLD = 1000

# Number of training samples per class index.
class_counts = torch.bincount(y_tensor)
minority_classes = torch.where(class_counts < MINORITY_THRESHOLD)[0]

# Gather the sample indices class by class. torch.cat raises on an empty list,
# so fall back to an empty index tensor when no class is under the threshold.
per_class_indices = [torch.where(y_tensor == cls)[0] for cls in minority_classes]
minority_indices = (
    torch.cat(per_class_indices)
    if per_class_indices
    else torch.empty(0, dtype=torch.long)
)

# Samples and labels belonging to the under-represented classes.
minority_data = X_tensor[minority_indices]
minority_labels = y_tensor[minority_indices]


In [8]:
# Build extra training samples for the minority classes.
augmented_data = []
augmented_labels = []

for image, label in zip(minority_data, minority_labels):
    # augment_image is called with a numpy array, so convert the tensor first.
    # The original code unpacked exactly five variants per image; collecting
    # them as a list keeps that behaviour without hard-coding the count.
    variants = list(augment_image(image.numpy()))
    augmented_data.extend(variants)
    # Every variant keeps the label of the image it was derived from.
    augmented_labels.extend([label.item()] * len(variants))

# Stack into a single ndarray before building the tensor: calling
# torch.tensor() directly on a Python list of ndarrays is very slow and
# triggers a UserWarning.
augmented_data = torch.tensor(np.array(augmented_data), dtype=torch.float32)
augmented_labels = torch.tensor(augmented_labels, dtype=torch.long)

# Append the augmented samples to the original training set; permute moves
# the last axis to position 1.
X_augmented = torch.cat((X_tensor, augmented_data), dim=0).permute(0, 3, 1, 2)
y_augmented = torch.cat((y_tensor, augmented_labels), dim=0)

print(f"New dataset size: {X_augmented.shape}, {y_augmented.shape}")

New dataset size: torch.Size([50107, 18, 3, 3]), torch.Size([50107])


  augmented_data = torch.tensor(augmented_data, dtype=torch.float32)


In [9]:
NUM_CLASSES = 19

# One-hot encode the class indices. The guard keeps this cell safe to re-run:
# without it a second execution would one-hot encode the already encoded
# labels and silently produce a 3-D target tensor.
if y_augmented.dim() == 1:
    y_augmented = torch.nn.functional.one_hot(y_augmented, num_classes=NUM_CLASSES)
dataset = TensorDataset(X_augmented, y_augmented)

# Create a DataLoader that serves shuffled mini-batches of 32 samples.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [10]:
from resnet_baseline import model

In [None]:
# Import the training entry point under an alias so that binding its result to
# `model` does not shadow it. With the original `model = model(...)` the cell
# could not be re-run: the second execution called the returned object
# instead of the imported factory.
from resnet_baseline import model as build_model

# NOTE(review): 18 matches the size of dim 1 of X_augmented and 19 the number
# of one-hot classes — presumably (in_channels, num_classes); confirm against
# resnet_baseline.
model = build_model(dataloader, 18, 19)



Epoch 1/10, Loss: 1.0709


In [None]:
# Put the network in evaluation mode before scoring the held-out data.
model.eval()

# Reorder the axes of the test array the same way the training data was
# permuted, i.e. (0, 3, 1, 2).
X_torch = torch.from_numpy(X_test).permute(0, 3, 1, 2)

# Inference needs no gradients: disabling autograd avoids building a graph
# for the whole test set, which only costs memory and time here.
with torch.no_grad():
    y_pred = model(X_torch)

In [None]:
# Predicted class = position of the highest score in each output row.
y_pred_np = y_pred.detach().cpu().numpy()
y_pred_labels = y_pred_np.argmax(axis=1)

# Ground-truth class indices recovered from the one-hot test labels.
y_test_np = y_test.argmax(axis=1)

# Confusion matrix of true vs. predicted classes.
ConfusionMatrixDisplay.from_predictions(y_true=y_test_np, y_pred=y_pred_labels)
plt.show()

# Overall fraction of correctly classified test samples.
acc = accuracy_score(y_true=y_test_np, y_pred=y_pred_labels)
print("Accuracy ResNet18 with Focal Loss: ", acc)

In [None]:
# Class distribution of the augmented training set.
# Accept either class indices (1-D) or one-hot rows (2-D) so the plot does not
# depend on which earlier cells have already been executed.
hist_labels = y_augmented.argmax(dim=1) if y_augmented.dim() > 1 else y_augmented
hist_labels = hist_labels.cpu().numpy()

n_classes = 19
# Edges at k - 0.5 give every integer class its own bin. With `bins=19` the
# edges are spread between the smallest and largest label present, so the bins
# drift off the class indices whenever class 0 or class 18 is missing.
fig, ax = plt.subplots()
ax.hist(hist_labels, bins=np.arange(n_classes + 1) - 0.5)
ax.set(
    xlabel="Class index",
    ylabel="Number of samples",
    title="Class distribution after augmentation",
)
plt.show()