In [1]:
import os
import numpy as np
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, confusion_matrix

In [13]:
import random
import shutil

def select_random_images(sat_folder, gt_folder, sat_folder_test, gt_folder_test, percentage=0.2, seed=None):
    """Move a random share of satellite images, together with their masks, into test folders.

    A mask belongs to a satellite image when it has the same file name, or the
    same base name with a ``.tif`` or ``.tiff`` extension. Images for which no
    mask is found are reported and left where they are, so fewer than
    ``percentage`` of the images may end up being moved.

    Args:
        sat_folder: Folder holding the satellite images.
        gt_folder: Folder holding the ground-truth masks.
        sat_folder_test: Destination folder for the selected satellite images
            (created if missing).
        gt_folder_test: Destination folder for the matching masks
            (created if missing).
        percentage: Fraction (0..1) of the satellite images to select.
        seed: Optional seed for a reproducible selection. When ``None`` the
            module-level ``random`` state is used, as before.

    Raises:
        ValueError: If ``percentage`` is outside the range [0, 1].
    """
    if not 0 <= percentage <= 1:
        raise ValueError(f"percentage must be between 0 and 1, got {percentage!r}")

    # Create the destination folders if they do not exist yet
    os.makedirs(sat_folder_test, exist_ok=True)
    os.makedirs(gt_folder_test, exist_ok=True)

    # Regular files only. Sorted because os.listdir returns names in arbitrary
    # order, which would make a seeded selection differ between machines.
    sat_files = sorted(
        f for f in os.listdir(sat_folder)
        if os.path.isfile(os.path.join(sat_folder, f))
    )

    # Number of files to select
    num_files = int(len(sat_files) * percentage)

    # A private generator keeps a seeded split from disturbing the global state
    rng = random if seed is None else random.Random(seed)
    selected_files = rng.sample(sat_files, num_files)

    # Move each selected image and its mask
    for sat_file in selected_files:
        sat_path = os.path.join(sat_folder, sat_file)

        # Try the identical name first, then the same base name with either
        # TIFF extension. Working on the extension only avoids rewriting a
        # '.tiff' that happens to appear in the middle of a file name.
        stem = os.path.splitext(sat_file)[0]
        gt_path = None
        for candidate in (sat_file, stem + '.tif', stem + '.tiff'):
            candidate_path = os.path.join(gt_folder, candidate)
            if os.path.isfile(candidate_path):
                gt_path = candidate_path
                break

        if gt_path is None:
            print(f"Warning: Missing mask for {sat_file}. Skipping this file.")
            continue

        # Destination paths keep the original file names
        sat_dest = os.path.join(sat_folder_test, sat_file)
        gt_dest = os.path.join(gt_folder_test, os.path.basename(gt_path))

        shutil.move(sat_path, sat_dest)
        shutil.move(gt_path, gt_dest)

        print(f"Moved {sat_file} and corresponding mask to test folders.")
        



In [14]:
# Example usage: source folders for the road dataset and destination folders for the test split
sat_folder = 'DataSet_1/roads/sat'
gt_folder = 'DataSet_1/roads/gt'
sat_folder_test = 'DataSet_1/test/sat/'
gt_folder_test = 'DataSet_1/test/gt/'

# NOTE: this moves files on disk (default share: 20%), so running the cell again
# moves a further share of whatever is still left in the source folders.
select_random_images(sat_folder, gt_folder, sat_folder_test, gt_folder_test)

Moved 10228750_15.tiff and corresponding mask to test folders.
Moved 10528690_15.tiff and corresponding mask to test folders.
Moved 10228705_15.tiff and corresponding mask to test folders.
Moved 10228675_15.tiff and corresponding mask to test folders.


In [26]:
import os
import shutil

def reset_folders(sat_folder, gt_folder, sat_folder_test, gt_folder_test):
    """Move every entry of the two test folders back into the original folders.

    Args:
        sat_folder: Folder that receives the satellite images again.
        gt_folder: Folder that receives the masks again.
        sat_folder_test: Test folder currently holding satellite images.
        gt_folder_test: Test folder currently holding masks.
    """
    # Both listings are taken up front, before anything is moved
    sat_entries = os.listdir(sat_folder_test)
    gt_entries = os.listdir(gt_folder_test)

    # (where the entries are now, where they go back to, which entries)
    transfers = (
        (sat_folder_test, sat_folder, sat_entries),
        (gt_folder_test, gt_folder, gt_entries),
    )
    for origin, target, entries in transfers:
        for name in entries:
            shutil.move(os.path.join(origin, name), os.path.join(target, name))

    print("Todas las imágenes han sido devueltas a sus carpetas originales.")




In [21]:

# Run the reset function (moves the test images back to their original folders)
# reset_folders(sat_folder, gt_folder, sat_folder_test, gt_folder_test)

In [15]:
def convert_images_to_csv(sat_folder, gt_folder, output_csv_path):
    """Flatten every satellite image / mask pair into per-pixel rows and save one CSV.

    Each output row is one pixel: its ``R``, ``G``, ``B`` values followed by the
    mask value at the same position (``Label``). A mask belongs to a satellite
    image when it has the same file name, or the same base name with a ``.tif``
    or ``.tiff`` extension. Pairs with a missing mask or mismatched height/width
    are reported and skipped.

    Args:
        sat_folder: Folder holding the satellite images.
        gt_folder: Folder holding the ground-truth masks.
        output_csv_path: Where to write the CSV. Its parent folder is created
            when missing. Nothing is written if no valid pair is found.
    """
    # Regular files only: a sub-directory would make Image.open fail
    sat_files = sorted(
        f for f in os.listdir(sat_folder)
        if os.path.isfile(os.path.join(sat_folder, f))
    )
    gt_files = sorted(os.listdir(gt_folder))

    # Printed so naming mismatches between the two folders are easy to spot
    print("Satellite images found:", sat_files)
    print("Mask images found:", gt_files)

    # One (n_pixels, 4) array per image pair
    per_image_rows = []

    for sat_file in sat_files:
        sat_path = os.path.join(sat_folder, sat_file)

        # Try the identical name first, then the same base name with either
        # TIFF extension. Only the extension is touched, never the base name.
        stem = os.path.splitext(sat_file)[0]
        gt_path = None
        for candidate in (sat_file, stem + '.tif', stem + '.tiff'):
            candidate_path = os.path.join(gt_folder, candidate)
            if os.path.isfile(candidate_path):
                gt_path = candidate_path
                break

        if gt_path is None:
            print(f"Warning: Missing mask for {sat_file}. Skipping this file.")
            continue

        # The context managers close both file handles once the pixel data has
        # been copied into arrays
        with Image.open(sat_path) as satellite_image, Image.open(gt_path) as mask_image:
            # Force exactly three bands so the reshape below cannot silently
            # scramble RGBA or grayscale input
            satellite_array = np.array(satellite_image.convert('RGB'))
            mask_array = np.array(mask_image)
            if mask_array.ndim != 2:
                # Multi-band mask: collapse to one band so there is exactly one
                # label per pixel
                mask_array = np.array(mask_image.convert('L'))

        # Image and mask must cover the same pixel grid
        if satellite_array.shape[:2] != mask_array.shape[:2]:
            print(f"Warning: Size mismatch for {sat_file}. Skipping this image pair.")
            continue

        pixels_rgb = satellite_array.reshape(-1, 3)  # one row of R, G, B per pixel
        labels = mask_array.reshape(-1)              # mask value per pixel, same order

        per_image_rows.append(np.column_stack((pixels_rgb, labels)))

    if not per_image_rows:
        print("Error: No valid image data found. Check if files are correctly placed in 'sat' and 'gt' folders.")
        return

    # Stack all images into a single table
    dataset = np.vstack(per_image_rows)
    df = pd.DataFrame(dataset, columns=['R', 'G', 'B', 'Label'])

    # to_csv does not create missing folders
    output_dir = os.path.dirname(output_csv_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    df.to_csv(output_csv_path, index=False)
    print(f"Dataset saved to {output_csv_path}")


In [16]:
# Destination of the per-pixel training dataset
output_csv = 'csv/RGBLabel.csv'

# Uses the folder globals set in the split cell; assumes that cell has already
# moved the test images out of these folders — verify by checking the printed lists
convert_images_to_csv(sat_folder, gt_folder, output_csv)

Satellite images found: ['10078675_15.tiff', '10228720_15.tiff', '10228735_15.tiff', '10378675_15.tiff', '10378690_15.tiff', '10378705_15.tiff', '10378720_15.tiff', '10378735_15.tiff', '10378750_15.tiff', '10378765_15.tiff', '10528675_15.tiff', '10528705_15.tiff', '10528720_15.tiff', '10528735_15.tiff', '10528750_15.tiff', '10528765_15.tiff']
Mask images found: ['10078675_15.tif', '10228720_15.tif', '10228735_15.tif', '10378675_15.tif', '10378690_15.tif', '10378705_15.tif', '10378720_15.tif', '10378735_15.tif', '10378750_15.tif', '10378765_15.tif', '10528675_15.tif', '10528705_15.tif', '10528720_15.tif', '10528735_15.tif', '10528750_15.tif', '10528765_15.tif']
Dataset saved to RGBLabel.csv


In [18]:
import pandas as pd

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Load the per-pixel training dataset
df = pd.read_csv('csv/RGBLabel.csv')

# Split into features (X) and labels (Y)
X = df[['R', 'G', 'B']]
Y = df['Label']

# Train the model.
# scikit-learn's MLP is sensitive to feature scale, and the inputs here are raw
# colour values. The scaler is bundled with the classifier in one pipeline, so
# later ANNmodel.predict(...) calls on unscaled R, G, B columns apply the same
# transformation automatically. The fitted MLP itself is ANNmodel[-1].
ANNmodel = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(50,), early_stopping=True, tol=1e-4, max_iter=100, random_state=12),
)
ANNmodel.fit(X, Y)


In [19]:
# Destination of the per-pixel test dataset
test_csv = 'csv/testRGB.csv'

# Same conversion as for the training data, applied to the held-out test folders
convert_images_to_csv(sat_folder_test, gt_folder_test, test_csv)

Satellite images found: ['10228675_15.tiff', '10228705_15.tiff', '10228750_15.tiff', '10528690_15.tiff']
Mask images found: ['10228675_15.tif', '10228705_15.tif', '10228750_15.tif', '10528690_15.tif']
Dataset saved to csv/testRGB.csv


In [24]:
from sklearn.metrics import classification_report, f1_score, recall_score

# Load the test dataset
test = pd.read_csv('csv/testRGB.csv')

# Split into features (X) and labels (y)
tX = test[['R', 'G', 'B']]
tY = test['Label']

# Predictions
Y_pred = ANNmodel.predict(tX)

# Metrics for the positive class (mask value 255)
f1 = f1_score(tY, Y_pred, pos_label=255)
recall = recall_score(tY, Y_pred, pos_label=255)

# For a binary task the Dice coefficient is 2*TP / (2*TP + FP + FN), which is
# exactly the F1 score. The earlier 2*x / (1 + x) expression converts a Jaccard
# index (IoU) to Dice; applied to F1 it overstated the result.
dice = f1

# Collect the metrics in a DataFrame
metrics_df = pd.DataFrame({
    'Metric': ['F1 Score', 'Recall', 'Dice Coefficient'],
    'Value': [f1, recall, dice]
})

# Save to CSV
metrics_df.to_csv('csv/metrics.csv', index=False)
print("Métricas guardadas en metrics.csv")


Métricas guardadas en metrics.csv


In [30]:
from sklearn.metrics import f1_score, recall_score
import pandas as pd

def evaluate_model(ANNmodel, test_csv, output_csv, pos_label=255):
    """Score a fitted classifier on a per-pixel test CSV and save the metrics.

    Args:
        ANNmodel: Fitted estimator exposing ``predict``; it is called on the
            ``R``, ``G``, ``B`` columns of the test data.
        test_csv: Path of a CSV with ``R``, ``G``, ``B`` and ``Label`` columns.
        output_csv: Path of the metrics CSV to write (columns ``Metric`` and
            ``Value``). Its parent folder is created when missing.
        pos_label: Label value treated as the positive class.
    """
    # Load the test dataset
    test = pd.read_csv(test_csv)

    # Split into features and labels
    tX = test[['R', 'G', 'B']]
    tY = test['Label']

    # Predictions
    Y_pred = ANNmodel.predict(tX)

    # Metrics for the positive class
    f1 = f1_score(tY, Y_pred, pos_label=pos_label)
    recall = recall_score(tY, Y_pred, pos_label=pos_label)

    # For a binary task the Dice coefficient is 2*TP / (2*TP + FP + FN), which
    # is exactly the F1 score. The earlier 2*x / (1 + x) expression converts a
    # Jaccard index (IoU) to Dice; applied to F1 it overstated the result.
    dice = f1

    metrics_df = pd.DataFrame({
        'Metric': ['F1 Score', 'Recall', 'Dice Coefficient'],
        'Value': [f1, recall, dice]
    })

    # to_csv does not create missing folders
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    metrics_df.to_csv(output_csv, index=False)
    print(f"Métricas guardadas en {output_csv}")


In [None]:
# Input: per-pixel test dataset; output: metrics table
test_csv = 'csv/testRGB.csv'
# NOTE: rebinds output_csv, which an earlier cell used for the training CSV path
output_csv = 'csv/metrics.csv'

evaluate_model(ANNmodel, test_csv, output_csv)


In [27]:
# Undo the split: move the test images and masks back into the original folders
reset_folders(sat_folder, gt_folder, sat_folder_test, gt_folder_test)

Todas las imágenes han sido devueltas a sus carpetas originales.
