In [72]:
import geopandas as gpd
import pandas as pd
import time

from obia.handlers import open_geotiff
from obia.segment import segment
from obia.classify import classify

In [23]:
raster_path = "/mnt/c/tmp/output/output_179.tif"

image = open_geotiff(raster_path)

# Keyword arguments selecting and configuring the "slic" method of segment().
slic_options = dict(
    method="slic", n_segments=50000, compactness=0.01, max_num_iter=500,
    sigma=0, convert2lab=False, slic_zero=True,
)
# Statistic flags that are all set to False for this run.
disabled_statistics = dict(
    calc_skewness=False, calc_kurtosis=False, calc_homogeneity=False, calc_ASM=False,
)

# Segment the raster using bands 7, 4 and 0 as the segmentation input.
segmented_image = segment(
    image,
    segmentation_bands=[7, 4, 0],
    **slic_options,
    **disabled_statistics,
)

In [24]:
# Write the current segmentation result to output_179.gpkg.
# Plain string literal: there is nothing to interpolate, so no f-prefix is needed.
segmented_image.write_segments('/mnt/c/tmp/output/output_179.gpkg')

In [25]:
# Single source of truth for the tile number, so the input raster and the
# output GeoPackage cannot drift apart when this cell is copied for another tile.
tile_id = 192
raster_path = f"/mnt/c/tmp/output/output_{tile_id}.tif"

image = open_geotiff(raster_path)

# Segment with the "slic" method on bands 7, 4 and 0; the calc_* statistic
# flags listed here are all set to False.
segmented_image = segment(
    image, segmentation_bands=[7,4,0],
    method="slic", n_segments=50000, compactness=0.01, max_num_iter=500, sigma=0, convert2lab=False, slic_zero=True,
    calc_skewness=False, calc_kurtosis=False, calc_homogeneity=False, calc_ASM=False
)

# Write the segments next to the input raster, under the same tile number.
segmented_image.write_segments(f'/mnt/c/tmp/output/output_{tile_id}.gpkg')

In [26]:
# Single source of truth for the tile number, so the input raster and the
# output GeoPackage cannot drift apart when this cell is copied for another tile.
tile_id = 196
raster_path = f"/mnt/c/tmp/output/output_{tile_id}.tif"

image = open_geotiff(raster_path)

# Segment with the "slic" method on bands 7, 4 and 0; the calc_* statistic
# flags listed here are all set to False.
segmented_image = segment(
    image, segmentation_bands=[7,4,0],
    method="slic", n_segments=50000, compactness=0.01, max_num_iter=500, sigma=0, convert2lab=False, slic_zero=True,
    calc_skewness=False, calc_kurtosis=False, calc_homogeneity=False, calc_ASM=False
)

# Write the segments next to the input raster, under the same tile number.
segmented_image.write_segments(f'/mnt/c/tmp/output/output_{tile_id}.gpkg')

In [31]:
from obia.utils import label_segments
import pandas as pd


In [36]:
# Load the training points; they are later passed to label_segments() together
# with each tile's segments.
training_points = gpd.read_file("/mnt/c/tmp/output/training_points.gpkg")

In [41]:
# Tiles whose segment files are labelled and stacked into one training table.
tile_numbers = [62, 80, 163, 179, 192, 196, 199, 208, 210]
tiles = []
for tile in tile_numbers:
    print(f"On tile {tile}")
    tile_segments = gpd.read_file(f"/mnt/c/tmp/output/output_{tile}.gpkg")

    if tile == 208:
        # NOTE(review): presumably tile 208 was segmented with extra statistics
        # enabled, so its additional columns are removed here - confirm.
        extra_suffixes = ('_min', '_max', '_skewness', '_kurtosis', '_homogeneity', '_ASM')
        surplus_columns = [
            name
            for name in tile_segments.columns
            if any(suffix in name for suffix in extra_suffixes)
        ]
        surplus_columns.append('nobs')
        tile_segments = tile_segments.drop(columns=surplus_columns)

    # label_segments returns the labelled segments and the "mixed" ones separately;
    # only the labelled ones are kept for training.
    labelled, mixed = label_segments(tile_segments, training_points)
    print(f"mixed segments: {len(mixed)}")
    print(f"labelled segments: {labelled.shape}")
    tiles.append(labelled)

# Stack the per-tile results into a single table with a fresh index.
training_segments = pd.concat(tiles, ignore_index=True)
print(training_segments.shape)
training_segments.head()

In [45]:
# Show the dtype of every column in the combined training table.
print(training_segments.dtypes)


In [47]:
import numpy as np

def convert_int(x):
    """Return ``x`` as a built-in ``int`` if it is a ``numpy.int64``, else return it unchanged."""
    return int(x) if isinstance(x, np.int64) else x

# Apply convert_int to every cell of the table.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use the new name when the installed pandas provides it and fall back otherwise.
# NOTE(review): pandas may re-infer integer dtypes after an element-wise pass -
# confirm this conversion has the intended effect before exporting.
if hasattr(type(training_segments), "map"):
    training_segments = training_segments.map(convert_int)
else:
    training_segments = training_segments.applymap(convert_int)

# training_segments.to_file("/mnt/c/tmp/output/training_segments.gpkg")

In [96]:
# Replace the in-memory training table with one read from disk.
# NOTE(review): this reads training_segments_2.gpkg, which no visible cell writes
# (the only export shown is commented out and targets training_segments.gpkg) -
# confirm where this file comes from.
training_segments = gpd.read_file("/mnt/c/tmp/output/training_segments_2.gpkg")

In [107]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Column-name suffixes drawn in the left and in the right panel of each row
base_group1 = ['_mean', '_variance']
base_group2 = ['_contrast', '_correlation']

# Grid of panels: 8 rows (one per band index 0-7) by 2 columns (one per suffix group)
fig, axes = plt.subplots(8, 2, figsize=(20, 40))

for i in range(8):
    # Left panel gets base_group1, right panel gets base_group2
    for panel, suffixes in zip(axes[i], (base_group1, base_group2)):
        band_columns = [f'b{i}{suffix}' for suffix in suffixes] + ['feature_class']

        # Reshape to long form so seaborn can group by attribute and by class
        long_df = pd.melt(
            training_segments[band_columns],
            id_vars='feature_class',
            var_name='Attributes',
            value_name='Values',
        )

        sns.boxplot(
            ax=panel, data=long_df, x='Attributes', y='Values',
            hue='feature_class', showfliers=False,
        )

        # Tilt the tick labels and drop the per-panel legend;
        # one figure-level legend is added after the loop
        panel.tick_params(axis='x', rotation=45)
        panel.legend_.remove()

    # Panel titles
    axes[i, 0].title.set_text(f'Boxplots for b{i}_mean, b{i}_variance')
    axes[i, 1].title.set_text(f'Boxplots for b{i}_contrast, b{i}_correlation')

# Figure-level legend built from the first panel's handles
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right')

plt.tight_layout()
plt.savefig('boxplot.png', dpi=300)
plt.show()


In [97]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Suffix groups: the first feeds the left panels, the second the right panels
base_group1 = ['_mean', '_variance']
base_group2 = ['_contrast', '_correlation']

# 8 x 2 grid of panels; each row of `axes` belongs to one band index
fig, axes = plt.subplots(8, 2, figsize=(20, 40))

for i, (left_ax, right_ax) in enumerate(axes):
    # Columns of the current band, plus the class label used as hue
    left_columns = [f'b{i}{suffix}' for suffix in base_group1] + ['feature_class']
    right_columns = [f'b{i}{suffix}' for suffix in base_group2] + ['feature_class']

    melt_options = dict(id_vars='feature_class', var_name='Attributes', value_name='Values')
    left_long = pd.melt(training_segments[left_columns], **melt_options)
    right_long = pd.melt(training_segments[right_columns], **melt_options)

    # Same boxplot settings for both panels; outliers are not drawn
    box_options = dict(x='Attributes', y='Values', hue='feature_class', showfliers=False)
    sns.boxplot(ax=left_ax, data=left_long, **box_options)
    sns.boxplot(ax=right_ax, data=right_long, **box_options)

    # Rotate tick labels and remove each panel's own legend
    for ax in (left_ax, right_ax):
        ax.tick_params(axis='x', rotation=45)
        ax.legend_.remove()

    left_ax.title.set_text(f'Boxplots for b{i}_mean, b{i}_variance')
    right_ax.title.set_text(f'Boxplots for b{i}_contrast, b{i}_correlation')

# One legend for the whole figure, taken from the top-left panel
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right')

plt.tight_layout()
plt.show()


In [92]:
# `stats` is not imported by any other visible cell; without this import the
# stats.kruskal call below raises NameError on a fresh kernel.
from scipy import stats

# Every band/statistic column that is tested: b0_mean ... b7_correlation.
variables = [
    f'b{i}_{stat}'
    for i in range(8)
    for stat in ['mean', 'variance', 'contrast', 'correlation']
]

# Kruskal-Wallis H-test per variable, with one sample per feature_class.
p_values = {}
for var in variables:
    feature_class_groups = training_segments.groupby('feature_class')[var].apply(list)
    _, p_val = stats.kruskal(*feature_class_groups)
    # Report p-values at or below 0.001 as '***'. The comparison is written so
    # that a NaN p-value is kept as NaN instead of being reported as '***'.
    p_values[var] = '***' if p_val <= 0.001 else p_val

p_values_df = pd.Series(p_values, name='p_value').reset_index().rename(columns={'index': 'variable'})
print(p_values_df)


In [84]:
# List the distinct values found in the feature_class column.
training_segments['feature_class'].unique()

In [103]:
# Columns carried into the classifier's training table: identifiers, the class
# label, the four b7 statistics and the geometry.
columns_to_keep = [
    'segment_id', 
    'feature_class',
    'b7_mean', 
    'b7_variance', 
    'b7_contrast',
    'b7_correlation',
    'geometry'
    ]

# Take an explicit copy: assigning into a column subset of training_segments
# would otherwise be a chained assignment (SettingWithCopyWarning).
training = training_segments[columns_to_keep].copy()
training['feature_class'] = training['feature_class'].astype(int)

# Number of training segments per class.
training['feature_class'].value_counts()

In [73]:
raster_path = "/mnt/c/tmp/output/output_62.tif"
image = open_geotiff(raster_path)

# Segment tile 62 with the "slic" method. Bands 7, 4 and 1 are used both for
# segmentation and for statistics; every calc_* flag listed is set to False.
segmented_image = segment(
    image,
    segmentation_bands=[7, 4, 1],
    statistics_bands=[7, 4, 1],
    method="slic",
    n_segments=50000,
    compactness=0.01,
    max_num_iter=500,
    sigma=0,
    convert2lab=False,
    slic_zero=True,
    calc_skewness=False,
    calc_kurtosis=False,
    calc_homogeneity=False,
    calc_ASM=False,
    calc_dissimilarity=False,
    calc_energy=False,
)

In [104]:
# Keep a copy of the untrimmed segment table before columns are removed.
segments_old = segmented_image.segments.copy()

# Every segment column that the training table does not have.
training_column_names = set(training.columns)
columns_to_drop = [
    col
    for col in segmented_image.segments.columns
    if col not in training_column_names
]

# Replace the segment table with the trimmed version.
segmented_image.segments = segmented_image.segments.drop(columns=columns_to_drop)

print(segmented_image.segments.columns)

In [105]:
# Classify the segments with method='mlp'; SHAP computation is switched off.
# NOTE(review): the remaining keyword arguments are presumably forwarded to the
# underlying estimator - confirm in obia.classify.
classified = classify(
    image,
    segmented_image,
    training,
    method='mlp',
    compute_shap=False,
    hidden_layer_sizes=(50, 50),
    solver='adam',
    max_iter=10000,
)

In [80]:
# Classify the same segments with method='rf'; SHAP computation is switched off.
classified_rf = classify(
    image,
    segmented_image,
    training,
    method='rf',
    compute_shap=False,
)

In [78]:
import geopandas as gpd
import rasterio
from rasterio.features import shapes
import numpy as np


def raster_to_vector(raster, value, transform, crs=None):
    """
    Convert a raster into a vector (Polygon) layer.

    Args:
        raster(numpy.array): A 2D numpy array representing the raster.
        value(int or None): Value of pixels to be used for generating polygons.
            When None, no mask is passed to ``shapes`` and the whole raster is used.
        transform (Affine): A rasterio.Affine class instance representing the transformation matrix.
        crs: Optional coordinate reference system forwarded to
            ``GeoDataFrame.from_features``. Defaults to None, which leaves the
            result without a CRS, as before this parameter existed.

    Returns:
        Geopandas GeoDataFrame built from one feature per shape, each carrying
        the shape's geometry and a ``value`` property.
    """
    # Restrict polygonisation to pixels equal to `value`, unless no value was given.
    mask = None if value is None else (raster == value)

    # Wrap every (geometry, pixel value) pair in a GeoJSON-like feature mapping.
    features = [
        {'properties': {'value': val}, 'geometry': geom}
        for geom, val in shapes(raster, mask=mask, transform=transform)
    ]
    return gpd.GeoDataFrame.from_features(features, crs=crs)




def write_vector_to_geopackage(vector, filepath):
    """
    Save vector data to disk using the GeoPackage ('GPKG') driver.

    Args:
        vector(GeoDataFrame): The vector data to save.
        filepath(str): Destination path of the .gpkg file.
    """
    driver_name = "GPKG"
    vector.to_file(filepath, driver=driver_name)



In [106]:
# Cast the MLP classification raster to int16 before polygonising it.
mlp_raster = classified.classified_image.astype(np.int16)

# value=None: no mask is applied, so the whole raster is converted.
vector = raster_to_vector(mlp_raster, None, image.transform)

write_vector_to_geopackage(vector, "/mnt/c/tmp/output/classified_62_mlp.gpkg")

In [81]:
# Cast the RF classification raster to int16 before polygonising it.
rf_raster = classified_rf.classified_image.astype(np.int16)

# value=None: no mask is applied, so the whole raster is converted.
vector_rf = raster_to_vector(rf_raster, None, image.transform)

write_vector_to_geopackage(vector_rf, "/mnt/c/tmp/output/classified_62_rf.gpkg")