# Applying classification

Applying the models trained on the ground-truth data to both the digitized and the segmentized datasets.

In [1]:
import os

from functools import partial

import pandas as pd
import numpy as np
import geopandas as gpd

from scipy.spatial.distance import pdist, cdist, squareform
from shapely.geometry import Point
from sklearn.preprocessing import StandardScaler

from joblib import load

In [2]:
def PUK_kernel(X1, X2, sigma, omega):
    """Pearson VII function-based universal kernel (PUK) between two sample arrays.

    Adapted from @rlphilli:
    https://github.com/rlphilli/sklearn-PUK-kernel/blob/master/PUK_kernel.py

    With ``d2`` the squared Euclidean distance between a row of ``X1`` and a
    row of ``X2``, each entry of the result is::

        1 / (1 + 4 * d2 * sqrt(2 ** (1 / omega) - 1) / sigma ** 2) ** omega

    Parameters
    ----------
    X1, X2 : array-like
        Sample arrays, one observation per row.
    sigma, omega : float
        Kernel parameters used in the expression above.

    Returns
    -------
    numpy.ndarray
        Kernel matrix with one row per row of ``X1`` and one column per row
        of ``X2``.
    """
    # When the very same object is passed twice, only the condensed pairwise
    # distances are computed and then expanded to a square matrix.
    same_input = X1 is X2
    if same_input:
        sq_dist = squareform(pdist(X1, 'sqeuclidean'))
    else:
        sq_dist = cdist(X1, X2, 'sqeuclidean')

    width_term = np.sqrt(2**(1.0/omega)-1)
    base = 1 + (sq_dist * 4 * width_term) / sigma**2

    return 1 / base ** omega

def assign_stratum(sample_id, strata_samples):
    """Return the stratum whose sample list contains ``sample_id``.

    Both ``sample_id`` and the listed ids are compared as integers, so a
    string such as ``'7'`` matches the integer ``7``.

    Parameters
    ----------
    sample_id : int or str
        Identifier of the sample to look up.
    strata_samples : dict
        Mapping of stratum name to an iterable of sample ids.

    Returns
    -------
    The first stratum name (in mapping order) that lists the sample, or
    ``None`` when no stratum does.
    """
    # Lazy generator: stops at the first stratum with a matching id.
    matching = (
        name
        for name, members in strata_samples.items()
        if any(int(sample_id) == int(member) for member in members)
    )
    return next(matching, None)

Load models and scalers

In [3]:
# Directories holding the persisted artefacts (relative to the notebook).
models_path = '../data/results/models'
scalers_path = '../data/results/scalers'

# Sample ids that belong to each stratum.
strata_samples = {
    'residential': [13, 6, 0, 23, 8],
    'rural': [5, 19],
    'shanty': [3, 9, 2],
    'urbanirreg': [21, 15, 18, 16],
    'urbanreg': [11, 20, 4, 1, 7, 14, 12]
}
# Sample ids to leave out; note that they are stored as strings.
blacklist = list(map(str, (10, 17, 22)))

# One scaler per stratum, read from '<stratum>_scaler.joblib'.
scalers = {}
for stratum in strata_samples:
    scaler_file = os.path.join(scalers_path, f'{stratum}_scaler.joblib')
    scalers[stratum] = load(scaler_file)

# Every file in the models directory whose name starts with the stratum is
# loaded; the key is the file name without the '<stratum>_' prefix and the
# '_model.joblib' suffix.
models = {}
for stratum in strata_samples:
    prefix = f'{stratum}_'
    models[stratum] = {
        fname.replace(prefix, '').replace('_model.joblib', ''): load(os.path.join(models_path, fname))
        for fname in os.listdir(models_path)
        if fname.startswith(stratum)
    }

## Digitized

In [4]:
# Output directory for the classified datasets.
classified_path = '../data/results/classified'
os.makedirs(classified_path, exist_ok=True)

# `blacklist` holds the sample ids as strings, so the integer FID has to be
# compared through its string form; comparing the int directly never matches
# and no sample would be skipped.
digitized = {
    k: gpd.read_file(f'../data/Digitization/output/FID_{k}_digi_elev.gpkg')
    for k in range(24)
    if str(k) not in blacklist
}
# Merge the per-sample layers into one frame per stratum.
digitized = {
    stratum: pd.concat([digitized[k] for k in fids])
    for stratum, fids in strata_samples.items()
}

# Columns fed to the classifiers ('roof_type' is one-hot encoded below).
feature_cols = ['stories', 'area', 'roof_type', 'utm_x', 'utm_y']

digi_predictions = {}
for stratum, digi_df in digitized.items():
    df = digi_df.copy()
    df['area'] = df.geometry.area
    df['roof_type'] = df['Roof'].str.lower()
    # NOTE(review): presumably 'mean' is a mean height and 3 the assumed
    # height of one storey - confirm against the digitization output.
    df['stories'] = df['mean']/3
    centroids = df.geometry.centroid
    df['utm_x'] = centroids.x
    df['utm_y'] = centroids.y

    # Keep plausible records only, then drop rows lacking any feature.
    # The explicit copy makes `df` an independent frame, so adding the
    # prediction columns below does not trigger chained-assignment warnings.
    keep = (df['area'] > 10) & (df['stories'] > 0.5)
    df = df.loc[keep].dropna(subset=feature_cols).copy()

    # Select columns, one-hot encode the roof type and cast to float
    df_x = pd.get_dummies(df.loc[:, feature_cols]).astype(np.float64)

    # Scale with the scaler persisted for this stratum rather than fitting a
    # new one on the data being classified, so the features are scaled with
    # the statistics stored in that scaler.
    # NOTE(review): assumes the scaler was fitted on these columns in this
    # order - `transform` raises if the feature set differs.
    df_scaled = scalers[stratum].transform(df_x)

    # Apply every classifier of the stratum and save result in original DF
    for model, clf in models[stratum].items():
        df[f'class_{model}'] = clf.predict(df_scaled)

    # Save as CSV and also GPKG
    df.to_csv(os.path.join(classified_path, f'{stratum}_digitized.csv'), index=False)
    df.to_file(os.path.join(classified_path, f'{stratum}_digitized.gpkg'), driver='GPKG')
    digi_predictions[stratum] = df

## Segmentized

In [5]:
segmentized_all = gpd.read_file('../data/results/rgb_elev_segm.gpkg')

# The sample id is the part of 'segment_id' before the first underscore;
# samples not listed in any stratum get a missing stratum and are therefore
# not picked up by the per-stratum selection below.
segmentized_all['sample_id'] = segmentized_all['segment_id'].str.split('_').str[0]
segmentized_all['stratum'] = segmentized_all['sample_id'].apply(assign_stratum, strata_samples=strata_samples)

# Keep only segments labelled as concrete or tin. The explicit copy makes the
# filtered frame independent, so the column assignment that follows does not
# trigger chained-assignment warnings.
segmentized_all = segmentized_all.loc[segmentized_all['class_right'].isin(['concrete', 'tin'])].copy()
# NOTE(review): 'concrete' is renamed to 'rc', presumably to match the roof
# type labels the models were trained with - confirm.
segmentized_all['roof_type'] = segmentized_all['class_right'].replace({'concrete': 'rc'})

segmentized_all = segmentized_all.loc[(segmentized_all['area'] > 10) & (segmentized_all['stories'] > 0.5)]

# Columns fed to the classifiers ('roof_type' is one-hot encoded below).
feature_cols = ['stories', 'area', 'roof_type', 'utm_x', 'utm_y']

segmentized = {k:segmentized_all.loc[segmentized_all['stratum'] == k] for k in strata_samples.keys()}
segm_predictions = {}
for stratum, segm_df in segmentized.items():
    # Work on an independent frame without rows lacking any feature.
    df = segm_df.dropna(subset=feature_cols).copy()

    # Select columns, one-hot encode the roof type and cast to float
    df_x = pd.get_dummies(df.loc[:, feature_cols]).astype(np.float64)

    # Scale with the scaler persisted for this stratum rather than fitting a
    # new one on the data being classified, so the features are scaled with
    # the statistics stored in that scaler.
    # NOTE(review): assumes the scaler was fitted on these columns in this
    # order - `transform` raises if the feature set differs.
    df_scaled = scalers[stratum].transform(df_x)

    # Apply every classifier of the stratum and save result in original DF
    for model, clf in models[stratum].items():
        df[f'class_{model}'] = clf.predict(df_scaled)

    # Save as CSV and also GPKG
    df.to_csv(os.path.join(classified_path, f'{stratum}_segmentized.csv'), index=False)
    df.to_file(os.path.join(classified_path, f'{stratum}_segmentized.gpkg'), driver='GPKG')
    segm_predictions[stratum] = df