In [None]:
import os
import cv2
import json
import numpy as np
import pandas
import skimage
import shapefile
import matplotlib.pyplot as plt

import modules

### Load data

In [None]:
# Load the Kenya shapefile (two values are returned; only kenya_osm is used
# below) and the Kenya geodata through the project's data helpers.
kenya_osm, kenya_sf = modules.data.load_shapefile("kenya")
kenya_geo = modules.data.load_geodata("kenya")
# Join the two tables on their shared "index" column.
kenya_dat = pandas.DataFrame.merge(kenya_geo, kenya_osm, on="index")

# Image filenames for D=128, kept as a set for fast membership tests.
kenya_filenames = set(modules.data.util.load_image_filenames("kenya", D=128))

In [None]:
N = 300   # number of images sampled per road class
SEED = 0  # fixed so that Restart & Run All reproduces the same sample and shuffle
np.random.seed(SEED)

# Flag every row whose image file is present in kenya_filenames. Files are
# named "<row position>_<first-column value>.npy".
# NOTE(review): the first column is presumably the road "id" (the filename
# list below is built from that column) -- confirm against the merged schema.
kenya_dat["valid"] = [
    f"{int(index)}_{int(road_id)}.npy" in kenya_filenames
    for index, road_id in enumerate(kenya_dat.values[:, 0])
]


def _sample_class(df, road_class, n):
    """Return `n` distinct rows of `df` that have class `road_class` and a valid image."""
    candidates = df.index[(df["class"] == road_class) & df["valid"]]
    # np.random.choice hands back index *labels*, so select with .loc rather
    # than the positional .iloc.
    return df.loc[np.random.choice(candidates, size=n, replace=False)]


major = _sample_class(kenya_dat, "major", N)
minor = _sample_class(kenya_dat, "minor", N)
two_track = _sample_class(kenya_dat, "two-track", N)

# Class-ordered filenames: the first N are major, the next N minor, the last
# N two-track, so (position // N) is the class label of each entry.
ordered_filenames = [
    f"{idx}_{int(road_id)}.npy"
    for df in [major, minor, two_track]
    for idx, road_id in zip(df.index, df["id"])
]
permutation = np.arange(len(ordered_filenames))
np.random.shuffle(permutation)

# Shuffle the filenames together with the images and labels so that
# filenames[k], images[k] and labels[k] always describe the same sample.
filenames = [ordered_filenames[i] for i in permutation]

image_dir = os.path.join(modules.data.util.root(), "kenya", "kenya_128x128_images")
images = np.array([np.load(os.path.join(image_dir, fname)) for fname in filenames])
labels = permutation // N
             

### Featurize

In [None]:
def _SIFT(image, sift, plot=False):
    """Detect keypoints on one image with the supplied detector.

    Args:
        image: colour image; it is converted to grayscale with
            cv2.COLOR_BGR2GRAY before detection.
        sift: detector object exposing `detect(image, mask)`.
        plot: when true, draw the keypoints on the grayscale image and show
            the result with matplotlib.

    Returns:
        Whatever `sift.detect` returns for the grayscale image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    detected = sift.detect(gray, None)
    if not plot:
        return detected
    overlay = cv2.drawKeypoints(gray, detected, outImage=np.array([]))
    plt.imshow(overlay)
    return detected

def SIFT(images):
    """Detect SIFT keypoints for every image in `images`.

    Args:
        images: iterable of colour images accepted by `_SIFT`.

    Returns:
        A list with one keypoint collection per input image, in input order.
    """
    # SIFT lives in the main cv2 namespace since OpenCV 4.4; older builds only
    # expose it through the contrib module cv2.xfeatures2d. Prefer the former
    # and fall back to the latter so both installations work.
    create = getattr(cv2, "SIFT_create", None)
    if create is None:
        create = cv2.xfeatures2d.SIFT_create
    sift = create()
    return [_SIFT(image, sift) for image in images]

# Smoke-run the SIFT detector on the first five images only.
sift = SIFT(images[:5])

In [None]:
def HOG(images):
    """Compute a HOG descriptor for each image.

    A single default-configured cv2.HOGDescriptor is shared by all images.

    Args:
        images: iterable of images accepted by `HOGDescriptor.compute`.

    Returns:
        A list with one descriptor per input image, in input order.
    """
    descriptor = cv2.HOGDescriptor()
    return [descriptor.compute(img) for img in images]

# Smoke-run the HOG descriptor on the first five images only.
hog = HOG(images[:5])

In [None]:
def Canny(images):
    """Compute a Canny edge map for each image.

    Each image is converted to grayscale and the hysteresis thresholds are
    set to half and twice the median gray level of that image.

    Args:
        images: iterable of colour images (converted with cv2.COLOR_BGR2GRAY).

    Returns:
        Array stacking one edge map per image, each with a trailing
        singleton channel axis.
    """
    edge_maps = []
    for img in images:
        grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        mid = np.median(grey)
        low, high = 0.5 * mid, 2 * mid
        edges = cv2.Canny(grey, low, high, apertureSize=3)
        # Add a channel axis so the result can go through the per-channel helpers.
        edge_maps.append(edges[:, :, None])
    return np.array(edge_maps)

In [None]:
def location(df, fnames):
    """Look up the (lat, lon) pair for each filename.

    Args:
        df: frame with "lat" and "lon" columns, indexed by the integer that
            prefixes each filename.
        fnames: filenames of the form "<index>_<anything>"; the text before
            the first underscore is parsed as the row label.

    Returns:
        float64 array with one [lat, lon] row per filename, in input order.
    """
    row_labels = (int(name.split("_")[0]) for name in fnames)
    coords = [
        df.loc[label][["lat", "lon"]].values.astype(np.float64)
        for label in row_labels
    ]
    return np.array(coords)

In [None]:
def channel_mean(images):
    """Average every channel of every image over axes 1 and 2.

    Args:
        images: array-like whose axes 1 and 2 are reduced (for a 4-D batch
            this leaves one value per image and channel).

    Returns:
        Array of means with axes 1 and 2 removed.
    """
    spatial_axes = (1, 2)
    return np.mean(images, axis=spatial_axes)

In [None]:
def channel_variance(images):
    """Population variance of every channel of every image over axes 1 and 2.

    Args:
        images: 4-D array-like; the per-image, per-channel mean is broadcast
            back over axes 1 and 2 before squaring.

    Returns:
        Array of variances with axes 1 and 2 removed.
    """
    centre = np.mean(images, axis=(1, 2))
    deviation = images - centre[:, None, None, :]
    squared = np.power(deviation, 2)
    return np.mean(squared, axis=(1, 2))

In [None]:
def feature_set(images):
    """Build one feature row per image from location, colour and edge statistics.

    The row is the concatenation of: (lat, lon), per-channel colour means,
    per-channel colour variances, Canny edge-map mean and Canny edge-map
    variance.

    Args:
        images: 4-D batch of colour images (the helpers reduce axes 1 and 2
            and keep the last axis as channels).

    Returns:
        2-D array with one row per image.

    NOTE(review): locations are looked up from the module-level `kenya_dat`
    and `filenames`; `filenames` must be in the same order as `images` for
    the rows to line up.
    """
    canny = Canny(images)

    locations = location(kenya_dat, filenames)
    # Colour statistics come from the images, edge statistics from the Canny
    # maps (the two inputs were previously swapped for the variance/mean pair).
    rgb_means = channel_mean(images)
    rgb_variances = channel_variance(images)
    canny_means = channel_mean(canny)
    canny_variances = channel_variance(canny)

    return np.concatenate([
        locations,
        rgb_means,
        rgb_variances,
        canny_means,
        canny_variances
    ], axis=-1)

In [None]:
# Whole-image features first, then the same feature set for each tile of a
# 3x3 grid; everything is joined along the feature axis at the end.
patch_size = 128 // 3
patch_starts = range(0 + 1, 128 - 1, patch_size)

feature_blocks = [feature_set(images)]
for i in patch_starts:
    for j in patch_starts:
        print(f"Patch ({i}, {j})")
        patches = images[:, i:i + patch_size, j:j + patch_size, :]
        feature_blocks.append(feature_set(patches))
features = np.concatenate(feature_blocks, axis=-1)

### Train

In [None]:
from sklearn.preprocessing import MinMaxScaler

from sklearn import linear_model, svm
from sklearn import tree
from sklearn import ensemble
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import r2_score, accuracy_score, f1_score, log_loss

In [None]:
def test(classifier, X_train, y_train, X_test, y_test, verbose=True):
    """Fit `classifier` and report its scores on the held-out and training data.

    The classifier is fitted on the training split, then r^2, accuracy and
    per-class F1 are computed first on the test split (reported as "val") and
    then on the training split.

    Args:
        classifier: estimator exposing `fit(X, y)` and `predict(X)`.
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels.
        verbose: print the scores when true; otherwise nothing is shown.

    Returns:
        None. The scores are only printed.
    """
    classifier.fit(X_train, y_train)

    report = []
    for split, X, y in (("val", X_test, y_test), ("train", X_train, y_train)):
        y_pred = classifier.predict(X)
        report.append((
            split,
            r2_score(y, y_pred),
            accuracy_score(y, y_pred),
            f1_score(y, y_pred, average=None),
        ))

    if not verbose:
        return
    for split, r2, accuracy, f1 in report:
        print(split)
        print(f"r^2: {r2}")
        print(f"accuracy: {accuracy}")
        print(f"f1: {f1}")

In [None]:
# First 90% of the rows are used for training, the remainder for testing.
cutoff = (9 * features.shape[0]) // 10

# Fit the scaler on the training rows only, then scale every row with it.
scaler = MinMaxScaler().fit(features[:cutoff])
scaled_features = scaler.transform(features)

X_train, X_test = scaled_features[:cutoff], scaled_features[cutoff:]
y_train, y_test = labels[:cutoff], labels[cutoff:]
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

In [None]:
# Various classifiers of interest implemented by SKLearn
# NOTE(review): none of these is given a random_state, so the stochastic
# models presumably vary from run to run unless the global RNG is seeded.
# Logistic regression: lbfgs solver, at most 1000 iterations.
log_classifier = linear_model.LogisticRegression(solver="lbfgs", max_iter=1000)
# Support vector classifier with an RBF kernel and gamma="auto".
svm_classifier = svm.SVC(kernel='rbf', gamma="auto")
# k-nearest neighbours with k = 6.
k_classifier = KNeighborsClassifier(n_neighbors=6)
# Single decision tree with default settings.
tree_classifier = tree.DecisionTreeClassifier()
# Random forest of 100 trees.
rf_classifier = ensemble.RandomForestClassifier(n_estimators=100)
# Gradient boosting with default settings.
boost_classifier = ensemble.GradientBoostingClassifier()
# Multi-layer perceptron, at most 1000 iterations.
nn_classifier = MLPClassifier(max_iter=1000)

In [None]:
# Logistic regression: fit on the training split and print val/train scores.
test(log_classifier, X_train, y_train, X_test, y_test)

In [None]:
# RBF support vector classifier: fit and print val/train scores.
test(svm_classifier, X_train, y_train, X_test, y_test)

In [None]:
# k-nearest neighbours: fit and print val/train scores.
test(k_classifier, X_train, y_train, X_test, y_test)

In [None]:
# Decision tree: fit and print val/train scores.
test(tree_classifier, X_train, y_train, X_test, y_test)

In [None]:
# Random forest: fit and print val/train scores.
test(rf_classifier, X_train, y_train, X_test, y_test)

In [None]:
# Gradient boosting: fit and print val/train scores.
test(boost_classifier, X_train, y_train, X_test, y_test)

In [None]:
# Multi-layer perceptron: fit and print val/train scores.
test(nn_classifier, X_train, y_train, X_test, y_test)