In [1]:
import random as rd
import numpy as np
import pandas as pd
import os

from scoring import Metrics
from classification import Random

In [2]:
DIRPATH = 'src'

# Import data
import_path = os.path.join(DIRPATH, 'export.npy')

data = np.load (import_path)
#data[:, :-1] = data[:, :-1]/255.  # normalise values
vals_nb = data.shape[0]

In [3]:
# Create train and test sets (19.2s in low power, 11.5s)

def create_sets(data, train_frac=0.05):

    # Create (train, test)
    data_shuffled = data.copy()

    np.random.seed(0)
    np.random.shuffle(data_shuffled)

    train_nb = int(vals_nb * train_frac)
    # print(f'training on {train_nb} vals')

    return data_shuffled[:train_nb], data_shuffled[train_nb:]

data_train, data_test = create_sets(data, train_frac=0.3)

In [4]:
import cv2

# Test image
image_test_path = 'src/ima_1e8ccf23-d106-4227-908c-e4fbbb1da5f8.jpg'

image_test = image_test_path.split('/')[-1]
image_test_name, _ = os.path.splitext(image_test)
id_test = image_test_name[4:]

# Import and flatten test image
image_test_path = os.path.join('src/sky-images', image_test)
image_test_array = cv2.imread (image_test_path)
image_test_shape = image_test_array.shape
image_test_array_flat = image_test_array.reshape(-1, 1, 3)# / 255.
image_test_array_flat = image_test_array_flat.squeeze()

In [7]:
from classifiers.lda import LDA_bin as LDA
# F1 = 0.8913411645159924
# dt = 1:37min (low power)

RUN_LDA = True
if RUN_LDA:
    X_train, y_train = data_train[:, :-1], data_train[:, -1]
    X_test,  y_test  = data_test [:, :-1], data_test [:, -1]

    # Train LDA
    lda = LDA()
    lda.fit (X_train, y_train)

    # Test LDA
    pred_lda = lda.predict(X_test.T)
    metrics_lda = Metrics(pred_lda, y_test)

    # Test on test image
    image_test_pred = lda.predict(image_test_array_flat.T)
    image_test_pred = image_test_pred.reshape(image_test_shape[:2])
    cv2.imwrite(f'output/{image_test_name}-pred-lda.png', image_test_pred * 255)

[0 0 1 ... 1 1 0]
TP  = 2273658
FP  = 225912
FN  = 328428
TN  = 4121792
P   = 2602086
N   = 4347704
TPR = 0.8737828034892006
TNR = 0.9480387809289685
FPR = 0.05196121907103152
FNR = 0.1262171965107994
Precision           = 0.9096196545805879
False omission rate = 0.05196121907103152
Error rate          = 0.07976356120112982
Accuracy            = 0.9202364387988702
F1-score            = 0.8913411645159924
MCC                 = 0.8287908705201819
Kappa score         = 0.8283733047108975
[0 0 0 ... 0 0 0]


In [None]:
### QDA
from classifiers.qda import QDA

# F1 = 0.8984199239563736
# dt = 6.1s (100k datapoints) -> non-vectorized function

RUN_QDA = False
if RUN_QDA:
    data_train_qda = data_train
    data_test_qda  = data_test[:100000]
    train_vals, train_labels = data_train_qda[:, :-1], data_train_qda[:, -1]
    test_vals, test_labels   = data_test_qda[:, :-1], data_test_qda[:, -1]

    # Train QDA
    qda = QDA()
    qda.fit (train_vals, train_labels)

    # Test QDA
    pred_qda = qda.predict(test_vals)
    print(pred_qda.shape)
    metrics_qda = Metrics(test_labels, pred_qda)
    print(metrics_qda)

    # Test on test image
    image_test_pred = qda.predict(image_test_array_flat)
    image_test_pred = image_test_pred.reshape(image_test_shape[:2])
    cv2.imwrite(f'output/{image_test_name}-pred-qda.png', image_test_pred * 255)

(100000, 3)


ValueError: operands could not be broadcast together with shapes (100000,) (100000,3) 