In [25]:
from tqdm import tqdm
import cv2
import numpy as np
import graph_based_image_segmentation as gbis
import json
from sklearn.decomposition import PCA
import joblib
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [None]:
sigma = 1
k = 1
k_i = 0
total = list(range(1, 1001))
for i in tqdm(range(1, 1001)):
    src = cv2.imread('data/imgs/' + str(i) + '.png')
    k, k_i, Id = gbis.Find_k(src.astype(float), k, k_i, sigma, gbis.MergeNeareast, gbis.Cosine)
    if Id is not None:
        vis = gbis.Visualize(Id)
        cv2.imwrite('data/classification/segmentation/' + str(i) + '.png', vis.astype(float))
    else:
        total.remove(i)
with open('data/classification/total.txt', 'w') as f:
    f.write(str(total))

  0%|                                                                                         | 0/1000 [00:00<?, ?it/s]

In [2]:
with open('data/classification/total.txt') as f:
    total = json.loads(f.readline())
np.random.seed(0)
np.random.shuffle(total)
train = total[:200]
test = total[200:]

In [3]:
H = dict()
for i in tqdm(total):
    src = cv2.imread('data/imgs/' + str(i) + '.png')
    h = [[0] * 512, list()]
    Id = cv2.imread('data/classification/segmentation/' + str(i) + '.png')[:, :, 0].astype(np.uint)
    Id_uni = np.unique(Id)
    for j in Id_uni:
        x2 = [0] * 512
        for g, b, r in src[np.where(Id == j)]:
            x2[g // 32 * 64 + b // 32 * 8 + r // 32] += 1
            h[0][g // 32 * 64 + b // 32 * 8 + r // 32] += 1
        h[1].append(x2)
    H[str(i)] = h
with open('data/classification/H.txt', 'w') as f:
    f.write(json.dumps(H))

100%|████████████████████████████████████████████████████████████████████████████████| 999/999 [05:50<00:00,  2.85it/s]


In [4]:
with open('data/classification/H.txt') as f:
    H = json.loads(f.readline())

In [5]:
X1 = [list(), list()]
for i in train:
    for x in H[str(i)][1]:
        X1[0].append(H[str(i)][0])
        X1[1].append(x)
X1[0] = np.array(X1[0])
X1[1] = np.array(X1[1])
X1 = np.concatenate([X1[i] / X1[i].sum(axis=1, keepdims=True) for i in (0, 1)], axis=1)

In [6]:
%%time
pca = PCA(n_components=20).fit(X1)
joblib.dump(pca, 'data/classification/pca.model')

Wall time: 590 ms


['data/classification/pca.model']

In [7]:
pca = joblib.load('data/classification/pca.model')

In [8]:
X1 = pca.transform(X1)

In [9]:
%%time
k = 50
kmeans = KMeans(n_clusters=k, init='k-means++', random_state=0).fit(X1)
joblib.dump(kmeans, 'data/classification/kmeans.model')

Wall time: 4.16 s


['data/classification/kmeans.model']

In [10]:
kmeans = joblib.load('data/classification/kmeans.model')

In [11]:
X2 = X1 @ kmeans.cluster_centers_.T / (np.linalg.norm(X1) * np.linalg.norm(kmeans.cluster_centers_))
X = np.concatenate((X1, X2), axis=1)

In [12]:
Y = list()
for i in tqdm(train):
    mask = cv2.imread('data/gt/' + str(i) + '.png')[:, :, 0] > 0
    Id = cv2.imread('data/classification/segmentation/' + str(i) + '.png')[:, :, 0].astype(np.uint)
    mask_new = gbis.Mask(mask, Id)[0][:, :, 0]
    Id_uni = np.unique(Id)
    for j in Id_uni:
        Y.append(int(mask_new[Id == j].any()))
Y = np.array(Y)

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:05<00:00, 35.76it/s]


In [13]:
def score(dataset, model_name):
    X1 = [list(), list()]
    for i in dataset:
        for x in H[str(i)][1]:
            X1[0].append(H[str(i)][0])
            X1[1].append(x)
    X1[0] = np.array(X1[0])
    X1[1] = np.array(X1[1])
    X1 = np.concatenate([X1[i] / X1[i].sum(axis=1, keepdims=True) for i in (0, 1)], axis=1)
    X1 = pca.transform(X1)
    X2 = X1 @ kmeans.cluster_centers_.T / (np.linalg.norm(X1) * np.linalg.norm(kmeans.cluster_centers_))
    X = np.concatenate((X1, X2), axis=1)
    model = joblib.load('data/classification/' + model_name + '.model')
    Y_pred = model.predict(X)
    Y = list()
    for i in tqdm(dataset):
        mask = cv2.imread('data/gt/' + str(i) + '.png')[:, :, 0] > 0
        Id = cv2.imread('data/classification/segmentation/' + str(i) + '.png')[:, :, 0].astype(np.uint)
        mask_new = gbis.Mask(mask, Id)[0][:, :, 0]
        Id_uni = np.unique(Id)
        for j in Id_uni:
            Y.append(int(mask_new[Id == j].any()))
    Y = np.array(Y)
    TP = np.logical_and(Y == Y_pred, Y_pred != 0).sum()
    FP = np.logical_and(Y != Y_pred, Y_pred != 0).sum()
    FN = np.logical_and(Y == Y_pred, Y_pred == 0).sum()
    TN = np.logical_and(Y != Y_pred, Y_pred == 0).sum()
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * precision * recall / (precision + recall)
    print('Accuracy:', (Y == Y_pred).sum() / Y.shape[0])
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1 score:', f1)

In [14]:
%%time
svc = SVC(random_state=0).fit(X, Y)
joblib.dump(svc, 'data/classification/svc.model')

Wall time: 6.13 s


['data/classification/svc.model']

In [15]:
score(train, 'svc')

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:05<00:00, 37.10it/s]

Accuracy: 0.8068181818181818
Precision: 0.7926078028747433
Recall: 0.1359154929577465
F1 score: 0.2320408776675684





In [16]:
score(test, 'svc')

100%|████████████████████████████████████████████████████████████████████████████████| 799/799 [00:21<00:00, 36.46it/s]

Accuracy: 0.7450807565867835
Precision: 0.5934398654331371
Recall: 0.11279854205966046
F1 score: 0.18956531083767664





In [29]:
%%time
lr = LogisticRegression(random_state=0).fit(X, Y)
joblib.dump(lr, 'data/classification/lr.model')

Wall time: 86.8 ms


['data/classification/lr.model']

In [30]:
score(train, 'lr')

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:05<00:00, 34.92it/s]

Accuracy: 0.7247159090909091
Precision: 0.4855072463768116
Recall: 0.026264210113680908
F1 score: 0.04983265154332465





In [31]:
score(test, 'lr')

100%|████████████████████████████████████████████████████████████████████████████████| 799/799 [00:22<00:00, 35.96it/s]

Accuracy: 0.7166611081995331
Precision: 0.4733072916666667
Recall: 0.024165669458848557
F1 score: 0.04598355471220746





In [17]:
%%time
rfc = RandomForestClassifier(random_state=0).fit(X, Y)
joblib.dump(rfc, 'data/classification/rfc.model')

Wall time: 5.49 s


['data/classification/rfc.model']

In [18]:
score(train, 'rfc')

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:05<00:00, 35.68it/s]

Accuracy: 0.9997159090909091
Precision: 0.9989648033126294
Recall: 0.2742256322818983
F1 score: 0.4303232998885173





In [19]:
score(test, 'rfc')

100%|████████████████████████████████████████████████████████████████████████████████| 799/799 [00:21<00:00, 36.63it/s]

Accuracy: 0.6865262756682071
Precision: 0.45643314574034544
Recall: 0.24483847461743988
F1 score: 0.31871358236596053



