In [2]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import cv2
import os
import pandas as pd
import numpy as np
import glob

In [2]:
seg_model = 'k-means'
seg_nd = '3d'
cluster_num = 128
slice_num = 400

In [11]:
# dragonfly
def get_dragonfly_label_by_slice(slice_num):
    current_path = os.getcwd()
    s = slice_num - 400 + 1
    s = str(s).zfill(3)
    dragonfly_bassanite = os.path.join(current_path, 'VA10_0050_Bassanite\VA10_0050_Bassanite{}.tiff'.format(s))
    dragonfly_celestite = os.path.join(current_path, 'VA10_0050_Celestite\VA10_0050_Celestite{}.tiff'.format(s))
    dragonfly_gypsum = os.path.join(current_path, 'VA10_0050_Gypsum\VA10_0050_Gypsum{}.tiff'.format(s))
    dragonfly_pore = os.path.join(current_path, 'VA10_0050_Pores\VA10_0050_Pores{}.tiff'.format(s))

    bassanite = cv2.imread(dragonfly_bassanite)
    celestite = cv2.imread(dragonfly_celestite)
    gypsum = cv2.imread(dragonfly_gypsum)
    pore = cv2.imread(dragonfly_pore)

    # transfer the reference into 0/1 labels and then turn to its class number
    # 0: other;
    # 1: pore;
    # 2: gypsum;
    # 3: celestite;
    # 4: bassanite
    pore_label = (pore == [255,255,255]).all(axis=2) * 1
    gypsum_label = (gypsum == [255,255,255]).all(axis=2) * 2
    celestite_label = (celestite == [255,255,255]).all(axis=2) * 3
    bassanite_label = (bassanite == [255,255,255]).all(axis=2) * 4

    label = pore_label + gypsum_label + celestite_label + bassanite_label

    return label.flatten()

In [12]:
# gmm / k-means
def get_unsupervised_pred_by_slice(manual_label, seg_model, seg_nd, cluster_num, slice_num):
    current_path = os.getcwd()
    seg_path = os.path.join(current_path, 'new_large_clusters_rec', seg_model, seg_nd, 'cluster_{}'.format(cluster_num))
    seg_res_list = glob.glob(os.path.join(seg_path, str(slice_num), '*.png'))
    
    img = 0

    assert len(manual_label) == len(seg_res_list) == cluster_num

    for seg in seg_res_list:
        # our segmentation results has invered color, black [0,0,0] is the interested class
        c = int(os.path.basename(seg[-16:-13]))
        if manual_label[c] == 1:  # pore
            pore = cv2.imread(seg)
            pore = (pore == [0,0,0]).all(axis=2) * 1
            img += pore
        elif manual_label[c] == 2:  #gypsum
            gypsum = cv2.imread(seg)
            gypsum = (gypsum == [0,0,0]).all(axis=2) * 2
            img += gypsum
        elif manual_label[c] == 3:  #celestite
            celestite = cv2.imread(seg)
            celestite = (celestite == [0,0,0]).all(axis=2) * 3
            img += celestite
        elif manual_label[c] == 4:  #bassanite
            bassanite = cv2.imread(seg)
            bassanite = (bassanite == [0,0,0]).all(axis=2) * 4
            img += bassanite

    return img.flatten()

### load k-means 3d 128 model vgg16 & resnet

In [13]:
import numpy as np
import os
from matplotlib import pyplot as plt
import cv2
import csv
import pandas as pd
import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Conv2D #images are two dimensional. Videos are three dimension.
from keras.layers import MaxPooling2D, Flatten, Dense, Dropout

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

In [14]:
vgg = load_model('vgg_201_aligned_model\k-means_3d_128_vgg16_non_trainable_fc.h5')

### test data

In [15]:
def get_cluster_num_str(c):
    c = str(c).zfill(3)
    return c

In [16]:
# Set the target segmentation results 
res_folder = 'new_large_clusters_rec'
seg_model = 'k-means'   # choose between 'gmm' and 'k-means'
seg_nd = '3d'   # choose between '3d' and '4d'
cluster_num = 128  # choose between 16, 32, 64, and 128

In [17]:
# corresponding csv file to get slice & cluster combination
csv_file = os.path.join(os.getcwd(), 'seg_res_evaluation_csv', '{}_{}_{}.csv'.format(seg_model, seg_nd, cluster_num))
df = pd.read_csv(csv_file, usecols = ['slice', 'current_cluster'])
df['filename'] = df['slice'].map(str) + '\VA10_0050_0' + df['slice'].map(str) + '_' + df['current_cluster'].map(get_cluster_num_str)  + '.rec.8bit.png'

In [19]:
test_set = df.loc[df['slice'] < 500]
test_set.reset_index()

Unnamed: 0,index,slice,current_cluster,filename
0,0,400,0,400\VA10_0050_0400_000.rec.8bit.png
1,1,400,1,400\VA10_0050_0400_001.rec.8bit.png
2,2,400,2,400\VA10_0050_0400_002.rec.8bit.png
3,3,400,3,400\VA10_0050_0400_003.rec.8bit.png
4,4,400,4,400\VA10_0050_0400_004.rec.8bit.png
...,...,...,...,...
12795,12795,499,123,499\VA10_0050_0499_123.rec.8bit.png
12796,12796,499,124,499\VA10_0050_0499_124.rec.8bit.png
12797,12797,499,125,499\VA10_0050_0499_125.rec.8bit.png
12798,12798,499,126,499\VA10_0050_0499_126.rec.8bit.png


In [20]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

In [21]:
batch_size = 8

# Data path: used in data generator
base_folder = os.path.join(os.getcwd(), res_folder, seg_model, seg_nd, 'cluster_{}'.format(cluster_num))

test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


test_generator = test_datagen.flow_from_dataframe(dataframe=test_set, directory=base_folder,
                                             x_col='filename',
                                             y_col=None,
                                             target_size=(700, 855),
                                             batch_size=batch_size,
                                             shuffle=False,
                                             class_mode=None,
                                             seed=7
                                             )

Found 12800 validated image filenames.


In [22]:
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

In [23]:
test_generator.reset()
pred=vgg.predict(test_generator,
                             steps=STEP_SIZE_TEST,
                             verbose=1)



In [25]:
pred.shape

(12800, 4)

In [26]:
predicted_class_indices = np.argmax(pred,axis=1) + 1

### get prediction and then need to add this column to test_set df.

In [28]:
test_set['class'] = predicted_class_indices

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_set['class'] = predicted_class_indices


In [None]:
for s in range(400,500):
    gold_label = get_dragonfly_label_by_slice(s)
    current_slice = test_set.loc[test_set['slice'] == s]
    pred_label = current_slice['class'].to_numpy()
    prediction = get_unsupervised_pred_by_slice(pred_label, seg_model, seg_nd, cluster_num, s)
    p, r, f1, _ = precision_recall_fscore_support(gold_label, prediction, average=None, labels=[1,2,3,4])
    macro_p, macro_f, macro_f, _ = precision_recall_fscore_support(gold_label, predicted_class_indices, average='macro', labels=[1,2,3,4])



In [33]:
current_slice = test_set.loc[test_set['slice'] == 400]

In [36]:
type(current_slice['class'].to_numpy())

numpy.ndarray

In [38]:
header = ['slice', 'pore_precision', 'pore_recall', 'pore_f1', 'gypsum_precision', 'gypsum_recall', 'gypsum_f1', 
          'celestite_precision', 'celestite_recall', 'celestite_f1', 'bassanite_precision', 'bassanite_recall', 'bassanite_f1', 'macro_p', 'macro_r', 'macro_f']
f = open('test_k-means_3d_128.csv', 'w')
writer = csv.writer(f)
writer.writerow(header)

for s in range(400,500):
    gold_label = get_dragonfly_label_by_slice(s)
    current_slice = test_set.loc[test_set['slice'] == s]
    pred_label = current_slice['class'].to_numpy()
    prediction = get_unsupervised_pred_by_slice(pred_label, seg_model, seg_nd, cluster_num, s)
    p, r, f1, _ = precision_recall_fscore_support(gold_label, prediction, average=None, labels=[1,2,3,4])
    macro_p, macro_r, macro_f, _ = precision_recall_fscore_support(gold_label, prediction, average='macro', labels=[1,2,3,4])

    data = [s, p[0], r[0], f1[0], p[1], r[1], f1[1], p[2], r[2], f1[2], p[3], r[3], f1[3], macro_p, macro_r, macro_f]
    writer.writerow(data)

f.close()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [3]:
cnn = pd.read_csv('test_k-means_3d_128.csv')

In [4]:
cnn.describe()

Unnamed: 0,slice,pore_precision,pore_recall,pore_f1,gypsum_precision,gypsum_recall,gypsum_f1,celestite_precision,celestite_recall,celestite_f1,bassanite_precision,bassanite_recall,bassanite_f1,macro_p,macro_r,macro_f
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,449.5,0.679273,0.222682,0.330162,0.860189,0.978078,0.915231,0.739044,0.609446,0.659643,0.717102,0.587355,0.644452,0.748902,0.59939,0.637372
std,29.011492,0.076717,0.054697,0.061084,0.018679,0.002961,0.009928,0.395577,0.360171,0.373835,0.032081,0.036415,0.019211,0.099576,0.099018,0.102624
min,400.0,0.482291,0.09135,0.163046,0.83475,0.968614,0.901249,0.0,0.0,0.0,0.633114,0.525038,0.589561,0.545566,0.40311,0.417662
25%,424.75,0.62765,0.184481,0.290209,0.84549,0.976655,0.907107,0.864286,0.304116,0.452637,0.698384,0.558102,0.629928,0.762444,0.523182,0.58891
50%,449.5,0.685725,0.215296,0.329824,0.853227,0.978613,0.910806,0.954531,0.837595,0.894188,0.713032,0.581181,0.644572,0.795966,0.651758,0.689367
75%,474.25,0.72973,0.254967,0.368645,0.881822,0.980085,0.927123,0.962924,0.862463,0.909047,0.726471,0.621244,0.659441,0.807553,0.669992,0.706686
max,499.0,0.894704,0.358377,0.472448,0.896064,0.984139,0.93161,0.97451,0.887928,0.926375,0.809326,0.654784,0.676845,0.855516,0.706957,0.738655
