## Purpose
By taking a directory of SEM images from a given experiment, we want to segment AND classify every image such that we can begin looking at the data as a whole

In [1]:
# General Imports
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
import pandas as pd 
import matplotlib.pyplot as plt 
import scipy.stats as stat
import numpy as np 
#import forestsci
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from collections import Counter
import pickle
import glob
import copy

import sys
sys.path.append("..")
from Utility.segmentation_utils import * 

image_folder = "../Images/Additional" # Path to folder, adjust for other machines
result_folder = "../Results"
model_folder = "../Models"
image_list_Si = glob.glob(f'{image_folder}/Si embed/*')
image_list_Yuanwei = glob.glob(f'{image_folder}/Images to rachel (from Yuanwei)/*/*') # Might want to separate this?



In [2]:
# Define which image paths to run
parent_experiment = "L1_2.5_5_10_nM"
all_experiments = glob.glob(os.path.join(image_folder,parent_experiment,"*"))
all_experiments = [glob.glob(os.path.join("../Images/Training/*"))]
print(all_experiments)

# Load models
model_names = ["RF_C-MC_I-P.sav","RF_C_MC.sav","RF_I_P.sav"]
rf_CMC_IP, rf_C_MC, rf_I_P = [pickle.load(open(os.path.join(model_folder,model), 'rb'))\
                              for model in model_names]

# Define Features WHICH MUST MATCH WHAT ALL THREE MODELS WERE TRAINED ON
features=['area',
#        'equivalent_diameter', 
#        'orientation', 
    'major_axis_length',
    'minor_axis_length',
    'perimeter',
#        'min_intensity',
#        'mean_intensity',
        'max_intensity',
    'solidity',
    'major_axis_length/minor_axis_length',
    'perimeter/major_axis_length',
    'perimeter/minor_axis_length',
          'feret_diameter_max',
      'moments_hu-0',
      'moments_hu-1',
      'moments_hu-2',
      'moments_hu-3',
#      'moments_hu-4',
      'moments_hu-5',
#      'moments_hu-6',
#    'eccentricity'
         ]

[['../Images/Training/4 nM 7.bmp', '../Images/Training/39.5 hold 8.bmp', '../Images/Training/39.5 hold 4.bmp', '../Images/Training/39.5 hold 3.bmp', '../Images/Training/39.5 hold 7.bmp', '../Images/Training/39.5 hold 2.bmp', '../Images/Training/4 nM 3.bmp', '../Images/Training/4 nM 10.bmp', '../Images/Training/39.5 hold 6.bmp', '../Images/Training/4 nM 8.bmp', '../Images/Training/4 nM 9.bmp', '../Images/Training/39.5 hold 5.bmp', '../Images/Training/4 nM 1.bmp', '../Images/Training/4 nM 5.bmp', '../Images/Training/4 nM 6.bmp', '../Images/Training/39.5 hold 1.bmp', '../Images/Training/4 nM 4.bmp']]


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
# Define key helper functions
def assign_label(predicted_data,mode="C-MC_I-P"):
    '''
    Given an array of arrays, get the max column, associate that with a name, and return the fully labeled list
    Should work with 3 given modes
    '''
    valid_modes = ["C-MC_I-P","C_MC","I_P"]
    if mode not in valid_modes:
        print(f'Error: {mode} not supported')
        return -1
    label_arr = []
    for data in predicted_data:
        index = np.argmax(data)
        if mode == valid_modes[0]:
            if index == 0:
                label_arr.append("Crystal")
            if index == 1:
                label_arr.append("Incomplete")
                
        elif mode == valid_modes[1]:
            if index == 0:
                label_arr.append("Crystal")
            if index == 1:
                label_arr.append("Multiple Crystal")
                
        elif mode == valid_modes[2]:
            if index == 0:
                label_arr.append("Incomplete")
            if index == 1:
                label_arr.append("Poorly Segmented")
    return label_arr

def apply_coloring(IS,df_labeled):
    '''
    To aid study of an image, apply a colored filter over image such that we can see which regions
    are classified as which
    Make borders slightly darker
    '''
    C_color = np.array([0,255,0])
    MC_color = np.array([255,255,0])
    I_color = np.array([255,0,0])
    P_color = np.array([0,0,255])
    color_arr = [C_color,MC_color,I_color,P_color]
    match_arr = ["Crystal","Multiple Crystal", "Incomplete", "Poorly Segmented"]
    
    region_arr = IS.grab_region_array(focused=False)
    mod_image = cv2.cvtColor(IS.img2,cv2.COLOR_BGR2RGB)
    mask_image = copy.deepcopy(mod_image)*0
    for ii in np.arange(len(labeled_arr)):
            id_label = df_labeled.at[ii,"Labels"] # assumed sorted
            color = color_arr[match_arr.index(id_label)]
            mask_image[region_arr[ii] > 0] = color
    
    final_image = cv2.addWeighted(mod_image,.5,mask_image,.5,0)
    return final_image    

In [4]:
debug = []
for experiment in all_experiments[:1]:
    print(experiment)
    df_experiment = pd.DataFrame()
    experiment_images = experiment[:-2]#glob.glob(os.path.join(experiment,'*')) # Beware non-images
    
    for individual_image in experiment_images:
        print(individual_image)
        IS = ImageSegmenter(individual_image)
        df_image = IS.df

        # Numerical errors (divide by 0)
        df_image.replace([np.inf, -np.inf], np.nan, inplace=True)
        for feature in features:
            df_image.dropna(subset=[feature],inplace=True)
            
        ### Split Crystal & Multicrystal from Incomplete & Poorly Segmented###
        # Split Data
        X=df_image[features]

        predicted_data = rf_CMC_IP.predict(X)
        labeled_arr = assign_label(predicted_data)
        df_image['Labels'] = labeled_arr
        print("BIG SEPARATOR")
        print(np.unique(labeled_arr))
        
        #### Subdividing further ####
        df_image_sublist = []
        for label in ['Crystal','Incomplete']:
            df_image_temp = df_image[df_image['Labels'] == label]
            rf_temp = rf_C_MC if label == 'Crystal' else rf_I_P
            label_mode = "C_MC" if label == 'Crystal' else "I_P"
            X_temp = df_image_temp[features]

            try:
                predicted_data_temp = rf_temp.predict(X_temp)
            except:
                print(f'Prediction failed on {label}, maybe 0 elements?')
                predicted_data_temp = []
            print(predicted_data_temp)
            labeled_arr = assign_label(predicted_data_temp,mode=label_mode)
            df_image_temp['Labels'] = labeled_arr
            print(np.unique(df_image_temp["Labels"]))
            df_image_sublist.append(df_image_temp)

        df_image = pd.concat(df_image_sublist)
        df_image.sort_values(by="Region",inplace=True)
        debug = (individual_image,df_image)

        df_experiment = pd.concat([df_experiment,df_image])
    print(np.unique(df_image["Labels"]))
    df_experiment.to_csv(os.path.join(result_folder,experiment.split("/")[-1]))


['../Images/Training/4 nM 7.bmp', '../Images/Training/39.5 hold 8.bmp', '../Images/Training/39.5 hold 4.bmp', '../Images/Training/39.5 hold 3.bmp', '../Images/Training/39.5 hold 7.bmp', '../Images/Training/39.5 hold 2.bmp', '../Images/Training/4 nM 3.bmp', '../Images/Training/4 nM 10.bmp', '../Images/Training/39.5 hold 6.bmp', '../Images/Training/4 nM 8.bmp', '../Images/Training/4 nM 9.bmp', '../Images/Training/39.5 hold 5.bmp', '../Images/Training/4 nM 1.bmp', '../Images/Training/4 nM 5.bmp', '../Images/Training/4 nM 6.bmp', '../Images/Training/39.5 hold 1.bmp', '../Images/Training/4 nM 4.bmp']
../Images/Training/4 nM 7.bmp
Image Segmenter on 4 nM 7 created!
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.7890253  0.2109747 ]
 [0.88930432 0.11069568]
 [0.89972098 0.10027902]
 [0.69962178 0.30037822]
 [0.81356027 0.18643973]
 [0.73106399 0.26893601]
 [0.84544271 0.15455729]
 [0.73632812 0.26367187]
 [0.64645957 0.35354043]
 [0.8468936  0.1531064 ]
 [0.84670759 0.15329241]
 [0.49345858 0.506

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.89190848 0.10809152]
 [0.68449901 0.31550099]
 [0.48937872 0.51062128]
 [0.79023437 0.20976563]
 [0.85182292 0.14817708]
 [0.69042659 0.30957341]
 [0.80719866 0.19280134]
 [0.85362723 0.14637277]
 [0.83212426 0.16787574]
 [0.60063864 0.39936136]
 [0.83212426 0.16787574]
 [0.77449777 0.22550223]
 [0.90492932 0.09507068]
 [0.4921689  0.5078311 ]
 [0.88409598 0.11590402]
 [0.84854911 0.15145089]
 [0.86515997 0.13484003]
 [0.89157366 0.10842634]
 [0.84856771 0.15143229]
 [0.85883557 0.14116443]
 [0.91847098 0.08152902]
 [0.81419271 0.18580729]
 [0.89190848 0.10809152]
 [0.70900298 0.29099702]
 [0.51016245 0.48983755]
 [0.6937128  0.3062872 ]
 [0.87591146 0.12408854]
 [0.8736

  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['perimeter/minor_axis_length'] = clusters['perimeter']/clusters['minor_axis_length']


[  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.89711682 0.10288318]
 [0.68314732 0.31685268]
 [0.67263145 0.32736855]
 [0.7859561  0.2140439 ]
 [0.80872396 0.19127604]
 [0.8718564  0.1281436 ]
 [0.86028646 0.13971354]
 [0.74289435 0.25710565]
 [0.62001488 0.37998512]
 [0.8749814  0.1250186 ]
 [0.77475818 0.22524182]
 [0.59329117 0.40670883]
 [0.52474578 0.47525422]
 [0.74337178 0.25662822]
 [0.70951761 0.29048239]
 [0.85299479 0.14700521]
 [0.66052827 0.33947173]
 [0.50653522 0.49346478]
 [0.838

  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['perimeter/major_axis_length'] = clusters['perimeter']/clusters['major_axis_length']
  clusters['perimeter/minor_axis_length'] = clusters['perimeter']/clusters['minor_axis_length']


Image Segmenter on 39.5 hold 6 created!
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.85768229 0.14231771]
 [0.75107887 0.24892113]
 [0.89893973 0.10106027]
 [0.87706473 0.12293527]
 [0.87070312 0.12929687]
 [0.58270709 0.41729291]
 [0.81123512 0.18876488]
 [0.6641245  0.3358755 ]
 [0.50651662 0.49348338]
 [0.79283854 0.20716146]
 [0.89209449 0.10790551]
 [0.6062748  0.3937252 ]
 [0.81940104 0.18059896]
 [0.79335937 0.20664062]
 [0.74205729 0.25794271]
 [0.83098958 0.16901042]
 [0.60541533 0.39458467]
 [0.8109561  0.1890439 ]
 [0.80228795 0.19771205]
 [0.86940104 0.13059896]
 [0.86907862 0.13092138]
 [0.84596354 0.15403646]
 [0.85898437 0.14101563]
 [0.88539807 0.11460193]
 [0.72472098 0.27527902]
 [0.83018973 0.16981027]
 [0.85731027 0.14268973]
 [0.81002604 0.18997396]
 [0.87200521 0.12799479]
 [0.92367932

  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['perimeter/major_axis_length'] = clusters['perimeter']/clusters['major_axis_length']
  clusters['perimeter/minor_axis_length'] = clusters['perimeter']/clusters['minor_axis_length']


Image Segmenter on 4 nM 9 created!


  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['perimeter/minor_axis_length'] = clusters['perimeter']/clusters['minor_axis_length']


[  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
 145 146 147 148 149 150 151 152 153 154 155 156 157 158]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.85247396 0.14752604]
 [0.8718192  0.1281808 ]
 [0.8671689  0.1328311 ]
 [0.81858259 0.18141741]
 [0.69198289 0.30801711]
 [0.92576265 0.07423735]
 [0.86884301 0.13115699]
 [0.89190848 0.10809152]
 [0.62463418 0.37536582]
 [0.84321057 0.15678943]
 [0.77799479 0.22200521]
 [0.75505952 0.24494048]
 [0.79127604 0.2087

Image Segmenter on 4 nM 5 created!


  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['perimeter/minor_axis_length'] = clusters['perimeter']/clusters['minor_axis_length']


[  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.64139623 0.35860377]
 [0.84856771 0.15143229]
 [0.86809896 0.13190104]
 [0.68464162 0.31535838]
 [0.58625611 0.41374389]
 [0.86028646 0.13971354]
 [0.86772693 0.13227307]
 [0.75951761 0.24048239]
 [0.78606771 0.21393229]
 [0.86899182 0.13100818]
 [0.67451637 0.32548363]
 [0.89190848 0.10809152]
 [0.89972098 0.10027902]
 [0.59011037 0.40988963]
 [0.65404266 0.34595734]
 [0.89190848 0.10809152]
 [0.79287574 0.20712426]
 [0.76731771 0.23268229]
 [0

AttributeError: 'list' object has no attribute 'split'

In [None]:
test = apply_coloring(ImageSegmenter(debug[0]),debug[1])
print(np.unique(debug[1]["Labels"]))
plt.figure(figsize = (10,10))
plt.imshow(test)

In [None]:
debug_IS = ImageSegmenter(debug[0])
plt.imshow(debug_IS.markers)

In [None]:
import matplotlib.pyplot as plt
plt.imshow(IS.thresh)

In [None]:
# Quick check so one can see what region was "forgotten" by image segmenter
img_forgotten = copy.deepcopy(IS.img2)
img_forgotten[IS.markers > 10] = 0
plt.imshow(img_forgotten)

In [None]:
# load first RF model from disk
model_name = "../Models/RF_C-MC_I-P.sav"
loaded_model = pickle.load(open(model_name, 'rb'))


In [None]:
# Prepare data
df_image_C_MC = df_image[df_image['Labels'] == 'Crystal']
        df_image_I_P = df_image[df_image['Labels'] == 'Incomplete']
        
        X_C_MC = df_image_C_MC[features]
        X_I_P = df_image_I_P[features]
        
        predicted_data_C_MC = rf_C_MC.predict(X_C_MC)
        predicted_data_I_P = rf_I_P.predict(X_I_P)
        
        labeled_arr = assign_label(predicted_data_C_MC)
        df_image_C_MC['Labels'] = labeled_arr
        labeled_arr = assign_label(predicted_data_I_P)
        df_image_I_P['Labels'] = labeled_arr
        
        df_image = pd.concat([df_image_C_MC,df_image_I_P])
        df_image.sort_values(by="Region")
        df_experiment = pd.concat([df_experiment,df_image])

df = IS.df
#print(np.unique(df["Filename"]))

# Modify Data (Remove illegal data OR set different runtime)

# Numerical errors (divide by 0)
df.replace([np.inf, -np.inf], np.nan, inplace=True)
for feature in features:
    df.dropna(subset=[feature],inplace=True)

# Split Data (TO-DO)
X=df[features]

# Make adjustments to data
X=X # For RF, feature normalization NOT NEEDED

# NOTE (2022.08.24): What do I have to do with the classification afterwards???

#y
#ohe = OneHotEncoder(sparse=False)
#y = ohe.fit_transform(df[[label]])

#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

In [None]:

        
labeled_arr = assign_label(predicted_data)
IS.df['Labels'] = labeled_arr
IS.override_exists=True
IS.create_csv()
print(IS._csv_file)

In [None]:
df_Crystal = IS.df[IS.df["Labels"] == "Crystal"]
df_Incomplete = IS.df[IS.df["Labels"] != "Crystal"]
print(f'Num Crystals: {len(df_Crystal)} \nNum Incomplete: {len(df_Incomplete)}')

In [None]:

for ii in np.arange(len(IS.region_arr)):
    clear_output(wait=False)
    plt.imshow(IS.region_arr[ii])
    plt.show()
    print(f'Model Thinks: {labeled_arr[ii]}')
    input('Next (hit enter)')
