## Purpose
Given a directory of SEM images from a single experiment, we segment each image and classify every segmented region, so that the experiment's data can be examined as a whole.

In [1]:
# General Imports
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
import pandas as pd 
import matplotlib.pyplot as plt 
import scipy.stats as stat
import numpy as np 
#import forestsci
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from collections import Counter
import pickle
import glob
import copy

import sys
sys.path.append("..")
from Utility.segmentation_utils import * 

# Folder layout, relative to this notebook. Adjust for other machines.
image_folder = "../Images/Additional"
result_folder = "../Results"
model_folder = "../Models"

# Glob patterns for the two additional image collections.
si_embed_pattern = f'{image_folder}/Si embed/*'
yuanwei_pattern = f'{image_folder}/Images to rachel (from Yuanwei)/*/*' # Might want to separate this?

image_list_Si = glob.glob(si_embed_pattern)
image_list_Yuanwei = glob.glob(yuanwei_pattern)



In [2]:
# Define which image paths to run: every entry directly under the parent
# experiment folder is treated as one experiment.
parent_experiment = "L1_2.5_5_10_nM"
all_experiments = glob.glob(os.path.join(image_folder,parent_experiment,"*"))
print(all_experiments)

# Load models, in the order: first-stage (C/MC vs I/P), Crystal vs Multiple
# Crystal, Incomplete vs Poorly Segmented.
# Each file is opened in a `with` block so its handle is closed right after
# unpickling instead of being left for the garbage collector.
# WARNING: pickle.load executes arbitrary code from the file; only load model
# files that come from a trusted source.
# NOTE(review): the recorded output links to scikit-learn's model-persistence
# limitations page, which suggests a version-mismatch warning on load - confirm.
model_names = ["RF_C-MC_I-P.sav","RF_C_MC.sav","RF_I_P.sav"]
loaded_models = []
for model_filename in model_names:
    with open(os.path.join(model_folder,model_filename), 'rb') as model_file:
        loaded_models.append(pickle.load(model_file))
rf_CMC_IP, rf_C_MC, rf_I_P = loaded_models

# Feature columns fed to the classifiers.
# THESE MUST MATCH WHAT ALL THREE MODELS WERE TRAINED ON (names and order).
# Commented-out entries are candidate features that are currently disabled.
features = [
    # --- size / shape ---
    'area',
    # 'equivalent_diameter',
    # 'orientation',
    'major_axis_length',
    'minor_axis_length',
    'perimeter',
    # --- intensity ---
    # 'min_intensity',
    # 'mean_intensity',
    'max_intensity',
    'solidity',
    # --- derived ratios ---
    'major_axis_length/minor_axis_length',
    'perimeter/major_axis_length',
    'perimeter/minor_axis_length',
    'feret_diameter_max',
    # --- Hu moments ---
    'moments_hu-0',
    'moments_hu-1',
    'moments_hu-2',
    'moments_hu-3',
    # 'moments_hu-4',
    'moments_hu-5',
    # 'moments_hu-6',
    # 'eccentricity',
]

['../Images/Additional/L1_2.5_5_10_nM/L1 10nM', '../Images/Additional/L1_2.5_5_10_nM/L1 5nM', '../Images/Additional/L1_2.5_5_10_nM/L1 2.5 nM']


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [8]:
# Define key helper functions
def assign_label(predicted_data,mode="C-MC_I-P"):
    '''
    Translate per-row model outputs into class-name strings.

    Each row of predicted_data is reduced to the position of its largest
    entry, and that position is mapped to a name according to mode:
        "C-MC_I-P": 0 -> "Crystal",    1 -> "Incomplete"
        "C_MC":     0 -> "Crystal",    1 -> "Multiple Crystal"
        "I_P":      0 -> "Incomplete", 1 -> "Poorly Segmented"

    Rows whose largest entry is at any other position contribute no label,
    so the returned list can be shorter than predicted_data in that case.
    An unsupported mode prints an error message and returns -1.
    '''
    supported_modes = ("C-MC_I-P", "C_MC", "I_P")
    # Class-name pairs, position-aligned with supported_modes
    class_pairs = (
        ("Crystal", "Incomplete"),
        ("Crystal", "Multiple Crystal"),
        ("Incomplete", "Poorly Segmented"),
    )

    if mode not in supported_modes:
        print(f'Error: {mode} not supported')
        return -1

    name_if_first, name_if_second = class_pairs[supported_modes.index(mode)]

    labels = []
    for row in predicted_data:
        winner = np.argmax(row)
        if winner == 0:
            labels.append(name_if_first)
        elif winner == 1:
            labels.append(name_if_second)
    return labels

def apply_coloring(IS,df_labeled):
    '''
    Overlay a semi-transparent, class-colored mask on a segmented image so
    one can see which regions were classified as which.

    Parameters
    ----------
    IS : segmenter object
        This function reads IS.img2 (converted from BGR to RGB) and calls
        IS.grab_region_array(focused=False) to obtain the per-region arrays.
    df_labeled : pandas.DataFrame
        Must have a "Labels" column holding one of "Crystal",
        "Multiple Crystal", "Incomplete" or "Poorly Segmented" per row.
        NOTE(review): each row's index label is used as the position of its
        region in the region array (same mapping the earlier positional
        lookup relied on) - confirm against ImageSegmenter.

    Returns
    -------
    The RGB image blended with the color mask (mask weight 0.3).

    Raises
    ------
    ValueError
        If a row carries a label that is not one of the four known names.
    '''
    # Colors are RGB triples because the base image is converted to RGB below.
    color_by_label = {
        "Crystal": np.array([0,255,0]),
        "Multiple Crystal": np.array([255,255,0]),
        "Incomplete": np.array([255,0,0]),
        "Poorly Segmented": np.array([0,0,255]),
    }

    region_arr = IS.grab_region_array(focused=False)
    mod_image = cv2.cvtColor(IS.img2,cv2.COLOR_BGR2RGB)
    mask_image = np.zeros_like(mod_image)

    # Walk the rows that actually exist in df_labeled. Driving the loop from
    # the frame itself (rather than a global list and 0..n-1 positions) keeps
    # it working after rows have been dropped or the frame re-ordered.
    for region_index, id_label in df_labeled["Labels"].items():
        if id_label not in color_by_label:
            raise ValueError(f'Unknown label {id_label!r} for region {region_index}')
        mask_image[region_arr[region_index] > 0] = color_by_label[id_label]

    final_image = cv2.addWeighted(mod_image,1,mask_image,.3,0)
    return final_image

In [4]:
# Segment and classify every image of each selected experiment folder, then
# write one results table per experiment into result_folder.
# Two-stage classification per image:
#   1. rf_CMC_IP splits regions into "Crystal" vs "Incomplete" groups.
#   2. rf_C_MC refines the Crystal group (Crystal / Multiple Crystal) and
#      rf_I_P refines the Incomplete group (Incomplete / Poorly Segmented).
debug = []  # after the loop: (path, labeled DataFrame) of the last image processed
# NOTE(review): the [:1] slice processes only the first experiment folder.
for experiment in all_experiments[:1]:
    print(experiment)
    experiment_images = glob.glob(os.path.join(experiment,'*')) # Beware non-images
    experiment_frames = []  # one labeled frame per image, concatenated once below

    for individual_image in experiment_images:
        print(individual_image)
        IS = ImageSegmenter(individual_image)
        df_image = IS.df

        # Numerical errors (divide by 0): turn infinities into NaN, then drop
        # every row missing any model feature. Done in place on IS.df, as before.
        df_image.replace([np.inf, -np.inf], np.nan, inplace=True)
        df_image.dropna(subset=features, inplace=True)

        ### Split Crystal & Multicrystal from Incomplete & Poorly Segmented###
        X = df_image[features]
        predicted_data = rf_CMC_IP.predict(X)
        labeled_arr = assign_label(predicted_data)
        df_image['Labels'] = labeled_arr
        print("BIG SEPARATOR")
        print(np.unique(labeled_arr))

        #### Subdividing further ####
        df_image_sublist = []
        second_stage = (('Crystal', rf_C_MC, "C_MC"), ('Incomplete', rf_I_P, "I_P"))
        for label, rf_temp, label_mode in second_stage:
            # .copy() so the relabeling below writes to an independent frame
            # rather than to a slice of df_image.
            df_image_temp = df_image[df_image['Labels'] == label].copy()
            X_temp = df_image_temp[features]

            if X_temp.empty:
                # Nothing landed in this group; skip the model call explicitly
                # instead of catching whatever error it would raise. Genuine
                # prediction failures now propagate.
                print(f'Prediction failed on {label}, maybe 0 elements?')
                predicted_data_temp = []
            else:
                predicted_data_temp = rf_temp.predict(X_temp)
            print(predicted_data_temp)
            labeled_arr = assign_label(predicted_data_temp,mode=label_mode)
            df_image_temp['Labels'] = labeled_arr
            print(np.unique(df_image_temp["Labels"]))
            df_image_sublist.append(df_image_temp)

        # sort_values returns a new frame; keep it so rows really are ordered by Region
        df_image = pd.concat(df_image_sublist).sort_values(by="Region")
        debug = (individual_image,df_image)
        experiment_frames.append(df_image)

    # Concatenate once per experiment instead of growing a frame inside the loop
    if experiment_frames:
        df_experiment = pd.concat(experiment_frames)
        print(np.unique(experiment_frames[-1]["Labels"]))
    else:
        df_experiment = pd.DataFrame()
    # basename handles the platform's path separators; the file name itself is unchanged
    df_experiment.to_csv(os.path.join(result_folder,os.path.basename(experiment)))


../Images/Additional/L1_2.5_5_10_nM/L1 10nM
../Images/Additional/L1_2.5_5_10_nM/L1 10nM/L1_10nM_09.tif
Image Segmenter on L1_10nM_09 created!
[1 2 3 4 5]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.76599702 0.23400298]]
['Crystal']
[[0.56318824 0.43681176]
 [0.63353795 0.36646205]
 [0.51698909 0.48301091]
 [0.59257812 0.40742187]]
['Incomplete']
../Images/Additional/L1_2.5_5_10_nM/L1 10nM/L1_10nM_10.tif
Image Segmenter on L1_10nM_10 created!
[ 1  2  3  4  5  6  7  8  9 10 11 12]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.74951637 0.25048363]
 [0.81285342 0.18714658]
 [0.60900918 0.39099082]
 [0.68917411 0.31082589]]
['Crystal']
[[0.65721106 0.34278894]
 [0.53318452 0.46681548]
 [0.7062128  0.2937872 ]
 [0.48044395 0.51955605]
 [0.6203125  0.3796875 ]
 [0.68197545 0.31802455]
 [0.624938   0.375062  ]
 [0.64570313 0.35429688]]
['Incomplete' 'Poorly Segmented']
../Images/Additional/L1_2.5_5_10_nM/L1 10nM/L1_10nM_03.tif
Image Segmenter on L1_10nM_03 created!
[1 2 3 4 5]
BIG SEPARATOR
['Crys

  clusters['major_axis_length/minor_axis_length'] = clusters['major_axis_length']/clusters['minor_axis_length']
  clusters['perimeter/major_axis_length'] = clusters['perimeter']/clusters['major_axis_length']
  clusters['perimeter/minor_axis_length'] = clusters['perimeter']/clusters['minor_axis_length']


[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57]
BIG SEPARATOR
['Crystal' 'Incomplete']
[[0.68771081 0.31228919]
 [0.70359861 0.29640139]
 [0.75792411 0.24207589]
 [0.49814608 0.50185392]
 [0.70583076 0.29416924]
 [0.75365442 0.24634558]
 [0.85584077 0.14415923]
 [0.76912202 0.23087798]
 [0.85362723 0.14637277]
 [0.64608373 0.35391627]
 [0.68640253 0.31359747]
 [0.65203373 0.34796627]
 [0.65486111 0.34513889]]
['Crystal' 'Multiple Crystal']
[[0.76912202 0.23087798]
 [0.61274802 0.38725198]
 [0.74985119 0.25014881]
 [0.71021205 0.28978795]
 [0.70085565 0.29914435]
 [0.51531498 0.48468502]
 [0.55853795 0.44146205]
 [0.59031498 0.40968502]
 [0.78690927 0.21309073]
 [0.65546875 0.34453125]
 [0.54114583 0.45885417]
 [0.63716518 0.36283482]
 [0.60078125 0.39921875]
 [0.59448785 0.40551215]
 [0.77875744 0.22124256]
 [0.59088542 0.40911458]
 [0.72824901 0.27175099]
 [0.

In [9]:
# Re-segment the image recorded in `debug` and overlay its stored labels
debug_segmenter = ImageSegmenter(debug[0])
apply_coloring(debug_segmenter,debug[1])

Image Segmenter on L1_10nM_02 created!
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]


error: OpenCV(4.6.0) :-1: error: (-5:Bad argument) in function 'addWeighted'
> Overload resolution failed:
>  - addWeighted() missing required argument 'gamma' (pos 5)
>  - addWeighted() missing required argument 'gamma' (pos 5)


In [None]:
# Display the marker array of the current segmenter `IS` (whichever image the
# processing loop handled last).
# matplotlib.pyplot is already imported as plt in the first cell; this
# re-import is redundant but harmless.
import matplotlib.pyplot as plt
plt.imshow(IS.markers)

In [None]:
# Visual check of what the image segmenter "forgot": black out every pixel
# whose marker value is above 10 and show what remains of the image.
# NOTE(review): the meaning of the threshold 10 is not visible here - confirm.
img_forgotten = copy.deepcopy(IS.img2)
above_threshold = IS.markers > 10
img_forgotten[above_threshold] = 0
plt.imshow(img_forgotten)

In [None]:
# load first RF model from disk
# The file is opened in a `with` block so the handle is closed as soon as the
# model has been unpickled.
# WARNING: pickle.load executes arbitrary code from the file; only load model
# files that come from a trusted source.
model_name = "../Models/RF_C-MC_I-P.sav"
with open(model_name, 'rb') as model_file:
    loaded_model = pickle.load(model_file)


In [None]:
# Prepare data
# Second-stage classification of the image currently held in df_image, which
# must already carry first-stage "Crystal" / "Incomplete" labels.
# .copy() makes each subset an independent frame so relabeling it below does
# not write to a slice of df_image.
df_image_C_MC = df_image[df_image['Labels'] == 'Crystal'].copy()
df_image_I_P = df_image[df_image['Labels'] == 'Incomplete'].copy()

X_C_MC = df_image_C_MC[features]
X_I_P = df_image_I_P[features]

predicted_data_C_MC = rf_C_MC.predict(X_C_MC)
predicted_data_I_P = rf_I_P.predict(X_I_P)

# Each sub-model needs its own mode; the default mode would name both
# sub-models' outputs "Crystal" / "Incomplete".
labeled_arr = assign_label(predicted_data_C_MC, mode="C_MC")
df_image_C_MC['Labels'] = labeled_arr
labeled_arr = assign_label(predicted_data_I_P, mode="I_P")
df_image_I_P['Labels'] = labeled_arr

df_image = pd.concat([df_image_C_MC,df_image_I_P])
# sort_values returns a new frame, so the result has to be kept
df_image = df_image.sort_values(by="Region")
df_experiment = pd.concat([df_experiment,df_image])

# Work directly on the current segmenter's table (same object, not a copy)
df = IS.df
#print(np.unique(df["Filename"]))

# Modify Data (Remove illegal data OR set different runtime)

# Clean in place: infinities (numerical errors, divide by 0) become NaN, then
# every row that is missing any model feature is removed.
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(subset=features, inplace=True)

# Feature matrix (TO-DO: split data).
# No adjustment step: for RF, feature normalization is NOT NEEDED.
X = df[features]

# NOTE (2022.08.24): What do I have to do with the classification afterwards???

# Disabled training-time scaffolding, kept for reference:
#y
#ohe = OneHotEncoder(sparse=False)
#y = ohe.fit_transform(df[[label]])

#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

In [None]:

        
# Name the first-stage predictions (default mode: "Crystal" / "Incomplete")
# and store them on the current segmenter's table.
# Relies on `predicted_data` and `IS` left over from earlier cells.
labeled_arr = assign_label(predicted_data)
IS.df['Labels'] = labeled_arr
# NOTE(review): override_exists presumably lets create_csv() overwrite an
# existing file - confirm in ImageSegmenter.
IS.override_exists=True
IS.create_csv()
# Show the path stored on the segmenter's _csv_file attribute
print(IS._csv_file)

In [None]:
# Split the current segmenter's table into "Crystal" rows and everything else,
# then report how many rows fall on each side.
is_crystal = IS.df["Labels"] == "Crystal"
df_Crystal = IS.df[is_crystal]
df_Incomplete = IS.df[~is_crystal]
print(f'Num Crystals: {len(df_Crystal)} \nNum Incomplete: {len(df_Incomplete)}')

In [None]:

# Step through the segmented regions one at a time: show the region, print the
# label the model gave it, and wait for the user before moving on.
region_count = len(IS.region_arr)
for ii in range(region_count):
    clear_output(wait=False)
    plt.imshow(IS.region_arr[ii])
    plt.show()
    print(f'Model Thinks: {labeled_arr[ii]}')
    input('Next (hit enter)')
