In [1]:
import numpy as np
import cv2
import pandas as pd

## Feature Engineering and Extraction

In [6]:
img = cv2.imread("partial_labels/sandstone_train_images.tif")
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

In [8]:
# Create dataframe and engineer features
df = pd.DataFrame()

# Feature: original pixel values
img2 = img.reshape(-1)
df["Original Image"] = img2  

In [13]:
# Feature: first set of Gabor features by applying filters to original image
# Apply 32 generated Gabor filters w/ various parameters to image 
num = 1
kernels = []
for theta in range(2):
    theta = theta / 4. * np.pi
    for sigma in (1, 3):
        for lamda in np.arange(0, np.pi, np.pi / 4):
            for gamma in (0.05, 0.5):
                gabor_label = "Gabor" + str(num)
                ksize = 5
                kernel = cv2.getGaborKernel(
                    (ksize, ksize), 
                    sigma, theta, lamda, gamma, 0, 
                    ktype=cv2.CV_32F)
                kernels.append(kernel)
                fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
                filtered_img = fimg.reshape(-1)
                df[gabor_label] = filtered_img
                print(gabor_label, ": theta=", theta, ": sigma=", sigma,
                     ": lamda=", lamda, ": gamma=", gamma)
                num += 1

Gabor1 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.05
Gabor2 : theta= 0.0 : sigma= 1 : lamda= 0.0 : gamma= 0.5
Gabor3 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor4 : theta= 0.0 : sigma= 1 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor5 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor6 : theta= 0.0 : sigma= 1 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor7 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.05
Gabor8 : theta= 0.0 : sigma= 1 : lamda= 2.356194490192345 : gamma= 0.5
Gabor9 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.05
Gabor10 : theta= 0.0 : sigma= 3 : lamda= 0.0 : gamma= 0.5
Gabor11 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.05
Gabor12 : theta= 0.0 : sigma= 3 : lamda= 0.7853981633974483 : gamma= 0.5
Gabor13 : theta= 0.0 : sigma= 3 : lamda= 1.5707963267948966 : gamma= 0.05
Gabor14 : theta= 0.0 : sigma= 3 : lamda= 1.5707963267948966 : gamma= 0.5
Gabor15 : theta= 0.0 : sigma= 3 : lamda= 2

In [16]:
# Canny edge; edge detection filter
edges = cv2.Canny(img, 100, 200)
edges1 = edges.reshape(-1)
df["Canny Edge"] = edges1

In [24]:
# Some more filters
from skimage.filters import roberts, sobel, scharr, prewitt
edge_roberts = roberts(img)
edge_roberts1 = edge_roberts.reshape(-1)
df["Roberts"] = edge_roberts1

edge_sobel = sobel(img)
edge_sobel1 = edge_sobel.reshape(-1)
df["Sobel"] = edge_sobel1

edge_scharr = scharr(img)
edge_scharr1 = edge_scharr.reshape(-1)
df["Scharr"] = edge_scharr1

edge_prewitt = prewitt(img)
edge_prewitt1 = edge_prewitt.reshape(-1)
df["Prewitt"] = edge_prewitt1

In [30]:
from scipy import ndimage as nd
gaussian_img = nd.gaussian_filter(img, sigma=3)
gaussian_img1 = gaussian_img.reshape(-1)
df["Gaussian s3"] = gaussian_img1

gaussian_img2 = nd.gaussian_filter(img, sigma=7)
gaussian_img3 = gaussian_img2.reshape(-1)
df["Gaussian s7"] = gaussian_img3

median_img = nd.median_filter(img, size=3)
median_img1 = median_img.reshape(-1)
df["Median s3"] = median_img1

# variance_img = nd.generic_filter(img, np.var, size=3)
# variance_img1 = variance_img.reshape(-1)
# df["Variance s3"] = variance_img1

In [45]:
# Add a column for mask (ground truth)
labeled_img = cv2.imread("partial_labels/sandstone_partial_labels_from_APEER_ML.tif")
labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_BGR2GRAY)
labeled_img1 = labeled_img.reshape(-1)
df["Label"] = labeled_img1

In [46]:
df.head()

Unnamed: 0,Original Image,Gabor1,Gabor2,Gabor3,Gabor4,Gabor5,Gabor6,Gabor7,Gabor8,Gabor9,...,Roberts,Sobel,Scharr,Prewitt,Gaussian s3,Gaussian s7,Median s3,Variance s3,Labels,Label
0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,0,0


## Data Split and Train

In [53]:
Y = df["Label"].values
X = df.drop(labels=["Label"], axis=1)

In [55]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=20)

In [59]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=10, random_state=42)

In [60]:
model.fit(X_train, Y_train)

RandomForestClassifier(n_estimators=10, random_state=42)

In [64]:
prediction_test = model.predict(X_test)

In [65]:
from sklearn import metrics
print("Accuracy = ", metrics.accuracy_score(Y_test, prediction_test))

Accuracy =  0.9997499767135174
