In [33]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
plt.style.use('ggplot')
from sklearn import neighbors, linear_model
from sklearn.model_selection import train_test_split
from skimage.feature import greycomatrix, greycoprops
import cv2
import os
from tqdm import tqdm_notebook as tqdm
import seaborn as sns
from sklearn.decomposition import PCA

In [34]:
import warnings
warnings.filterwarnings('ignore')

In [35]:
%matplotlib inline

In [36]:
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 10,8

In [37]:
img_width = 64
img_height = 64
train_root_folder = 'fibrosis_patches_png/train/'
test_root_folder = 'fibrosis_patches_png/test/'

In [38]:
df_files = pd.read_csv('train.csv')
df_files = df_files.sample(frac=1.0, random_state=42)
df_files.head()

Unnamed: 0,filename,class
16143,1a15bf60624e6ae27307ed2b63f68378.png,0
13660,6854cd6715a755b888c6e4cb366b81d9.png,0
344,c1c3feff2ca3ff7869a703aa3db228af.png,1
1034,ddd6eb7a79de1124fbdbe4bd0b10fcfb.png,1
8109,12a55627c1b72c77d16f3d7ee6bbc2ff.png,1


In [39]:
X = np.zeros((df_files.shape[0],img_width, img_height), dtype=np.uint8)
for idx,file in tqdm(enumerate(df_files['filename'])):
    X[idx] = cv2.imread(train_root_folder+file,0)

A Jupyter Widget




In [40]:
y = df_files['class']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

In [41]:
def calc_GLCM_features(X):
    GLCM_features = np.zeros((X.shape[0], 4), dtype=np.float32)
    for i in tqdm(range(X.shape[0])):
        img = X[i]
        glcm = greycomatrix(img, [2], [0], 256, symmetric=True, normed=True)
        GLCM_features[i] = np.array([greycoprops(glcm, 'dissimilarity')[0, 0],
                           greycoprops(glcm, 'correlation')[0, 0],
                           greycoprops(glcm, 'homogeneity')[0, 0],
                           greycoprops(glcm, 'contrast')[0, 0]])
    return GLCM_features

In [51]:
logistic = linear_model.LogisticRegression()

In [52]:
GLCM_X = calc_GLCM_features(X)

A Jupyter Widget




In [53]:
logistic.fit(GLCM_X, y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [54]:
df_test = pd.read_csv('test.csv')
X_test = np.zeros((df_test.shape[0],img_width, img_height), dtype=np.uint8)
for idx,file in tqdm(enumerate(df_test['filename'])):
    X_test[idx] = cv2.imread(test_root_folder+file,0)
GLCM_test = calc_GLCM_features(X_test)

A Jupyter Widget




A Jupyter Widget




In [57]:
predictions = logistic.predict(GLCM_test)
df_pred = pd.DataFrame(predictions, columns=['predictions'])
df_pred = pd.concat((df_test, df_pred), axis=1)

In [58]:
logistic.score(GLCM_X, y)

0.72542673696299

In [66]:
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier

In [73]:
linear = linear_model.LinearRegression()
rf = RandomForestClassifier()
lr = LogisticRegression()
dummy = DummyClassifier()

In [68]:
GLCM_X = calc_GLCM_features(X)

A Jupyter Widget




In [62]:
linear.fit(GLCM_X, y)
.score(GLCM_X, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [70]:
rf.fit(GLCM_X, y)
rf.score(GLCM_X, y)

0.9838179255624576

In [74]:
dummy.fit(GLCM_X, y)
dummy.score(GLCM_X, y)

0.5025317116458736

In [71]:
lr.fit(GLCM_X, y)
lr.score(GLCM_X, y)

0.72542673696299

In [63]:
df_test = pd.read_csv('test.csv')
X_test = np.zeros((df_test.shape[0],img_width, img_height), dtype=np.uint8)
for idx,file in tqdm(enumerate(df_test['filename'])):
    X_test[idx] = cv2.imread(test_root_folder+file,0)
GLCM_test = calc_GLCM_features(X_test)

A Jupyter Widget




A Jupyter Widget




In [64]:
predictions = linear.predict(GLCM_test)
df_pred = pd.DataFrame(predictions, columns=['predictions'])
df_pred = pd.concat((df_test, df_pred), axis=1)

In [65]:
linear.score(GLCM_X, y)

0.24247255593777473