In [73]:
from PIL import Image
from PIL import ImageDraw
import timeit
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
from skimage.feature import greycomatrix, greycoprops
import matplotlib.image as mpimg 
from math import sqrt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import pandas as pd
from sklearn import preprocessing
import random

In [74]:
map_8bit_to_3bit = [i // 32 for i in range(256)]
model = {}
model_test = {}
neighbor = 3
time_modelling_start = timeit.default_timer()
model["cap"] = []
model["tulis"] = []
model_test["cap"] = []
model_test["tulis"] = []

In [75]:
#Pre Processing
def load_img(img_path):
    return Image.open(img_path).convert('L')

def get_img_size(img):
    return img.size

def print_img(img):
    plt.imshow(img)
    
def get_img_colors(img, sampling_count):
    img_width, img_height = get_img_size(img)
    
    sample_width = int(img_width / sampling_count)
    sample_height = int(img_height / sampling_count)
    
    half_sample_width = int(sample_width/2)
    half_sample_height = int(sample_height/2)

    img_colors = []

    width_constraint = img_width - (2 * sample_width)
    height_constraint = img_height - (2 * sample_height)
   
    row = 0
    for point_x in range(half_sample_width, img_width, sample_width):
        if(row == sampling_count):
            break
        column = 0
        for point_y in range(half_sample_height, img_height, sample_height):
            if(column == sampling_count):
                break
            img_colors.append(img.getpixel((point_x, point_y)))
                
            column += 1
                
        row += 1
            
    return img_colors

def construct_img(img_colors, img_dimension, sampling_count):
    img_new = Image.new('RGB', (img_dimension), (255, 255, 255))
    draw = ImageDraw.Draw(img_new)
    
    img_new_width = img_dimension[0]
    img_new_height = img_dimension[1]

    sample_width = int(img_new_width/sampling_count)
    sample_height = int(img_new_height/sampling_count)
    loop_count = 0

    img_colors_len = len(img_colors)

    for w in range(0, img_new_width, sample_width):
        for h in range(0, img_new_height, sample_height):
            if(loop_count == img_colors_len):
                break
            
            current_color = (img_colors[loop_count], img_colors[loop_count], img_colors[loop_count])
            draw.rectangle((w, h, w+sample_width, h+sample_height), fill=current_color)
            loop_count += 1
    
    return img_new

def get_3bit_colors(img_colors):
    loop_count = 0
    
    for img_color in img_colors:
        img_colors[loop_count] = map_8bit_to_3bit[img_color]
        
        loop_count += 1
        
    return img_colors

def get_img_matrix(img_colors):
    img_matrix = [] 
    loop_count = 0
    
    loop_count = 0
    img_square_dimension = int(math.sqrt(len(img_colors)))
    
    for row in range(img_square_dimension):
        temp_row = []
        for col in range(img_square_dimension):
            temp_row.append(img_colors[loop_count])
            
            loop_count += 1
        img_matrix.append(temp_row)
        
    return img_matrix


In [76]:
#Features Extraction
def feature_extract(img, sampling_count, class_name):
    glcm_component=[]
    img_colors = get_img_colors(img, sampling_count)
    img_3bit_colors = get_3bit_colors(img_colors)
    img_matrix = get_img_matrix(img_3bit_colors)
    glcm_matrix=greycomatrix(img_matrix, distances=[1], angles=[0], levels=12, symmetric=False, normed=False)
    component = ['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity']
    for x in component:
        glcm_component.append(greycoprops(glcm_matrix, x)[0][0])
    glcm_component.append(class_name)
    return(glcm_component)

In [77]:
#Modelling
def get_class_names(training_folder_path):
    return os.listdir(training_folder_path)

def make_a_model(class_name, features, learning_rate):
    random_splitter = random.uniform(0, 1)
    if(random_splitter <= learning_rate):
        model[class_name].append(features)
    else:
        model_test[class_name].append(features)
        
def to_dataframe(model):
    models = pd.DataFrame(model['cap'] + model['tulis'], columns=['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity', 'class'])
    return models

In [78]:
#Classification
def naive_bayes(image):
    prob = 0
    probability = 0
    for class_name in class_names:
        for i in range(5):
            a = (image[i]-mean[class_name][i])**2/(2*variance[class_name][i])
            b = 1/(np.sqrt(2*3.14*variance[class_name][i]))
            prob+=b* np.exp(a)
        if(prob>probability):
            probability = prob
            probability_class = class_name
    return probability_class

In [127]:
#Validation
def validate(sampling_count):
    right = 0
    total = 0
    for class_name in class_names:
        validate_img_paths = glob.glob(validation_folder_path + class_name + '/' +img_type)
        for images in validate_img_paths:
            img = load_img(images)
            training_img_class_name = class_name
            image = feature_extract(img, sampling_count, training_img_class_name)
            predict = naive_bayes(image)
            class_image = class_name
            if(predict == class_name):
                   right+=1
            total +=1
        accuracy = (right/total) * 100
    print("Accuracy: " + str(accuracy) +"%")

In [80]:
#Testing

def testing():
    right = 0
    total = 0
    for class_name in class_names:
        for image in model_test[class_name]:
            predict = naive_bayes(image)
            class_image = class_name
            if(predict == class_name):
                   right+=1
            total +=1
    accuracy = (right/total) * 100
    print("Accuracy: " + str(accuracy) +"%")

def testing_split(sampling_count):
    right = 0.0;
    total = 0.0;
    accuracy = 0.0;
    class_names = get_class_names(validation_folder_path)
    for class_name in class_names:
        for test_img in model_test[class_name]:
            predict_class = testing_knn(test_img, neighbor)
            class_image = class_name
            if(predict_class == class_image):
                right+=1
            total +=1
    accuracy = (right/total) * 100
    print("Accuracy: " + str(accuracy) +"%")
    
    
def testing(sampling_count):
    testing_img_paths = glob.glob(test_folder_path + '/' + img_type)
    for testing_img_path in testing_img_paths:
        imgs = load_img(testing_img_path)
        predict_class = knn(imgs, sampling_count, neighbor)
        print("Categorized as " + predict_class)
        plt.imshow(Image.open(testing_img_path))    

In [None]:
def testings():
    total={}
    total["tulis"] = models.loc[models['class'] == 'tulis']
    total["cap"] = models.loc[models['class'] == 'cap']
    mean = {}
    variance = {}
    mean["tulis"] = []
    variance["tulis"] = []
    mean["cap"] = []
    variance["cap"] = []
    tulis_probability = (total["tulis"].count()/models.count())[0]
    cap_probability = (total["cap"].count()/models.count())[0]
    for a in ["tulis", "cap"]:
        for b in ['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity']:
            mean[a].append(total[a][b].mean())
            variance[a].append(total[a][b].var())
    testing()

In [81]:
#Training Testing

learning_rate = 0.8
sampling_count = 128
img_type = '*.jpg'
root_path = './'
training_folder_path = root_path + 'glcm/training/'
validation_folder_path = root_path + 'glcm/validation/'
test_folder_path = root_path + 'glcm/test/'

class_names = get_class_names(training_folder_path)

for i in range(1):
    for class_name in class_names:
        training_img_paths = glob.glob(training_folder_path + class_name + '/' + img_type)
        for training_img_path in training_img_paths:
            training_img = load_img(training_img_path)
            training_img_class_name = class_name
            feature = feature_extract(training_img, sampling_count, training_img_class_name)
            make_a_model(class_name, feature, learning_rate)
        
    models = to_dataframe(model)
    models_test = to_dataframe(model_test)
    testings()



time_modelling_stop = timeit.default_timer()
print('Time elapsed to make this model is ' + str(time_modelling_stop - time_modelling_start) + ' seconds.')

Time elapsed to make this model is 17.30380699999978 seconds.


In [92]:
models

Unnamed: 0,contrast,homogeneity,energy,correlation,ASM,dissimilarity,class
0,1.700910,0.703605,0.497673,0.220628,0.247679,0.765625,cap
1,1.482406,0.707620,0.493741,0.193604,0.243781,0.726378,cap
2,1.933071,0.657590,0.458955,0.099647,0.210639,0.878814,cap
3,1.564776,0.695369,0.498445,0.130918,0.248447,0.760273,cap
4,1.989850,0.655886,0.457202,0.079423,0.209033,0.890194,cap
...,...,...,...,...,...,...,...
436,1.783342,0.704276,0.524339,0.082489,0.274932,0.776821,tulis
437,1.655081,0.749280,0.542006,0.240940,0.293771,0.678334,tulis
438,1.721088,0.720032,0.549986,0.078231,0.302484,0.741265,tulis
439,1.612266,0.718465,0.545094,0.022031,0.297127,0.729269,tulis


In [110]:
tulis_probability, cap_probability, models.count()[0]
total["cap"]["energy"].var()

0.03186569805549487

In [113]:
def naive_bayes(image):
    prob = 0
    probability = 0
    for class_name in class_names:
        for i in range(5):
            a = (image[i]-mean[class_name][i])**2/(2*variance[class_name][i])
            b = 1/(np.sqrt(2*3.14*variance[class_name][i]))
            prob+=b* np.exp(a)
        if(prob>probability):
            probability = prob
            probability_class = class_name
    return probability_class

In [131]:
right = 0
total = 0
for class_name in class_names:
    for image in model_test[class_name]:
        predict = naive_bayes(image)
        class_image = class_name
        if(predict == class_name):
               right+=1
        total +=1
accuracy = (right/total) * 100
print("Accuracy: " + str(accuracy) +"%")

Accuracy: 41.05263157894737%


In [128]:
validate(128)

Accuracy: 53.90625%


In [111]:
a = (120-90)/(2*25)
b = 1/(np.sqrt(2*3.14*25))

In [112]:
b = b * np.exp(a)
b

0.14542091165335422

In [26]:
models_test

Unnamed: 0,contrast,homogeneity,energy,correlation,ASM,dissimilarity,class
0,2.582923,0.616184,0.412878,0.087715,0.170468,1.043184,cap
1,1.988250,0.612896,0.313040,0.156078,0.097994,0.964136,cap
2,2.389333,0.564944,0.296183,0.028619,0.087724,1.106853,cap
3,2.790908,0.559394,0.283801,0.080057,0.080543,1.172675,cap
4,2.219119,0.574869,0.295172,0.106188,0.087127,1.064469,cap
...,...,...,...,...,...,...,...
97,1.967089,0.695199,0.518430,0.040678,0.268769,0.818959,tulis
98,1.623031,0.714436,0.544943,0.011827,0.296963,0.738558,tulis
99,1.594796,0.716647,0.541391,0.021616,0.293105,0.728900,tulis
100,1.783342,0.704276,0.524339,0.082489,0.274932,0.776821,tulis


Accuracy: 0.0%
Accuracy: 54.90196078431373%


In [36]:
(total["tulis"].count()/models.count())[0] + (total["cap"].count()/models.count())[0]

1.0