In [1]:
from PIL import Image
from PIL import ImageDraw
import timeit
import glob
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
from skimage.feature import greycomatrix, greycoprops
import matplotlib.image as mpimg 
from math import sqrt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import pandas as pd
from sklearn import preprocessing
import random

In [2]:
map_8bit_to_3bit = [i // 32 for i in range(256)]
model = {}
model_test = {}
neighbor = 3
time_modelling_start = timeit.default_timer()
model["cap"] = []
model["tulis"] = []
model_test["cap"] = []
model_test["tulis"] = []
mean = {}
variance = {}
mean["tulis"] = []
variance["tulis"] = []
mean["cap"] = []
variance["cap"] = []

In [3]:
#Pre Processing
def load_img(img_path):
    return Image.open(img_path).convert('L')

def get_img_size(img):
    return img.size

def print_img(img):
    plt.imshow(img)
    
def get_img_colors(img, sampling_count):
    img_width, img_height = get_img_size(img)
    
    sample_width = int(img_width / sampling_count)
    sample_height = int(img_height / sampling_count)
    
    half_sample_width = int(sample_width/2)
    half_sample_height = int(sample_height/2)

    img_colors = []

    width_constraint = img_width - (2 * sample_width)
    height_constraint = img_height - (2 * sample_height)
   
    row = 0
    for point_x in range(half_sample_width, img_width, sample_width):
        if(row == sampling_count):
            break
        column = 0
        for point_y in range(half_sample_height, img_height, sample_height):
            if(column == sampling_count):
                break
            img_colors.append(img.getpixel((point_x, point_y)))
                
            column += 1
                
        row += 1
            
    return img_colors

def construct_img(img_colors, img_dimension, sampling_count):
    img_new = Image.new('RGB', (img_dimension), (255, 255, 255))
    draw = ImageDraw.Draw(img_new)
    
    img_new_width = img_dimension[0]
    img_new_height = img_dimension[1]

    sample_width = int(img_new_width/sampling_count)
    sample_height = int(img_new_height/sampling_count)
    loop_count = 0

    img_colors_len = len(img_colors)

    for w in range(0, img_new_width, sample_width):
        for h in range(0, img_new_height, sample_height):
            if(loop_count == img_colors_len):
                break
            
            current_color = (img_colors[loop_count], img_colors[loop_count], img_colors[loop_count])
            draw.rectangle((w, h, w+sample_width, h+sample_height), fill=current_color)
            loop_count += 1
    
    return img_new

def get_3bit_colors(img_colors):
    loop_count = 0
    
    for img_color in img_colors:
        img_colors[loop_count] = map_8bit_to_3bit[img_color]
        
        loop_count += 1
        
    return img_colors

def get_img_matrix(img_colors):
    img_matrix = [] 
    loop_count = 0
    
    loop_count = 0
    img_square_dimension = int(math.sqrt(len(img_colors)))
    
    for row in range(img_square_dimension):
        temp_row = []
        for col in range(img_square_dimension):
            temp_row.append(img_colors[loop_count])
            
            loop_count += 1
        img_matrix.append(temp_row)
        
    return img_matrix


In [4]:
#Features Extraction
def feature_extract(img, sampling_count, class_name):
    glcm_component=[]
    img_colors = get_img_colors(img, sampling_count)
    img_3bit_colors = get_3bit_colors(img_colors)
    img_matrix = get_img_matrix(img_3bit_colors)
    glcm_matrix=greycomatrix(img_matrix, distances=[1], angles=[0], levels=12, symmetric=False, normed=False)
    component = ['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity']
    for x in component:
        glcm_component.append(greycoprops(glcm_matrix, x)[0][0])
    glcm_component.append(class_name)
    return(glcm_component)

In [5]:
#Modelling
def get_class_names(training_folder_path):
    return os.listdir(training_folder_path)

def make_a_model(class_name, features, learning_rate):
    random_splitter = random.uniform(0, 1)
    if(random_splitter <= learning_rate):
        model[class_name].append(features)
    else:
        model_test[class_name].append(features)
        
def to_dataframe(model):
    models = pd.DataFrame(model['cap'] + model['tulis'], columns=['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity', 'class'])
    return models

In [6]:
#Classification
def naive_bayes(image, cap_probability, tulis_probability):
    prob = 1
    probability = 0.00
    for class_name in class_names:
        if(class_name=="tulis"):
            prob=tulis_probability
        elif(class_name=="cap"):
            prob=cap_probability
        for i in range(5):
            a = (image[i]-mean[class_name][i])**2/(2*variance[class_name][i])
            b = 1/(np.sqrt(2*3.14*variance[class_name][i]))
            prob=prob * (b* np.exp(0-a))
       
        if(prob>probability):
            probability = prob
            probability_class = class_name
        prob=1
    return probability_class

In [7]:
#Validation
def validate(sampling_count):
    right = 0
    total = 0
    for class_name in class_names:
        validate_img_paths = glob.glob(validation_folder_path + class_name + '/' +img_type)
        for images in validate_img_paths:
            img = load_img(images)
            training_img_class_name = class_name
            image = feature_extract(img, sampling_count, training_img_class_name)
            predict = naive_bayes(image)
            class_image = class_name
            if(predict == class_name):
                   right+=1
            total +=1
        accuracy = (right/total) * 100
    print("Accuracy: " + str(accuracy) +"%")

In [8]:
#Testing

def testing(cap_probability, tulis_probability):
    right = 0
    total = 0
    for class_name in class_names:
        for image in model_test[class_name]:
            predict = naive_bayes(image, cap_probability, tulis_probability)
            class_image = class_name
            if(predict == class_name):
                   right+=1
            total +=1
    accuracy = (right/total) * 100
    print("Accuracy: " + str(accuracy) +"%")
    
    
def testing_out(sampling_count):
    testing_img_paths = glob.glob(test_folder_path + '/' + img_type)
    for testing_img_path in testing_img_paths:
        imgs = load_img(testing_img_path)
        predict_class = knn(imgs, sampling_count, neighbor)
        print("Categorized as " + predict_class)
        plt.imshow(Image.open(testing_img_path))    

In [9]:
def testings():
    total={}
    total["tulis"] = models.loc[models['class'] == 'tulis']
    total["cap"] = models.loc[models['class'] == 'cap']
    mean["tulis"] = []
    variance["tulis"] = []
    mean["cap"] = []
    variance["cap"] = []
    tulis_probability = (total["tulis"].count()/models.count())[0]
    cap_probability = (total["cap"].count()/models.count())[0]
    print(tulis_probability, cap_probability)
    for a in ["tulis", "cap"]:
        for b in ['contrast', 'homogeneity', 'energy', 'correlation', 'ASM', 'dissimilarity']:
            mean[a].append(total[a][b].mean())
            variance[a].append(total[a][b].var())
    
    testing(cap_probability, tulis_probability)

In [10]:
#Training Testing

learning_rate = 0.8
sampling_count = 128
img_type = '*.jpg'
root_path = './'
training_folder_path = root_path + 'glcm/training/'
validation_folder_path = root_path + 'glcm/validation/'
test_folder_path = root_path + 'glcm/test/'

class_names = get_class_names(training_folder_path)

for i in range(5):
    for class_name in class_names:
        training_img_paths = glob.glob(training_folder_path + class_name + '/' + img_type)
        for training_img_path in training_img_paths:
            training_img = load_img(training_img_path)
            training_img_class_name = class_name
            feature = feature_extract(training_img, sampling_count, training_img_class_name)
            make_a_model(class_name, feature, learning_rate)
        
    models = to_dataframe(model)
    models_test = to_dataframe(model_test)

    testings()
validate(128)



time_modelling_stop = timeit.default_timer()
print('Time elapsed to make this model is ' + str(time_modelling_stop - time_modelling_start) + ' seconds.')

0.4583333333333333 0.5416666666666666
Accuracy: 75.96153846153845%
0.4653014789533561 0.534698521046644
Accuracy: 73.57512953367875%
0.46924829157175396 0.530751708428246
Accuracy: 72.16494845360825%
0.46490218642117376 0.5350978135788262
Accuracy: 73.15270935960592%
0.46813388353966073 0.5318661164603393
Accuracy: 73.74749498997996%
Time elapsed to make this model is 72.6440121 seconds.


In [38]:
x = models.iloc[:, [0, 5]].values  
y = models.iloc[:, 6].values  

In [46]:
x

array([[1.70091043, 0.765625  ],
       [1.4824065 , 0.72637795],
       [1.93307087, 0.87881398],
       ...,
       [1.61226624, 0.72926919],
       [1.64886811, 0.72404035],
       [1.92550443, 0.82105069]])

In [40]:
from sklearn.model_selection import train_test_split  
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 5)

In [41]:
from sklearn.preprocessing import StandardScaler  
sc = StandardScaler()  
x_train = sc.fit_transform(x_train)  
x_test = sc.transform(x_test) 

In [42]:
from sklearn.naive_bayes import GaussianNB  
classifier = GaussianNB()  
classifier.fit(x_train, y_train)  

GaussianNB(priors=None, var_smoothing=1e-09)

In [43]:
y_pred = classifier.predict(x_test)  

In [44]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test, y_pred) 

0.7014925373134329