## Importing useful packages

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageStat,ImageOps
from skimage.io import imread,imshow,imsave
from scipy.stats import entropy
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from scipy.stats import kurtosis,skew

## Loading training data

In [2]:
# NOTE(review): these are absolute paths on one machine; point them at your local
# copy of the dataset before running (the test cells use relative 'data/...' paths).
train_dir = 'C:/Users/Lenovo/Documents/Study/Master/Seventh Semester/ISM/Programming/Project/ISM/ISM/train'
filepath = 'C:/Users/Lenovo/Documents/Study/Master/Seventh Semester/ISM/Programming/Project/ISM/ISM/train.txt'
# os.listdir returns entries in arbitrary order; sorting makes the row order of the
# feature table (and therefore any seeded split) reproducible.
dir_list = sorted(os.listdir(train_dir))
# dir_list = dir_list[0:1]   # uncomment to debug on a single image

# Per-image accumulators filled by the feature-extraction loop below.
image_pd=[]     # file names
rms=[]          # root mean square of the grayscale pixels
stddev=[]       # standard deviation of the grayscale pixels
features=[]     # class label of each image, looked up with findImage
kurtVal=[]      # kurtosis of the grayscale pixels
skewness=[]     # skewness of the grayscale pixels


In [3]:
def findImage(image, labels_path=None):
    """Return the label stored next to ``image`` in the labels text file.

    Each line of the file is split on whitespace; the first token is taken as
    the image name and the second as its label. Single quotes are stripped
    from both tokens, and double quotes are stripped from ``image``.

    Parameters
    ----------
    image : str
        Image file name to look up.
    labels_path : str, optional
        Path of the labels file. Defaults to the notebook-level ``filepath``.

    Returns
    -------
    str
        The label of the first line whose image name equals ``image``.

    Raises
    ------
    ValueError
        If no line lists ``image``.
    """
    if labels_path is None:
        labels_path = filepath
    wanted = image.replace('"', '')
    with open(labels_path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or incomplete lines carry no (name, label) pair.
            if len(fields) < 2:
                continue
            # Compare against the name column only, so a label that happens to
            # equal a file name can never be mistaken for an entry.
            if fields[0].replace("'", "") == wanted:
                return fields[1].replace("'", "")
    raise ValueError("%r is not listed in %s" % (wanted, labels_path))

In [4]:
#Function that allows to improve the contrast of an image
def contrastImage(img):
    """Load an image and equalize the histogram of its luminance channel.

    ``img`` is handed to ``skimage.io.imread``, which returns color images in
    RGB channel order. The RGB<->YUV conversion codes are therefore used; the
    BGR codes would swap the red and blue weights when computing Y, so the
    wrong luminance would be equalized.

    Returns the contrast-enhanced image in the same channel order as it was
    read (RGB). A single-channel image is equalized directly.
    """
    img = imread(img)
    if img.ndim == 2:
        # Already grayscale: there is no chroma to preserve.
        return cv2.equalizeHist(img)
    img_to_yuv = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    # Equalize only Y (luminance); U and V (color) stay untouched.
    img_to_yuv[:, :, 0] = cv2.equalizeHist(img_to_yuv[:, :, 0])
    hist_equalization_result = cv2.cvtColor(img_to_yuv, cv2.COLOR_YUV2RGB)
    return hist_equalization_result


In [None]:
# Start from empty accumulators so that re-running this cell does not duplicate rows.
for acc in (image_pd, rms, stddev, features, kurtVal, skewness):
    acc.clear()

count=0
for count, i in enumerate(dir_list, start=1):
    location = train_dir + "/" + i
    label = findImage(i)
    # The context manager closes the file once the grayscale copy exists.
    with Image.open(location) as im: #Using PIL
        im_gray = ImageOps.grayscale(im) #Change to Grayscale
    stat = ImageStat.Stat(im_gray)
    gray_values = np.asarray(im_gray)

    # KURTOSIS and SKEWNESS (each computed once, then printed and stored)
    kurt = kurtosis(gray_values, axis=None)
    print("Kurtosis: ", kurt)
    skw = skew(gray_values, axis=None)
    print("Skew: ", skw)

    # Append only after every feature was computed, so all lists keep the same length.
    image_pd.append(i)
    features.append(label)
    kurtVal.append(kurt)
    skewness.append(skw)
    #Root Mean Square and Standard Deviation
    rms.append(stat.rms[0])
    stddev.append(stat.stddev[0])
    print(i," ",count)
df = pd.DataFrame({'image': image_pd, 'rms': rms,'stddev':stddev,'features':features, 'Kurtosis' : kurtVal, 'Skewness': skewness})

In [None]:
pd.set_option('display.max_columns', 7)
df

## Computing the entropy

In [None]:
# Defining a pattern directory to access all images
im_locations = []
for im in image_pd:
    im = train_dir +'/'+ im
    im_locations.append(im)

In [None]:
# Computing entropy for all images in the train dataset
entropy_cal = []
for path in im_locations:
    # Number of pixels holding each distinct gray value; the per-image arrays are
    # not kept around, only the resulting entropy is stored.
    _, value_counts = np.unique(imread(path, as_gray=True), return_counts=True)
    entropy_cal.append(entropy(value_counts / value_counts.sum()))


In [None]:
# Adding the entropy to the dataframe
df = df.assign(entropy=entropy_cal)
df

## Creating models

### Test 1 : Using only the root mean square as a feature

In [None]:
# X is the root mean squares of all training images and Y is the labels associated to those images
X = df.rms
Y = df.features
X, Y

In [None]:
# Splitting X and Y into training and validation datasets. The data is shuffled with a
# fixed seed so the split (and the scores below) can be reproduced; stratify keeps the
# same proportion of each label in both sets.
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, shuffle=True, stratify=Y, random_state=42)

In [None]:
# Reshaping X to use it as input to the model
X_train = X_train.values.reshape(-1,1)
X_val = X_val.values.reshape(-1,1)

In [None]:
# Creating an SVM named model1 and fitting it with the training set
model1 = svm.SVC()
model1.fit(X_train, Y_train)

In [None]:
#Predicting the labels of the validation set
y_pred = model1.predict(X_val)
y_pred
np.unique(y_pred) #Checking which cases does the prediction include

In [None]:
#Computing the accuracy score using the predicted labels and comparing them to the real ones
accuracy_score(Y_val, y_pred)

In [None]:
#Creating another model using KNN with k = 3 . K has been randomly chosen and can be changed of course
from sklearn.neighbors import KNeighborsClassifier
k = 3
clf = KNeighborsClassifier(n_neighbors = k)
clf.fit(X_train, Y_train)
Ypred_v = clf.predict(X_val)
accuracy_score(Y_val, Ypred_v)

### Test 2 : Using Kurtosis and Skewness

In [None]:
# X contains the kurtosis and skewness of all images
X2 = df[['Kurtosis','Skewness']]

In [None]:
# Fixed seed so the split is reproducible across runs.
X2_train, X2_val, Y2_train, Y2_val = train_test_split(X2, Y, shuffle=True, stratify=Y, random_state=42)

In [None]:
# Normalizing the data. StandardScaler uses the mean and std of the data which are computed automatically in the fit part.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler(with_mean=True, with_std=True)
sc = sc.fit(X2_train)
# Transforming training and validation set into normalized values
X2train_n = sc.transform(X2_train)
X2val_n = sc.transform(X2_val)

### Test 3 : Using rms, Stddev , Kurtosis, Skewness and entropy 

In [None]:
X3 = df[['rms','stddev','Kurtosis','Skewness','entropy']]


In [None]:
# Fixed seed so the split is reproducible across runs.
X3_train, X3_val, Y3_train, Y3_val = train_test_split(X3, Y, shuffle=True, stratify=Y, random_state=42)
X3_train.head()

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler(with_mean=True, with_std=True)
sc = sc.fit(X3_train)
X3train_n = sc.transform(X3_train)
X3val_n = sc.transform(X3_val)

In [None]:
model7 = svm.SVC()
model7.fit(X3train_n, Y3_train)
y3_pred = model7.predict(X3val_n)
accuracy_score(y3_pred, Y3_val)


In [None]:
np.unique(y3_pred)

In [None]:
# Computing the confusion matrix. scikit-learn expects (y_true, y_pred): rows are the
# true labels and columns the predicted ones.
from sklearn.metrics import confusion_matrix
confusion_matrix(Y3_val, y3_pred)

In [None]:
# Creating a decision tree model
from sklearn import tree
model_tree = tree.DecisionTreeClassifier()
model_tree.fit(X3train_n, Y3_train)
y3_tree = model_tree.predict(X3val_n)
accuracy_score(y3_tree, Y3_val)

In [None]:
# Testing another kernel for SVM
model8 = svm.SVC(kernel = 'poly')
model8.fit(X3train_n, Y3_train)
y4_pred = model8.predict(X3val_n)
accuracy_score(y4_pred, Y3_val)

In [None]:
# Testing a KNN model
k = 75
clf = KNeighborsClassifier(n_neighbors = k)
clf.fit(X3train_n, Y3_train)
Ypred_v = clf.predict(X3val_n)
accuracy_score(Y3_val, Ypred_v)

## Testing the contrastImage function

In [None]:
location
imshow(location)

In [None]:
im_read = contrastImage(location)


In [None]:
imshow(im_read)

## Creating text file for predictions

In [None]:
def textfile_predictions(image_set,prediction_set,textfile_name):
    """Write one ``"<image> <prediction>"`` line per image to ``textfile_name``.

    Parameters
    ----------
    image_set : sequence
        Image names, in the order they were predicted.
    prediction_set : sequence
        Predicted label of each image; entries are formatted with ``%s`` so
        non-string labels are accepted. Entries beyond ``len(image_set)`` are
        ignored.
    textfile_name : str
        Output path; an existing file is overwritten.

    Raises
    ------
    ValueError
        If there are fewer predictions than images. Raised before the file is
        opened, so no truncated file is left behind.
    """
    if len(prediction_set) < len(image_set):
        raise ValueError(
            "got %d predictions for %d images" % (len(prediction_set), len(image_set))
        )
    # The context manager closes the file even if a write fails.
    with open(textfile_name, "w") as f:
        for image_name, label in zip(image_set, prediction_set):
            f.write("%s %s\n" % (image_name, label))
        

## Computing the hu moments as well as other features 

In [None]:
def fd_hu_moments(image):
    """Return the Hu moments of ``image`` as a flat array.

    The image is converted with ``cv2.COLOR_BGR2GRAY``, so it is expected to be
    a color image in BGR channel order.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    spatial_moments = cv2.moments(gray)
    return cv2.HuMoments(spatial_moments).flatten()

def fd_haralick(image):    # convert the image to grayscale
    """Return the Haralick texture features of ``image``, averaged over axis 0.

    The image is converted with ``cv2.COLOR_BGR2GRAY``, so it is expected to be
    a color image in BGR channel order.

    NOTE(review): ``mahotas`` is not imported in any visible cell of this
    notebook; calling this function raises NameError until it is imported.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # compute the haralick texture feature vector
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    return haralick
 
def fd_histogram(image, mask=None, bins=8):
    """Return the normalized 3-D HSV color histogram of ``image`` as a flat array.

    Parameters
    ----------
    image : ndarray
        Color image; it is converted with ``cv2.COLOR_BGR2HSV``, so BGR channel
        order is expected.
    mask : ndarray, optional
        Mask forwarded to ``cv2.calcHist``; ``None`` uses every pixel.
    bins : int, optional
        Number of bins per channel, giving ``bins ** 3`` features.

    Returns
    -------
    ndarray
        Flattened histogram, normalized with ``cv2.normalize``.
    """
    # convert the image to HSV color-space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram in place
    cv2.normalize(hist, hist)
    # flatten() returns a new array, so it has to be returned rather than discarded
    return hist.flatten()

In [None]:
# Computing the hu moments for all images.
# cv2.imread returns a 3-channel BGR array, which matches the COLOR_BGR2GRAY conversion
# inside fd_hu_moments (skimage's imread returns RGB, or a 2-D array for grayscale files).
hu_moments = [fd_hu_moments(cv2.imread(im)) for im in im_locations]

In [None]:
# Adding hu moments to the dataframe
df['hu moments'] = hu_moments
df

### test 4 : Using hu moments (in progress...)

In [None]:
# Keep the first three Hu moments of every image as a numeric table (one row per image,
# one column per moment). Building a new frame leaves df['hu moments'] untouched and
# gives a 2-D float array through .values, which is what the estimators expect.
X4 = pd.DataFrame(
    np.vstack(list(df['hu moments']))[:, :3],
    index=df.index,
    columns=['hu1', 'hu2', 'hu3'],
)


In [None]:
X4.values

In [None]:
# Fixed seed so the split is reproducible across runs.
X4_train, X4_val, Y4_train, Y4_val = train_test_split(X4, Y, shuffle=True, stratify=Y, random_state=42)

In [None]:
model8 = svm.SVC()
model8.fit(X4_train.values, Y4_train)

### Working on test data : Prediction number 1

In [None]:
test_dir = 'data/test'
test_noise_dir = 'data/test_noise'
# os.listdir order is arbitrary; sorting keeps the prediction files in a stable order.
dir_testlist = sorted(os.listdir(test_dir))
len(dir_testlist)

In [None]:
imagetest_pd = []
rms_test=[]
stddev_test=[]
kurtVal_test=[]
skewness_test=[]


In [None]:
# Start from empty accumulators so that re-running this cell does not duplicate rows.
for acc in (imagetest_pd, rms_test, stddev_test, kurtVal_test, skewness_test):
    acc.clear()

count=0
for count, i in enumerate(dir_testlist, start=1):
    location = test_dir + "/" + i
    # The context manager closes the file once the grayscale copy exists.
    with Image.open(location) as im: #Using PIL
        im_gray = ImageOps.grayscale(im) #Change to Grayscale
    stat = ImageStat.Stat(im_gray)
    gray_values = np.asarray(im_gray)

    # KURTOSIS and SKEWNESS (each computed once, then printed and stored)
    kurt = kurtosis(gray_values, axis=None)
    print("Kurtosis: ", kurt)
    skw = skew(gray_values, axis=None)
    print("Skew: ", skw)

    # Append only after every feature was computed, so all lists keep the same length.
    imagetest_pd.append(i)
    kurtVal_test.append(kurt)
    skewness_test.append(skw)
    #Root Mean Square and Standard Deviation
    rms_test.append(stat.rms[0])
    stddev_test.append(stat.stddev[0])
    print(i," ",count)
df_test = pd.DataFrame({'image': imagetest_pd, 'rms': rms_test,'stddev':stddev_test, 'Kurtosis' : kurtVal_test, 'Skewness': skewness_test})

In [None]:
df_test

In [None]:
im_testlocations = []
for im in imagetest_pd:
    im = test_dir +'/'+ im
    im_testlocations.append(im)

In [None]:
# Computing entropy for all images in the test dataset
entropy_cal = []
for path in im_testlocations:
    # Number of pixels holding each distinct gray value; only the entropy is stored.
    _, value_counts = np.unique(imread(path, as_gray=True), return_counts=True)
    entropy_cal.append(entropy(value_counts / value_counts.sum()))


In [None]:
df_test = df_test.assign(entropy=entropy_cal)
df_test

In [None]:
# Extracting all the features for the test dataset
X3_test = df_test[['rms','stddev','Kurtosis','Skewness','entropy']]
X3_test


In [None]:
# Predictions on test dataset using the best model yet (64% accuracy)
X3test_n = sc.transform(X3_test)
ytest_pred = model7.predict(X3test_n)

In [None]:
# Creating a textfile containing the images and the associated classes
textfile_predictions(imagetest_pd,ytest_pred,'test_predictions_version1.txt')

### Working on noise test data : Prediction number 1

In [None]:
# os.listdir order is arbitrary; sorting keeps the prediction files in a stable order.
dir_noisetestlist = sorted(os.listdir(test_noise_dir))
len(dir_noisetestlist)

In [None]:
imagenoisetest_pd = []
rms_noisetest=[]
stddev_noisetest=[]
kurtVal_noisetest=[]
skewness_noisetest=[]

In [None]:
# Start from empty accumulators so that re-running this cell does not duplicate rows.
for acc in (imagenoisetest_pd, rms_noisetest, stddev_noisetest, kurtVal_noisetest, skewness_noisetest):
    acc.clear()

count=0
for count, i in enumerate(dir_noisetestlist, start=1):
    location = test_noise_dir + "/" + i
    # The context manager closes the file once the grayscale copy exists.
    with Image.open(location) as im: #Using PIL
        im_gray = ImageOps.grayscale(im) #Change to Grayscale
    stat = ImageStat.Stat(im_gray)
    gray_values = np.asarray(im_gray)

    # KURTOSIS and SKEWNESS (each computed once, then printed and stored)
    kurt = kurtosis(gray_values, axis=None)
    print("Kurtosis: ", kurt)
    skw = skew(gray_values, axis=None)
    print("Skew: ", skw)

    # Append only after every feature was computed, so all lists keep the same length.
    imagenoisetest_pd.append(i)
    kurtVal_noisetest.append(kurt)
    skewness_noisetest.append(skw)
    #Root Mean Square and Standard Deviation
    rms_noisetest.append(stat.rms[0])
    stddev_noisetest.append(stat.stddev[0])
    print(i," ",count)
df_noisetest = pd.DataFrame({'image': imagenoisetest_pd, 'rms': rms_noisetest,'stddev':stddev_noisetest, 'Kurtosis' : kurtVal_noisetest, 'Skewness': skewness_noisetest})

In [None]:
df_noisetest

In [None]:
im_noisetestlocations = []
for im in imagenoisetest_pd:
    im = test_noise_dir +'/'+ im
    im_noisetestlocations.append(im)

In [None]:
# Computing entropy for all images in the noisy test dataset
entropy_cal = []
for path in im_noisetestlocations:
    # Number of pixels holding each distinct gray value; only the entropy is stored.
    _, value_counts = np.unique(imread(path, as_gray=True), return_counts=True)
    entropy_cal.append(entropy(value_counts / value_counts.sum()))


In [None]:
df_noisetest = df_noisetest.assign(entropy=entropy_cal)
df_noisetest

In [None]:
X3_noisetest = df_noisetest[['rms','stddev','Kurtosis','Skewness','entropy']]
X3_noisetest

In [None]:
X3noisetest_n = sc.transform(X3_noisetest)
ynoisetest_pred = model7.predict(X3noisetest_n)


In [None]:
textfile_predictions(imagenoisetest_pd,ynoisetest_pred,'noisetest_predictions_version1.txt')

### More features : LocalBinaryPatterns

In [None]:
from skimage import feature

In [None]:
class LocalBinaryPatterns:
    """Texture descriptor: normalized histogram of uniform Local Binary Patterns."""

    def __init__(self, numPoints, radius):
        # Number of points and radius forwarded to local_binary_pattern.
        self.numPoints = numPoints
        self.radius = radius

    def describe(self, image, eps=1e-7):
        """Return the normalized LBP histogram of ``image``.

        ``eps`` is added to the histogram sum before dividing, which avoids a
        division by zero.
        """
        codes = feature.local_binary_pattern(
            image, self.numPoints, self.radius, method="uniform"
        )
        # Integer bin edges 0 .. numPoints + 2 for the pattern codes.
        edges = np.arange(0, self.numPoints + 3)
        counts, _ = np.histogram(
            codes.ravel(), bins=edges, range=(0, self.numPoints + 2)
        )
        counts = counts.astype("float")
        return counts / (counts.sum() + eps)

In [None]:
desc = LocalBinaryPatterns(24, 8)
desc2 = LocalBinaryPatterns(16, 4)

In [None]:
data =[]
# loop over the training images and collect one LBP histogram (descriptor `desc`) per image
for imagePath in im_locations:
	# load the image, convert it to grayscale, and describe it
	image = cv2.imread(imagePath)
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	hist = desc.describe(gray)
	# store the histogram; rows follow the order of im_locations
	# (no label is extracted here)
	data.append(hist)


In [None]:
data2 =[]
# loop over the training images and collect one LBP histogram (descriptor `desc2`) per image
for imagePath in im_locations:
	# load the image, convert it to grayscale, and describe it
	image = cv2.imread(imagePath)
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	hist = desc2.describe(gray)
	# store the histogram; rows follow the order of im_locations
	# (no label is extracted here)
	data2.append(hist)


In [None]:
data2

In [None]:
# Fixed seed so the split is reproducible across runs.
X_train, X_val, Y_train, Y_val = train_test_split(data, Y, shuffle=True, stratify=Y, random_state=42)

In [None]:
X_train

In [None]:
model = svm.SVC()
model.fit(X_train, Y_train)
y3_pred = model.predict(X_val)
accuracy_score(y3_pred, Y_val)

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler(with_mean=True, with_std=True)
sc = sc.fit(X_train)
# Transforming training and validation set into normalized values
Xtrain_n = sc.transform(X_train)
Xval_n = sc.transform(X_val)

In [None]:
model2 = svm.SVC()
model2.fit(Xtrain_n, Y_train)
y3_pred = model2.predict(Xval_n)
accuracy_score(y3_pred, Y_val)

In [None]:
data_copy = data.copy()
data_copy

In [None]:
from sklearn.neighbors import KNeighborsClassifier
k = 55
clf = KNeighborsClassifier(n_neighbors = k)
clf.fit(Xtrain_n, Y_train)
Ypred_v = clf.predict(Xval_n)
accuracy_score(Y_val, Ypred_v)

In [None]:
data_copy

In [None]:
# Concatenate both LBP histograms of every image. Built from `data` itself rather than
# from data_copy, so re-running this cell does not keep appending columns.
data_copy = np.hstack((data, data2))

In [None]:
data_copy[0]

In [None]:
# Fixed seed so the split is reproducible across runs.
X2_train, X2_val, Y2_train, Y2_val = train_test_split(data_copy, Y, shuffle=True, stratify=Y, random_state=42)

In [None]:
model3 = svm.SVC()
model3.fit(X2_train, Y2_train)
y3_pred = model3.predict(X2_val)
accuracy_score(y3_pred, Y2_val)

In [None]:
sc = StandardScaler(with_mean=True, with_std=True)
sc = sc.fit(X2_train)
# Transforming training and validation set into normalized values
X2train_n = sc.transform(X2_train)
X2val_n = sc.transform(X2_val)

In [None]:
model4 = svm.SVC()
model4.fit(X2train_n, Y2_train)
y3_pred = model4.predict(X2val_n)
accuracy_score(y3_pred, Y2_val)

In [None]:
y3_pred

In [None]:
len(y3_pred)

In [None]:
np.unique(y3_pred)

In [None]:
len(X2_val)

In [None]:
# Computing the confusion matrix. scikit-learn expects (y_true, y_pred): rows are the
# true labels and columns the predicted ones.
from sklearn.metrics import confusion_matrix
confusion_matrix(Y2_val, y3_pred)

In [None]:
from sklearn.metrics import f1_score
f1_score(Y2_val, y3_pred,average='weighted')

In [None]:
from sklearn import tree
model_tree = tree.DecisionTreeClassifier()
model_tree.fit(X2train_n, Y2_train)
y2_tree = model_tree.predict(X2val_n)
accuracy_score(y2_tree, Y2_val)

In [None]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
# Fixed seed so the forest, and the accuracy reported below, are reproducible.
model_forest = RandomForestClassifier(random_state=42)
model_forest.fit(X2train_n, Y2_train)
y2_forest = model_forest.predict(X2val_n)
accuracy_score(Y2_val, y2_forest)

In [None]:
# Fixed seed so the ensemble, and the accuracy reported below, are reproducible.
model_trees = ExtraTreesClassifier(random_state=42)
model_trees.fit(X2train_n, Y2_train)
y2_trees = model_trees.predict(X2val_n)
accuracy_score(Y2_val, y2_trees)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
k = 24
clf = KNeighborsClassifier(n_neighbors = k)
clf.fit(X2train_n, Y2_train)
Ypred_v = clf.predict(X2val_n)
accuracy_score(Y2_val, Ypred_v)

### Calculating LocalBinaryPatterns on test data and test data with noise

In [None]:
datatest =[]
datatest2 = []
# loop over the test images and collect both LBP histograms (desc and desc2) per image
for imagePath in im_testlocations:
	# load the image, convert it to grayscale, and describe it
	image = cv2.imread(imagePath)
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	hist = desc.describe(gray)
	hist2 = desc2.describe(gray)
	# store both histograms; rows follow the order of im_testlocations
	# (no label is extracted here)
	datatest.append(hist)
	datatest2.append(hist2)

In [None]:
datanoisetest =[]
datanoisetest2 = []
# loop over the noisy test images and collect both LBP histograms (desc and desc2) per image
for imagePath in im_noisetestlocations:
	# load the image, convert it to grayscale, and describe it
	image = cv2.imread(imagePath)
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	hist = desc.describe(gray)
	hist2 = desc2.describe(gray)
	# store both histograms; rows follow the order of im_noisetestlocations
	# (no label is extracted here)
	datanoisetest.append(hist)
	datanoisetest2.append(hist2)

In [None]:
datatest3 = np.hstack((datatest,datatest2))

In [None]:
datatest3_n = sc.transform(datatest3)

In [None]:
y3test_svm = model4.predict(datatest3_n)
np.unique(y3test_svm)

In [None]:
textfile_predictions(imagetest_pd,y3test_svm,'test_predictions_version2.txt')

In [None]:
datanoisetest3 = np.hstack((datanoisetest,datanoisetest2))

In [None]:
datanoisetest3_n = sc.transform(datanoisetest3)

In [None]:
y3noisetest_svm = model4.predict(datanoisetest3_n)
np.unique(y3noisetest_svm)

In [None]:
textfile_predictions(imagenoisetest_pd,y3noisetest_svm,'noisetest_predictions_version2.txt')

In [None]:
y2test_forest = model_forest.predict(datatest3_n)
np.unique(y2test_forest)

In [None]:
textfile_predictions(imagetest_pd,y2test_forest,'test_predictions_version3.txt')

In [None]:
y2noisetest_forest = model_forest.predict(datanoisetest3_n)
np.unique(y2noisetest_forest)

In [None]:
textfile_predictions(imagenoisetest_pd,y2noisetest_forest,'noisetest_predictions_version3.txt')

In [None]:
y2test_trees = model_trees.predict(datatest3_n)
np.unique(y2test_trees)

In [None]:
textfile_predictions(imagetest_pd,y2test_trees,'test_predictions_version4.txt')

In [None]:
y2noisetest_trees = model_trees.predict(datanoisetest3_n)
np.unique(y2noisetest_trees)

In [None]:
textfile_predictions(imagenoisetest_pd,y2noisetest_trees,'noisetest_predictions_version4.txt')

In [None]:
Ytestpred_v = clf.predict(datatest3_n)
np.unique(Ytestpred_v)

In [None]:
textfile_predictions(imagetest_pd,Ytestpred_v,'test_predictions_version5.txt')

In [None]:
Ynoisetestpred_v = clf.predict(datanoisetest3_n)
np.unique(Ynoisetestpred_v)

In [None]:
textfile_predictions(imagenoisetest_pd,Ynoisetestpred_v,'noisetest_predictions_version5.txt')