In [1]:
import csv
import glob
import os

import cv2
import mahotas
import matplotlib.pyplot as plt
import numpy as np
from skimage.feature import greycomatrix, greycoprops
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.externals import joblib
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import MinMaxScaler



In [2]:
def create_csv_file(y_pred, imgs_to_predict, csv_filename='2C_prediction.csv'):
    """Write one CSV row per prediction, pairing each label with its image id.

    The file has the header ``image_id,classe`` followed by one row for every
    entry of ``y_pred``.

    Args:
        y_pred: Sized, indexable sequence of predicted labels.
        imgs_to_predict: Indexable sequence of image identifiers; entry ``i``
            is written next to ``y_pred[i]``. It must hold at least
            ``len(y_pred)`` entries, otherwise ``IndexError`` is raised.
        csv_filename: Destination path. Defaults to ``'2C_prediction.csv'``
            in the current working directory, which is the path this function
            always used before the parameter existed.
    """
    fieldnames = ['image_id', 'classe']
    # newline='' lets the csv module control line endings itself.
    with open(csv_filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {'image_id': imgs_to_predict[i], 'classe': y_pred[i]}
            for i in range(len(y_pred))
        )

In [3]:
# descriptor 0: color moments
def color_moments(image):
    """Return the normalized mean and standard deviation of each color channel.

    The six raw moments are laid out as
    ``[mean(c0), std(c0), mean(c1), std(c1), mean(c2), std(c2)]`` and then
    divided by their own average so the vector is scale independent.

    Args:
        image: Array indexable as ``image[:, :, c]`` for ``c`` in 0..2.
            NOTE: the extraction loop in this notebook passes the output of
            ``cv2.imread``, which OpenCV documents as BGR, so channel 0 is
            blue there rather than red.

    Returns:
        A list of six floats. For an all-zero image, where the normalizing
        average is 0, the unnormalized all-zero vector is returned instead
        of NaNs.
    """
    moments = []
    for channel in range(3):
        plane = image[:, :, channel]
        moments.extend((np.mean(plane), np.std(plane)))

    # Explicit array conversion: the division below must be element-wise.
    moments = np.asarray(moments, dtype=float)
    scale = moments.mean()
    if scale == 0:
        # Every moment is non-negative, so a zero average means every moment
        # is zero; dividing would only turn the vector into NaNs.
        return moments.tolist()
    return (moments / scale).tolist()

In [4]:
# descriptor 1: shape
def forme(image):
    """Return the Hu moments of the grayscale version of ``image`` as a list.

    Args:
        image: Color image accepted by ``cv2.cvtColor`` with the
            ``COLOR_BGR2GRAY`` conversion.

    Returns:
        The flattened output of ``cv2.HuMoments`` converted to a Python list.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Hu moments are derived from the raw image moments of the gray image.
    raw_moments = cv2.moments(grayscale)
    hu_moments = cv2.HuMoments(raw_moments)
    return hu_moments.flatten().tolist()

In [5]:
# descriptor 2: texture
def texture(image):
    """Return Haralick texture features of ``image`` averaged over axis 0.

    Args:
        image: Color image accepted by ``cv2.cvtColor`` with the
            ``COLOR_BGR2GRAY`` conversion.

    Returns:
        A list holding the column-wise mean of the array produced by
        ``mahotas.features.haralick`` on the grayscale image (presumably one
        row per co-occurrence direction -- see the mahotas documentation).
    """
    # Haralick features are computed on a single-channel image.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    per_row_features = mahotas.features.haralick(grayscale)
    return per_row_features.mean(axis=0).tolist()

In [6]:
# descriptor 3: color histogram
def histogramme(image, mask=None):
    """Return a normalized 8x8x8 HSV color histogram of ``image`` as a flat list.

    Args:
        image: Color image accepted by ``cv2.cvtColor`` with the
            ``COLOR_BGR2HSV`` conversion. The bin ranges below assume an
            8-bit image.
        mask: Optional mask forwarded unchanged to ``cv2.calcHist``.

    Returns:
        A list of 512 floats: the histogram after ``cv2.normalize`` with its
        default settings, flattened.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # For 8-bit images OpenCV stores hue in [0, 180), not [0, 256). Binning
    # hue over [0, 256) left the upper hue bins permanently empty and
    # squeezed all hues into the remaining ones.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [8, 8, 8],
                        [0, 180, 0, 256, 0, 256])
    # Normalize in place (source and destination are the same array).
    cv2.normalize(hist, hist)
    return hist.flatten().tolist()

In [7]:
# Fixed image size. It is not applied: images are processed at their native
# resolution. Kept for an optional cv2.resize step.
fixed_size = tuple((120, 80))

# Directory holding the images to cluster.
train_path = "2Classes"

# One concatenated feature vector per successfully read image.
global_features = []

# File names (directory stripped), aligned index-for-index with global_features.
image_names = []

# glob returns files in a filesystem-dependent order; sorting makes the row
# order of the feature matrix (and of the exported CSV) reproducible.
path = sorted(glob.glob(os.path.join(train_path, "*.jpg")))
for file in path:
    image = cv2.imread(file)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; skip it so names and features stay aligned.
        print("Skipping unreadable image: {}".format(file))
        continue

    # Extract the four descriptors.
    forme_ = forme(image)
    texture_ = texture(image)
    colormoments_ = color_moments(image)
    histogramme_ = histogramme(image)

    # Concatenate in a fixed order: histogram, texture, color moments, shape.
    global_feature = histogramme_ + texture_ + colormoments_ + forme_

    # basename handles both '/' and the platform separator, unlike split('/').
    image_names.append(os.path.basename(file))
    global_features.append(global_feature)

if not global_features:
    # Fail here with a clear message rather than later inside the scaler.
    raise RuntimeError(
        "No readable .jpg image found in '{}'".format(train_path))

print("End of features extractions...")

End of features extractions...


In [8]:
# Rescale every feature column to [0, 1] so that no descriptor dominates the
# distance computation purely because of its numeric range.
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(global_features)

# Reduce to 13 principal components. random_state is fixed because PCA's
# default solver selection may use a randomized SVD for inputs this wide,
# which would otherwise make the projection differ from run to run.
sklearn_pca = PCA(n_components=13, random_state=0)
Y_sklearn = sklearn_pca.fit_transform(X)

# NOTE: despite its name, `kmeans` is an agglomerative (hierarchical) model;
# the name is kept because later cells reference it.
# Ward linkage only works with Euclidean distances, which is the default, so
# the `affinity` keyword (deprecated, then removed, in newer scikit-learn
# releases) is omitted to stay compatible with old and new versions.
kmeans = AgglomerativeClustering(n_clusters=2, linkage='ward')

# Shift cluster ids from {0, 1} to {1, 2} for the exported file.
result = kmeans.fit_predict(Y_sklearn) + 1

In [9]:
# Save the result: write each image name with its cluster label to the CSV file.
create_csv_file(result,image_names)

In [10]:
# Persist the fitted clustering model to disk.
filename = 'model2Classes.sav'
joblib.dump(kmeans, filename)

# Build Y_test: 100 zeros followed by 100 ones (presumably the ground-truth
# classes of the 200 images -- TODO confirm against the dataset).
Y_test = []
for i in range(200):
    Y_test.append(0 if i < 100 else 1)

# Some time later: load the model back from the file written above.
loaded_model = joblib.load(filename)