In [1]:
import numpy as np
import tensorflow as tf
import keras
import cv2
import random
from utils.elpv_reader import load_dataset
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.optimizers import SGD,Adam
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Activation,Conv2D, MaxPooling2D,GlobalMaxPooling2D,BatchNormalization
from sklearn.model_selection import KFold
from keras.callbacks import Callback
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score,f1_score
from focal_loss import SparseCategoricalFocalLoss
from keras.regularizers import l2
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

In [2]:
# read dataset
images, proba, types = load_dataset()
# change label, label will be 0 1 2 3
label_mapping = {0.0: 0, 0.3333333333333333: 1, 0.6666666666666666: 2, 1.0: 3}
for x in range(len(proba)):
    if proba[x] in label_mapping:
        proba[x] = label_mapping[proba[x]]
# get the infomations of images
num_images, height, width = images.shape
# Create a new numpy array to store the resized and denoised images
resized_images = np.empty((num_images, 64, 64), dtype=np.uint8)
# use for loop, resize every image and denoise it
for i in range(num_images):
    resized_images[i] = cv2.resize(images[i], (64,64))
    resized_images[i] = cv2.GaussianBlur(resized_images[i], (3, 3),0)
# split the dataset into train set and test set 75% for train , 25% for test
X_train, X_test, y_train, y_test =  train_test_split(resized_images, proba,train_size=0.75,stratify=proba, random_state=30,shuffle=True)

In [3]:
# OverSampling
class_indices1 = np.where(y_train == 1)[0]
samples_to_copy1= np.random.choice(class_indices1, size=100, replace=False)
X_train = np.concatenate([X_train, X_train[samples_to_copy1]])
y_train = np.concatenate([y_train, y_train[samples_to_copy1]])

class_indices2 = np.where(y_train == 2)[0]
samples_to_copy2 = np.random.choice(class_indices2, size=160, replace=True)
X_train = np.concatenate([X_train, X_train[samples_to_copy2]])
y_train = np.concatenate([y_train, y_train[samples_to_copy2]])

In [4]:
# Image Enhancement ------- purpose is make the perfomance better, for example, rotation/scaling can reduce the overfiiting and increase the data diversity
# Contrast Enhancement 
new_X_train = X_train.copy()
new_y_train = y_train.copy()
for x in range(len(new_X_train)):
   new_X_train[x] = cv2.equalizeHist(X_train[x])
new_X_train = np.concatenate([X_train, new_X_train])
new_y_train = np.concatenate([y_train, new_y_train])

# flip(horiton + vertical)
temp_X = X_train.copy()
temp_y = y_train.copy()
for i in range(len(temp_X)):
    temp_X[i] = cv2.flip(X_train[i],1)
new_X_train = np.concatenate([new_X_train, temp_X])
new_y_train = np.concatenate([new_y_train, temp_y])

#Some random rotation (-30 - 30 degrees)
temp_X = X_train.copy()
temp_y = y_train.copy()
for i in range(len(temp_X)):
    rows, cols = X_train[i].shape
    random_angle = random.randint(-30, 30)
    rotation_matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), random_angle, 1)
    temp_X[i] = cv2.warpAffine(X_train[i], rotation_matrix, (cols, rows))
new_X_train = np.concatenate([new_X_train, temp_X])
new_y_train = np.concatenate([new_y_train, temp_y])
# Fianlly, add some noise on the original image, now we get around 10K train set
temp_X = X_train.copy()
temp_y = y_train.copy()
for i in range(len(temp_X)):
    noise = np.random.normal(loc=0, scale=15, size=X_train[i].shape)
    temp_X[i] = np.clip(X_train[i] + noise, 0, 255).astype(np.float32)
new_X_train = np.concatenate([new_X_train, temp_X])
new_y_train = np.concatenate([new_y_train, temp_y])

In [5]:
# change the data type to float 32
new_X_train = new_X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
# add channel, make sure the image is 4D
new_X_train = new_X_train.reshape(new_X_train.shape[0], new_X_train.shape[1], new_X_train.shape[2],1)
# make sure all pixel is in [0,1]
new_X_train = new_X_train / 255
# find all mono images
all_mono = []
monocrystalline_indices = np.where(types == 'mono')[0]
for i in range(len(X_test)):
    for j in monocrystalline_indices:
        if np.array_equal(X_test[i],resized_images[j]):
            all_mono.append(i)
# find all poly images
all_poly = []
polycrystalline_indices = np.where(types == 'poly')[0]
for i in range(len(X_test)):
    for j in polycrystalline_indices:
        if np.array_equal(X_test[i],resized_images[j]):
            all_poly.append(i)
# make sure all pixel is in [0,1]   
X_test = X_test / 255

In [6]:
#Method 1 , Use CNN model (current parameters is not big_best_model,since we tried a lot different models)
model = Sequential()
model.add(Conv2D(32, (3, 3),padding='same',activation='relu',input_shape=(64, 64, 1)))
model.add(Conv2D(32, (3, 3),padding='same',activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.3))
model.add(Conv2D(64,(3,3),padding='same',activation='relu'))
model.add(Conv2D(64,(3,3),padding='same',activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.3))
model.add(Conv2D(128,(3,3),padding='same',activation='relu'))
model.add(Conv2D(128,(3,3),padding='same',activation='relu'))
model.add(Conv2D(128,(3,3),padding='same',activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.3))
model.add(Conv2D(256,(3,3),padding='same',activation='relu'))
model.add(Conv2D(256,(3,3),padding='same',activation='relu'))
model.add(Conv2D(256,(3,3),padding='same',activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
# big best model using 512/256, instead of 2048 2048
model.add(Dense(2048,activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(2048,activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(4,activation="softmax"))

In [7]:
# After the model built up, we can compile the model, using a low learning rate
model.compile(optimizer=Adam(learning_rate=0.0001), 
             loss=SparseCategoricalFocalLoss(gamma=2),
             metrics=['accuracy'])

In [8]:
# use checkpoint to save the model, we saved 'big_best_model' as our best result. In order to avoid overwriting, we used a new name "new_best_model"
checkpoint = ModelCheckpoint('new_best_model.h5', 
                             monitor='val_accuracy',  
                             save_best_only=True,  
                             mode='max',  #
                             verbose=1)  
# fit the model, use 20% as validation set; store the values in [history], for the graph plot(the loss graph,used to check overfitting)
history = model.fit(new_X_train,new_y_train,batch_size=32, epochs=30,validation_split=0.2,callbacks=[checkpoint])

Epoch 1/30
Epoch 1: val_accuracy improved from -inf to 0.48148, saving model to new_best_model.h5
Epoch 2/30
Epoch 2: val_accuracy did not improve from 0.48148
Epoch 3/30
Epoch 3: val_accuracy improved from 0.48148 to 0.50788, saving model to new_best_model.h5
Epoch 4/30
Epoch 4: val_accuracy did not improve from 0.50788
Epoch 5/30
Epoch 5: val_accuracy improved from 0.50788 to 0.53895, saving model to new_best_model.h5
Epoch 6/30
Epoch 6: val_accuracy improved from 0.53895 to 0.58323, saving model to new_best_model.h5
Epoch 7/30
Epoch 7: val_accuracy improved from 0.58323 to 0.64283, saving model to new_best_model.h5
Epoch 8/30
Epoch 8: val_accuracy did not improve from 0.64283
Epoch 9/30
Epoch 9: val_accuracy improved from 0.64283 to 0.65006, saving model to new_best_model.h5
Epoch 10/30
Epoch 10: val_accuracy did not improve from 0.65006
Epoch 11/30
Epoch 11: val_accuracy did not improve from 0.65006
Epoch 12/30
Epoch 12: val_accuracy improved from 0.65006 to 0.68370, saving model t

Epoch 28: val_accuracy improved from 0.84930 to 0.85568, saving model to new_best_model.h5
Epoch 29/30
Epoch 29: val_accuracy improved from 0.85568 to 0.86547, saving model to new_best_model.h5
Epoch 30/30
Epoch 30: val_accuracy improved from 0.86547 to 0.86633, saving model to new_best_model.h5


In [9]:
# load the best model (currently using the trained model,it can be changed to others)
best_model = load_model('big_best_model.h5')

In [10]:
# predict all images, and change the possibility to exactly class
y_pred = best_model.predict(X_test)
predicted_labels = np.argmax(y_pred, axis=1)
y_test = y_test.astype(int)
# evaluate, f1score / recall /predictions and confusion matrix
f1_score = classification_report(y_test, predicted_labels)
confusion  = confusion_matrix(y_test,predicted_labels)
# print the results
print("Confusion Matrix for All Images:")
print(f1_score)
print(confusion)

Confusion Matrix for All Images:
              precision    recall  f1-score   support

           0       0.73      0.78      0.75       377
           1       0.44      0.47      0.46        74
           2       0.04      0.04      0.04        26
           3       0.68      0.57      0.62       179

    accuracy                           0.66       656
   macro avg       0.47      0.46      0.47       656
weighted avg       0.66      0.66      0.65       656

[[293  29  18  37]
 [ 31  35   3   5]
 [ 15   4   1   6]
 [ 63  11   3 102]]


In [11]:
# Calculate accuracy for each class
accuracy_class_0 = confusion[0, 0] / np.sum(confusion[0, :])
accuracy_class_1 = confusion[1, 1] / np.sum(confusion[1, :])
accuracy_class_2 = confusion[2, 2] / np.sum(confusion[2, :])
accuracy_class_3 = confusion[3, 3] / np.sum(confusion[3, :])
# Print the results with accuracy
print(f"Class 0 (Accuracy): {accuracy_class_0:.2%}")
print(f"Class 1 (Accuracy): {accuracy_class_1:.2%}")
print(f"Class 2 (Accuracy): {accuracy_class_2:.2%}")
print(f"Class 3 (Accuracy): {accuracy_class_3:.2%}")

Class 0 (Accuracy): 77.72%
Class 1 (Accuracy): 47.30%
Class 2 (Accuracy): 3.85%
Class 3 (Accuracy): 56.98%


In [12]:
# Select only the monocrystalline image and calculate the confusion matrix
y_pred_monocrystalline = best_model.predict(X_test[all_mono])
predicted_labels_monocrystalline = np.argmax(y_pred_monocrystalline, axis=1)
confusion_monocrystalline = confusion_matrix(y_test[all_mono], predicted_labels_monocrystalline)
f1_score_monocrystalline = classification_report(y_test[all_mono], predicted_labels_monocrystalline)
print("Confusion Matrix for Monocrystalline Images:")
print(f1_score_monocrystalline)
print(confusion_monocrystalline)

Confusion Matrix for Monocrystalline Images:
              precision    recall  f1-score   support

           0       0.73      0.75      0.74       151
           1       0.54      0.59      0.56        37
           2       0.09      0.08      0.08        13
           3       0.70      0.63      0.67        71

    accuracy                           0.67       272
   macro avg       0.52      0.52      0.51       272
weighted avg       0.67      0.67      0.67       272

[[114  14   8  15]
 [ 12  22   1   2]
 [  7   3   1   2]
 [ 23   2   1  45]]


In [13]:
# Calculate accuracy for monocrystalline class
accuracy_mono_class_0 = confusion_monocrystalline[0, 0] / np.sum(confusion_monocrystalline[0, :])
accuracy_mono_class_1 = confusion_monocrystalline[1, 1] / np.sum(confusion_monocrystalline[1, :])
accuracy_mono_class_2 = confusion_monocrystalline[2, 2] / np.sum(confusion_monocrystalline[2, :])
accuracy_mono_class_3 = confusion_monocrystalline[3, 3] / np.sum(confusion_monocrystalline[3, :])
# Print the results with accuracy for monocrystalline
print(f"Monocrystalline Class 0 (Accuracy): {accuracy_mono_class_0:.2%}")
print(f"Monocrystalline Class 1 (Accuracy): {accuracy_mono_class_1:.2%}")
print(f"Monocrystalline Class 2 (Accuracy): {accuracy_mono_class_2:.2%}")
print(f"Monocrystalline Class 3 (Accuracy): {accuracy_mono_class_3:.2%}")

Monocrystalline Class 0 (Accuracy): 75.50%
Monocrystalline Class 1 (Accuracy): 59.46%
Monocrystalline Class 2 (Accuracy): 7.69%
Monocrystalline Class 3 (Accuracy): 63.38%


In [14]:
# Select only the polycrystalline images and calculate the confusion matrix
y_pred_polycrystalline = best_model.predict(X_test[all_poly])
predicted_labels_polycrystalline = np.argmax(y_pred_polycrystalline, axis=1)
confusion_polycrystalline = confusion_matrix(y_test[all_poly], predicted_labels_polycrystalline)
f1_score_polycrystalline = classification_report(y_test[all_poly], predicted_labels_polycrystalline)
print("Confusion Matrix for Polycrystalline Images:")
print(f1_score_polycrystalline)
print(confusion_polycrystalline)

Confusion Matrix for Polycrystalline Images:
              precision    recall  f1-score   support

           0       0.73      0.79      0.76       226
           1       0.34      0.35      0.35        37
           2       0.00      0.00      0.00        13
           3       0.66      0.53      0.59       108

    accuracy                           0.65       384
   macro avg       0.43      0.42      0.42       384
weighted avg       0.65      0.65      0.65       384

[[179  15  10  22]
 [ 19  13   2   3]
 [  8   1   0   4]
 [ 40   9   2  57]]


In [15]:
# Calculate accuracy for polycrystalline class
accuracy_poly_class_0 = confusion_polycrystalline[0, 0] / np.sum(confusion_polycrystalline[0, :])
accuracy_poly_class_1 = confusion_polycrystalline[1, 1] / np.sum(confusion_polycrystalline[1, :])
accuracy_poly_class_2 = confusion_polycrystalline[2, 2] / np.sum(confusion_polycrystalline[2, :])
accuracy_poly_class_3 = confusion_polycrystalline[3, 3] / np.sum(confusion_polycrystalline[3, :])

# Print the results with accuracy for polycrystalline
print(f"Polycrystalline Class 0 (Accuracy): {accuracy_poly_class_0:.2%}")
print(f"Polycrystalline Class 1 (Accuracy): {accuracy_poly_class_1:.2%}")
print(f"Polycrystalline Class 2 (Accuracy): {accuracy_poly_class_2:.2%}")
print(f"Polycrystalline Class 3 (Accuracy): {accuracy_poly_class_3:.2%}")

Polycrystalline Class 0 (Accuracy): 79.20%
Polycrystalline Class 1 (Accuracy): 35.14%
Polycrystalline Class 2 (Accuracy): 0.00%
Polycrystalline Class 3 (Accuracy): 52.78%
