#### Import the packages and functions

In [2]:
## --- Import the packages and functions
import pickle

import matplotlib.pyplot as plt

from tools.functions import *

#### Definitions, parameters and directory paths

In [3]:
# --- Definitions, parameters and directory paths
# Root folder of the images
base_path = '../img'

# Grayscale dataset folder, located under the image root
dataset_path = base_path + '/dataset/gray/'

# Train/test split ratios to evaluate
train_ratio = [0.75, 0.8, 0.85, 0.9]

# Threshold applied when normalizing the image data
thresh_normalization = 0.5

# Thresholds regarded as 'good' during evaluation
thresh_good = [0.0, 0.1, 0.15]

# Iteration numbers performed during model training (1 through 5)
iterations = range(1, 6)

# Classification tasks the model learns, each mapped to its list of category labels
classification_types = dict(
    n_grains=['50', '60', '70', '80', '90', '100'],
    defect_stratified=['0%', '10%', '15%', '20%', '25%', '30%'],
    defect_thresholded=['With defects', 'Healthy'],
)

# Axis labels used when plotting
x_label, y_label = 'Predicted', 'True'

## ---

#### Reading the dataframes and classification results

In [7]:
def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``.

    Unpickling can execute arbitrary code, so only point this at files you trust.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Train / test dataframes
df_train = _load_pickle("../resources/df_train.pkl")
df_test = _load_pickle("../resources/df_test.pkl")

# Stored classification metrics and confusion matrices
classification_metrics_results = _load_pickle("../resources/classification_metrics_results.pickle")
confusion_matrices = _load_pickle("../resources/confusion_matrices.pickle")

#### Export a selected confusion matrix as a PDF

In [11]:
# Pick one configuration and export its mean confusion matrix as a PDF.
train_ratio_item = train_ratio[0]
thresh_good_item = thresh_good[1]
classification_type = 'n_grains'
image_name = 'cm_075_ngrains'

cm = confusion_matrices[train_ratio_item][thresh_good_item][classification_type]['mean']

# Use '/' as the separator: '\{' is an invalid escape sequence in a string literal and
# '\' only acts as a path separator on Windows, while '/' matches the other paths in
# this notebook.
export_confusion_matrix_as_image(cm, classification_types[classification_type], f"../{image_name}.pdf", [x_label, y_label])

#### Show the classification results for a selected configuration

In [15]:
# Configuration whose averaged metrics are displayed
classification_type = 'defect_thresholded'
train_ratio_item, thresh_good_item = train_ratio[0], thresh_good[1]

# Look up the 'mean' entry first, then show it with one row per entry
mean_metrics = classification_metrics_results[train_ratio_item][thresh_good_item][classification_type]['mean']
display(pd.DataFrame(mean_metrics).T)

Unnamed: 0,precision,recall,f1-score,support
With defects,0.828711,0.872731,0.849411,80.2
Healthy,0.908334,0.873349,0.890036,116.2
accuracy,0.873171,0.873171,0.873171,0.873171
macro avg,0.868522,0.87304,0.869724,196.4
weighted avg,0.87609,0.873171,0.873622,196.4


In [17]:
# Keep the training rows with 0 % defects and 100 grains, restricted to the pixel-count
# column and the raw / normalized ratio columns, then show how many rows were selected.
# NOTE(review): the recorded output of this cell is a KeyError stating that
# 'ratio_80to255_by_1to80' and 'normalized_ratio_80to255_by_1to80' are not in the index.
# Confirm the actual column names (e.g. inspect df_train.columns) and whether the
# normalized column has to be created before this selection.
df = df_train.loc[(df_train['defect_percentage'] == 0.00) & (df_train['grain_quantity'] == 100), ['npixels_1to255_per_grain', 'ratio_80to255_by_1to80', 'normalized_ratio_80to255_by_1to80']].copy()
display(len(df))

KeyError: "['ratio_80to255_by_1to80', 'normalized_ratio_80to255_by_1to80'] not in index"

In [None]:
# Plot the raw and the normalized ratio column of `df` against the row position.
# NOTE(review): the two column names read here differ from the ones requested when `df`
# is selected from df_train ('ratio_80to255_by_1to80', 'normalized_ratio_80to255_by_1to80');
# confirm which naming is current before running.
x = np.arange(len(df))
y1 = df['ratio_80a255_por_1a80']
y2 = df['ratio_80a255_por_1a80_normalizado']

# Seaborn styling: white grid, notebook context
sns.set(style='whitegrid', context='notebook')

fig, ax = plt.subplots(figsize=(10, 6))

# Both series share the same marker / line appearance
line_style = dict(marker='o', linestyle='-', linewidth=2, markersize=4)
ax.plot(x, y1, label='Relação entre pixels claros/escuros', **line_style)
ax.plot(x, y2, label='Relação entre pixels claros/escuros normalizada', **line_style)

ax.set_xlabel('Ocorrência', fontsize=18)
ax.set_ylabel('Relação', fontsize=18)
ax.legend(fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

# Draw a thin black frame around the plot area
for spine in ax.spines.values():
    spine.set_edgecolor('black')
    spine.set_linewidth(1)

# Write the figure to disk, then render it
fig.savefig('sem_normalizacao.pdf', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Normalize a copy of the training data with threshold 1.000 and plot the raw vs. the
# normalized ratio for the rows with 0 % defects and 100 grains.
# NOTE(review): the column names used in this cell are Portuguese, while the earlier
# df_train selection in this notebook filters on 'defect_percentage' / 'grain_quantity';
# confirm these columns exist in df_train.
df1 = df_train.copy()

# Hand normalize_dataset a copy of df_test and discard the returned test frame, so the
# shared `df_test` is never rebound: rebinding it would feed the output of this call
# into the next normalization cell and make re-running the cell non-idempotent.
df1, _unused_test = normalize_dataset(df1, df_test.copy(), 'ratio_80a255_por_1a80', 1.000)
df1 = df1.loc[(df1['percentual_defeitos'] == 0.00) & (df1['qtde_graos'] == 100), ['npixels_1a255_por_grao', 'ratio_80a255_por_1a80', 'ratio_80a255_por_1a80_normalizado']].copy()
display(len(df1))

# One point per selected row
x = np.arange(len(df1))
y3 = df1['ratio_80a255_por_1a80']
y4 = df1['ratio_80a255_por_1a80_normalizado']

# Seaborn styling: white grid, notebook context
sns.set(style='whitegrid', context='notebook')

fig, ax = plt.subplots(figsize=(10, 6))

# Both series share the same marker / line appearance
line_style = dict(marker='o', linestyle='-', linewidth=2, markersize=4)
ax.plot(x, y3, label='Relação entre pixels claros/escuros', **line_style)
ax.plot(x, y4, label='Relação entre pixels claros/escuros normalizada', **line_style)

ax.set_xlabel('Ocorrência', fontsize=18)
ax.set_ylabel('Relação', fontsize=18)
ax.legend(fontsize=18)
ax.tick_params(axis='both', labelsize=16)

# Draw a thin black frame around the plot area
for spine in ax.spines.values():
    spine.set_edgecolor('black')
    spine.set_linewidth(1)

# Write the figure to disk, then render it
fig.savefig('normalizacao_1.pdf', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Normalize a copy of the training data with threshold 0.750 and plot the raw vs. the
# normalized ratio for the rows with 0 % defects and 100 grains.
# NOTE(review): the column names used in this cell are Portuguese, while the earlier
# df_train selection in this notebook filters on 'defect_percentage' / 'grain_quantity';
# confirm these columns exist in df_train.
df2 = df_train.copy()

# Hand normalize_dataset a copy of df_test and discard the returned test frame, so the
# shared `df_test` is never rebound: rebinding it would feed the output of this call
# into the next normalization cell and make re-running the cell non-idempotent.
df2, _unused_test = normalize_dataset(df2, df_test.copy(), 'ratio_80a255_por_1a80', 0.750)
df2 = df2.loc[(df2['percentual_defeitos'] == 0.00) & (df2['qtde_graos'] == 100), ['npixels_1a255_por_grao', 'ratio_80a255_por_1a80', 'ratio_80a255_por_1a80_normalizado']].copy()
display(len(df2))

# One point per selected row
x = np.arange(len(df2))
y5 = df2['ratio_80a255_por_1a80']
y6 = df2['ratio_80a255_por_1a80_normalizado']

# Seaborn styling: white grid, notebook context
sns.set(style='whitegrid', context='notebook')

fig, ax = plt.subplots(figsize=(10, 6))

# Both series share the same marker / line appearance
line_style = dict(marker='o', linestyle='-', linewidth=2, markersize=4)
ax.plot(x, y5, label='Relação entre pixels claros/escuros', **line_style)
ax.plot(x, y6, label='Relação entre pixels claros/escuros normalizada', **line_style)

ax.set_xlabel('Ocorrência', fontsize=18)
ax.set_ylabel('Relação', fontsize=18)
ax.legend(fontsize=18)
ax.tick_params(axis='both', labelsize=16)

# Draw a thin black frame around the plot area
for spine in ax.spines.values():
    spine.set_edgecolor('black')
    spine.set_linewidth(1)

# Write the figure to disk, then render it
fig.savefig('normalizacao_075.pdf', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Normalize a copy of the training data with threshold 0.500 and plot the raw vs. the
# normalized ratio for the rows with 0 % defects and 100 grains.
# NOTE(review): the column names used in this cell are Portuguese, while the earlier
# df_train selection in this notebook filters on 'defect_percentage' / 'grain_quantity';
# confirm these columns exist in df_train.
df3 = df_train.copy()

# Hand normalize_dataset a copy of df_test and discard the returned test frame, so the
# shared `df_test` is never rebound: rebinding it would feed the output of this call
# into any later normalization call and make re-running the cell non-idempotent.
df3, _unused_test = normalize_dataset(df3, df_test.copy(), 'ratio_80a255_por_1a80', 0.500)
df3 = df3.loc[(df3['percentual_defeitos'] == 0.00) & (df3['qtde_graos'] == 100), ['npixels_1a255_por_grao', 'ratio_80a255_por_1a80', 'ratio_80a255_por_1a80_normalizado']].copy()
# display(len(df3))

# One point per selected row
x = np.arange(len(df3))
y7 = df3['ratio_80a255_por_1a80']
y8 = df3['ratio_80a255_por_1a80_normalizado']

# Seaborn styling: white grid, notebook context
sns.set(style='whitegrid', context='notebook')

fig, ax = plt.subplots(figsize=(10, 6))

# Both series share the same marker / line appearance
line_style = dict(marker='o', linestyle='-', linewidth=2, markersize=4)
ax.plot(x, y7, label='Relação entre pixels claros/escuros', **line_style)
ax.plot(x, y8, label='Relação entre pixels claros/escuros normalizada', **line_style)

ax.set_xlabel('Ocorrência', fontsize=18)
ax.set_ylabel('Relação', fontsize=18)
ax.legend(fontsize=18)
ax.tick_params(axis='both', labelsize=16)

# Draw a thin black frame around the plot area
for spine in ax.spines.values():
    spine.set_edgecolor('black')
    spine.set_linewidth(1)

# Write the figure to disk, then render it
fig.savefig('normalizacao_050.pdf', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Plot the raw and the normalized ratio column of `df` against the row position,
# using smaller fonts (14 for axis labels, 12 for the legend).
# NOTE(review): this cell writes 'sem_normalizacao.pdf', the same file name that the
# earlier plot of `df` in this notebook saves to, so running both keeps only the last one.
# NOTE(review): the two column names read here differ from the ones requested when `df`
# is selected from df_train; confirm which naming is current before running.
x = np.arange(len(df))
y1 = df['ratio_80a255_por_1a80']
y2 = df['ratio_80a255_por_1a80_normalizado']

# Seaborn styling: white grid, notebook context
sns.set(style='whitegrid', context='notebook')

fig, ax = plt.subplots(figsize=(10, 6))

# Both series share the same marker / line appearance
line_style = dict(marker='o', linestyle='-', linewidth=2, markersize=4)
ax.plot(x, y1, label='Relação entre pixels claros/escuros', **line_style)
ax.plot(x, y2, label='Relação entre pixels claros/escuros normalizada', **line_style)

ax.set_xlabel('Ocorrência', fontsize=14)
ax.set_ylabel('Relação', fontsize=14)
ax.legend(fontsize=12)

# Draw a thin black frame around the plot area
for spine in ax.spines.values():
    spine.set_edgecolor('black')
    spine.set_linewidth(1)

# Write the figure to disk, then render it
fig.savefig('sem_normalizacao.pdf', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# import pickle

# with open("results/classification_metrics_results.pickle", "rb") as f:
#     classification_metrics_results = pickle.load(f)

# with open("results/confusion_matrices.pickle", "rb") as f:
#     confusion_matrices = pickle.load(f)

# # display(confusion_matrices)


In [None]:
# classification_metrics_results[0.75][thresh_good[1]]['n_grains'][1]['accuracy']

In [None]:
# train_ratio     = [0.75, 0.8, 0.85, 0.9]

# x = []
# accuracy = []

# for ratio in train_ratio:
#     for i in iterations:
#         accuracy.append(classification_metrics_results[ratio][thresh_good[1]]['n_grains'][1]['accuracy'])
#         x.append(int(ratio*100.0))

# print(np.mean(accuracy))
# print(x)
# print(accuracy)



In [None]:
# train_ratio = [0.75, 0.8, 0.85, 0.9]

# accuracy = []
# x = []

# for ratio in train_ratio:
#     for i in iterations:
#         accuracy.append(classification_metrics_results[ratio][thresh_good[1]]['n_grains'][i]['accuracy'])
#         x.append(int(ratio*100))

# plt.boxplot(accuracy)
# plt.xlabel('Train Ratio')
# plt.ylabel('Accuracy')
# plt.title('Accuracy Boxplot')
# plt.show()

In [None]:
# train_ratio = [0.75, 0.8, 0.85, 0.9]

# for ratio in train_ratio:
#     accuracy = []
#     for i in iterations:
#         accuracy.append(classification_metrics_results[ratio][thresh_good[1]]['n_grains'][i]['accuracy'])
    
#     plt.boxplot(accuracy)
#     plt.xlabel('Train Ratio')
#     plt.ylabel('Accuracy')
#     plt.title(f'Accuracy Boxplot for Ratio {int(ratio*100)}')
#     plt.show()


In [None]:


# train_ratio = [0.75, 0.8, 0.85, 0.9]
# iterations      = range(1, 6)

# accuracy_data = []
# x_labels = []

# for ratio in train_ratio:
#     accuracy = []
#     for i in iterations:
#         accuracy.append(classification_metrics_results[ratio][thresh_good[2]]['n_grains'][i]['accuracy'])
    
#     accuracy_data.append(accuracy)
#     # x_labels.append(int(ratio*100))
#     x_labels.append(ratio)

# plt.boxplot(accuracy_data, labels=x_labels)
# plt.xlabel('Percentual de treino')
# plt.ylabel('Acurácia')
# # plt.title('Accuracy Boxplot')
# plt.savefig('accuracy_boxplot_ngrains.pdf', dpi=300, bbox_inches='tight')
# plt.show()




In [None]:
# Boxplot of the 'defect_stratified' accuracy over the training iterations, one box per
# train ratio. Uses the notebook-level `train_ratio`, `iterations` and `thresh_good`
# settings so the configuration has a single source of truth.
accuracy_data = [
    [
        classification_metrics_results[ratio][thresh_good[1]]['defect_stratified'][i]['accuracy']
        for i in iterations
    ]
    for ratio in train_ratio
]
x_labels = list(train_ratio)

# Select the style before the figure is created: rcParams changed afterwards are not
# applied to a figure that already exists.
plt.style.use('default')
fig, ax = plt.subplots(figsize=(10, 6))

# Create the boxplot
# NOTE(review): recent Matplotlib releases rename `labels` to `tick_labels`; confirm
# against the installed version before switching.
ax.boxplot(accuracy_data, labels=x_labels)

# Set the x and y axis labels and the tick label sizes
ax.set_xlabel('Percentual de treino', fontsize=16)
ax.set_ylabel('Acurácia', fontsize=16)
ax.tick_params(axis='both', labelsize=14)

# Set the plot borders to black
for spine in ax.spines.values():
    spine.set_edgecolor('black')
    spine.set_linewidth(1)

# Remove gridlines
ax.grid(False)

# Save and display the plot
# fig.savefig('accuracy_boxplot_defect_stratified.pdf', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# train_ratio = [0.75, 0.8, 0.85, 0.9]

# accuracy_data = []
# x_labels = []

# for ratio in train_ratio:
#     accuracy = []
#     for i in iterations:
#         accuracy.append(classification_metrics_results[ratio][thresh_good[1]]['n_grains'][i]['accuracy'])
    
#     accuracy_data.append(accuracy)
#     x_labels.append(int(ratio*100))

# plt.boxplot(accuracy_data, labels=x_labels)
# plt.xlabel('Train Ratio')
# plt.ylabel('Accuracy')
# plt.title('Accuracy Boxplot')

# # Set the y-axis range to show more values
# plt.ylim(0.7, 0.9)

# plt.show()


In [None]:
# import pickle
# import pandas as pd

# with open("results/df_train.pkl", "rb") as f:
#     df_train = pickle.load(f)

# with open("results/df_test.pkl", "rb") as f:
#     df_test = pickle.load(f)



# df_train.to_csv("results/df_train.csv", index=False)
# df_test.to_csv("results/df_test.csv", index=False)

In [None]:
# Export the mean 'n_grains' confusion matrix of every train ratio as a PNG named
# cm_<ratio as a zero-padded percentage>_ngrains.png (0.75 -> cm_075_ngrains.png).
# Looping over `train_ratio` keeps the exports in sync with the configured ratios.
for ratio_position, ratio in enumerate(train_ratio):
    cm = confusion_matrices[ratio][thresh_good[1]]['n_grains']['mean']

    # Only the first matrix is shown inline; the remaining ones are just exported
    if ratio_position == 0:
        display(cm)

    image_file = f'cm_{round(ratio * 100):03d}_ngrains.png'
    export_confusion_matrix_as_image(cm, classification_types['n_grains'], image_file, [x_label, y_label])
