### Import required libraries

In [1]:
import PIL
import os
import csv

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras import layers
from keras.utils import to_categorical
from pathlib import Path
from PIL import Image

### Define constants

In [2]:
# --- Data -------------------------------------------------------------------
img_size = 256              # every image is resized to img_size x img_size pixels
datadir = 'dataset/'        # directory scanned for image files

train_set_size = 0.7        # fraction of the labelled images used for training

# Anomaly categories; one binary classifier is built per entry.
categories = [
    "loop_scattering",
    "background_ring",
    "strong_background",
    "diffuse_scattering",
    "artifact",
    "ice_ring",
    "non_uniform_detector"
]

# --- Network / training hyper-parameters -------------------------------------
conv_2d_size = 3
conv_2d_activation = 'relu'
pooling_2d_size = 5
loss = 'binary_crossentropy'
# Number of images fed to the network at once. This used to be assigned twice
# (32, then 300); only the effective value is kept.
batch_size = 300
epochs = 20
dense_units = 128

### Load image metadata

In [3]:
# Label vector of each training image, keyed by the image id from train.csv.
IMAGES_META = {}
# Image ids in the order in which test.csv lists them.
TEST_DATA_ORDER = []


def num_to_bool(text):
    """Convert a CSV flag to a float label.

    Returns 0.0 only when `text` is exactly the string "0"; every other value
    maps to 1.0.
    """
    return 0. if text == "0" else 1.

# train.csv: store one float flag per category for every listed image.
with open('train.csv', newline='') as train_file:
    for record in csv.DictReader(train_file):
        IMAGES_META[record["image"]] = [
            num_to_bool(record[category_name]) for category_name in categories
        ]

# test.csv: remember the order in which the images are listed.
with open('test.csv', newline='') as test_file:
    TEST_DATA_ORDER.extend(record["image"] for record in csv.DictReader(test_file))


### Load images

In [4]:
# Pixel arrays of the labelled images and, per category, their labels.
IMAGES = []
LABELS = [[] for _ in categories]

# Pixel arrays and names of the images that have no entry in IMAGES_META.
COMPETITION_IMAGES = []
COMPETITION_IMAGES_NAMES = []


def get_category_cum(name):
    """Return the position of `name` in `categories`, or None if it is absent."""
    try:
        return categories.index(name)
    except ValueError:
        return None


# Load every file in `datadir` as a grayscale img_size x img_size array scaled
# to [0, 1]. Files with an entry in IMAGES_META are labelled training data;
# all others are collected as competition images.
#
# os.listdir returns entries in arbitrary order, so iterate in sorted order to
# make the positional train / hold-out split done later reproducible.
for img in sorted(os.listdir(datadir)):
    image_name = img.replace(".png", "")

    # The context manager closes the underlying file; convert() and resize()
    # return new in-memory images, so `resized` stays valid afterwards.
    with Image.open(os.path.join(datadir, img)) as image:
        resized = image.convert("L").resize((img_size, img_size), Image.BICUBIC)
    arr = np.asarray(resized) / 255.0

    anomalies = IMAGES_META.get(image_name)

    if anomalies is None:
        # No labels in train.csv: this image belongs to the competition set.
        COMPETITION_IMAGES.append(arr)
        COMPETITION_IMAGES_NAMES.append(image_name)
        continue

    IMAGES.append(arr)

    # Each label is stored as a one-element list so that the per-category
    # label array ends up with shape (n_images, 1).
    for category_num, category_labels in enumerate(LABELS):
        category_labels.append([anomalies[category_num]])

# Add the trailing channel axis expected by the Conv2D input layer.
IMAGES = np.array(IMAGES).reshape(len(IMAGES), img_size, img_size, 1)
COMPETITION_IMAGES = np.array(COMPETITION_IMAGES).reshape(
    len(COMPETITION_IMAGES), img_size, img_size, 1
)

LABELS = [np.array(category_labels) for category_labels in LABELS]

print("Images shape: ", IMAGES.shape)

Images shape:  (5048, 256, 256, 1)


#### Neural network parameters

In [5]:
# Build one independent binary classifier per category, all with the same
# architecture: two Conv2D + MaxPooling2D stages followed by two Dense layers.
models = []
for _ in categories:
    model = Sequential([
        # input_shape must match the loaded images:
        # img_size x img_size pixels with a single (grayscale) channel.
        Conv2D(
            32,
            (conv_2d_size, conv_2d_size),
            input_shape=(img_size, img_size, 1),
            activation=conv_2d_activation,
        ),
        MaxPooling2D(pool_size=(pooling_2d_size, pooling_2d_size)),
        Conv2D(32, (conv_2d_size, conv_2d_size), activation=conv_2d_activation),
        MaxPooling2D(pool_size=(pooling_2d_size, pooling_2d_size)),
        Flatten(),
        Dense(units=dense_units, activation='sigmoid'),
        # Single sigmoid unit: one score per image for this category.
        Dense(units=1, activation='sigmoid'),
    ])
    models.append(model)

In [6]:
# Compile every per-category classifier with the same loss, optimizer and metric.
for model in models:
    model.compile(optimizer='Adam', loss=loss, metrics=['accuracy'])

In [7]:
# Split the labelled data into a training part and a hold-out part.
# The split is positional (no shuffling): the first `train_images_count`
# images are used for training, the remainder is held out.
train_images_count = int(len(IMAGES) * train_set_size)
test_images_count = len(IMAGES) - train_images_count

IMAGES_TRAIN, IMAGES_TEST = IMAGES[:train_images_count], IMAGES[train_images_count:]

LABELS_TRAIN, LABELS_TEST = [], []
for x, category_labels in enumerate(LABELS):
    LABELS_TRAIN.append(category_labels[:train_images_count])
    LABELS_TEST.append(category_labels[train_images_count:])

In [8]:
# Train each category's classifier on its own label column and keep the
# returned History objects, in category order.
hist = []

for classifier, category_labels in zip(models, LABELS_TRAIN):
    history = classifier.fit(
        IMAGES_TRAIN,
        category_labels,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
    )
    hist.append(history)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12


Epoch 12/12
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [9]:
# Evaluate every category's classifier on the hold-out split (skipped when
# the split is empty).
if test_images_count > 0:
    for x, held_out_labels in enumerate(LABELS_TEST):
        models[x].evaluate(IMAGES_TEST, held_out_labels)



In [11]:
# Decision threshold applied to each category model's output score.
# Derived from `categories` so it stays in sync when the category list changes.
thresholds = [0.5] * len(categories)


def xor_sum(N, L):
    """Return the number of positions at which the sequences N and L differ."""
    return sum(1 for expected, predicted in zip(N, L) if expected != predicted)


# All result names are initialised unconditionally so that later cells can
# read them even when there is no hold-out split (test_images_count == 0).
total = 0            # number of (image, category) labels predicted wrongly
total_labels = 0     # number of (image, category) labels compared
hamming_loss = 0

categories_results = [0 for _ in categories]
categories_false_positive = [0 for _ in categories]
# NOTE: despite its name this list counts FALSE NEGATIVES (predicted 0 while
# the label is 1). The misspelled name is kept because the reporting cell
# reads it.
categories_true_negetive = [0 for _ in categories]

predictions = []

if test_images_count > 0:
    # One array of scores per category model, indexed [image][0].
    predictions = [
        models[category_num].predict(IMAGES_TEST)
        for category_num in range(len(categories))
    ]

    for num in range(len(IMAGES_TEST)):
        # Binarise the scores of this image, one entry per category.
        prediction = [
            1 if predictions[category_num][num][0] > thresholds[category_num] else 0
            for category_num in range(len(categories))
        ]
        valid = [
            round(LABELS_TEST[category_num][num][0])
            for category_num in range(len(categories))
        ]

        total_labels += len(categories)
        total += xor_sum(valid, prediction)

        for identifier, (valid_response, predicted_response) in enumerate(zip(valid, prediction)):
            if valid_response == predicted_response:
                categories_results[identifier] += 1
            elif predicted_response == 1 and valid_response == 0:
                categories_false_positive[identifier] += 1
            elif predicted_response == 0 and valid_response == 1:
                categories_true_negetive[identifier] += 1

    # Turn the per-category hit counts into accuracies.
    categories_results = [correct / len(IMAGES_TEST) for correct in categories_results]

    if total_labels:
        hamming_loss = total / total_labels

In [12]:
# Echo the hyper-parameters this run was configured with.
hyper_parameters = [
    ("- Convolution 2D size: ", conv_2d_size),
    ("- Convolution 2D activation: ", conv_2d_activation),
    ("- Pooling 2D size: ", pooling_2d_size),
    ("- Loss function: ", loss),
    ("- Batch size: ", batch_size),
    ("- Epochs: ", epochs),
    ("- Dense units: ", dense_units),
]

for label, value in hyper_parameters:
    print(label, value)

- Convolution 2D size:  3
- Convolution 2D activation:  relu
- Pooling 2D size:  5
- Loss function:  binary_crossentropy
- Batch size:  300
- Epochs:  12
- Dense units:  128


In [13]:
# Print the hold-out metrics as `name = value` lines.
# NOTE: the "categories_true_negatives" line shows the contents of
# `categories_true_negetive`, i.e. the cases predicted 0 while labelled 1.
metric_report = [
    ("hamming_loss =", total / total_labels),
    ("categories_results =", categories_results),
    ("categories_false_positives =", categories_false_positive),
    ("categories_true_negatives =", categories_true_negetive),
    ("categories =", categories),
]

for label, value in metric_report:
    print(label, value)

hamming_loss = 0.14606317774634606
categories_results = [0.7069306930693069, 0.7940594059405941, 0.8858085808580858, 0.9306930693069307, 0.9346534653465347, 0.93003300330033, 0.7953795379537953]
categories_false_positives = [195, 212, 72, 0, 0, 0, 88]
categories_true_negatives = [249, 100, 101, 105, 99, 106, 222]
categories = ['loop_scattering', 'background_ring', 'strong_background', 'diffuse_scattering', 'artifact', 'ice_ring', 'non_uniform_detector']


# Generating results for the CSV file

In [20]:
# Build the submission rows for the competition images: one row per
# (image, category) pair, grouped by image name in `unordered_data`.
row_num = 0
unordered_data = {}

# Score the competition images with EACH category's own model.
# (Previously every iteration indexed `models` with a stale loop variable
# left over from an earlier cell, so a single model produced all categories.)
predictions = [
    models[category_num].predict(COMPETITION_IMAGES)
    for category_num in range(len(categories))
]

for image_num in range(len(COMPETITION_IMAGES)):
    image_name = COMPETITION_IMAGES_NAMES[image_num]
    image_rows = unordered_data.setdefault(image_name, [])

    for anomaly_num, anomaly in enumerate(categories):
        # Binarise this category's score for the image with its threshold.
        score = predictions[anomaly_num][image_num][0]
        predicted = 1 if score > thresholds[anomaly_num] else 0

        image_rows.append(
            {
                'id': row_num,
                'image': image_name,
                'anomaly': anomaly,
                'predicted': predicted
            })
        row_num += 1

In [15]:
# Print the per-epoch training curves of every category as `name = [values]` lines.
for category_name, his in zip(categories, hist):
    for metric in ("loss", "accuracy", "val_loss", "val_accuracy"):
        print(metric + "_" + category_name, "=", his.history[metric])

loss_loop_scattering = [0.6873069405555725, 0.6678471565246582, 0.6408314108848572, 0.6188585758209229, 0.5901282429695129, 0.5747758746147156, 0.5742760896682739, 0.5761433839797974, 0.5734609365463257, 0.560977578163147, 0.5594135522842407, 0.5534055233001709]
accuracy_loop_scattering = [0.5923246145248413, 0.6017615795135498, 0.6454859972000122, 0.6621578931808472, 0.6851211190223694, 0.6999056339263916, 0.7008492946624756, 0.7005347609519958, 0.6932997703552246, 0.7077697515487671, 0.7087134122848511, 0.7124881744384766]
val_loss_loop_scattering = [0.7263337969779968, 0.6994538307189941, 0.6824160814285278, 0.66669762134552, 0.637310266494751, 0.613245964050293, 0.6136537194252014, 0.6157262921333313, 0.6191288232803345, 0.6090585589408875, 0.6086792945861816, 0.6191533207893372]
val_accuracy_loop_scattering = [0.4406779706478119, 0.4661017060279846, 0.5, 0.5112994313240051, 0.6468926668167114, 0.6468926668167114, 0.6497175097465515, 0.6836158037185669, 0.6751412153244019, 0.675141

In [None]:
# row_num = 1
# with open('submission.csv', 'w', newline='') as csvfile:
#     spamwriter = csv.DictWriter(csvfile, fieldnames=["id","image","anomaly","predicted"])
#     spamwriter.writeheader()
    
#     for image_name in TEST_DATA_ORDER:
#         for result_row in unordered_data[image_name]:
#             result_row["id"] = row_num
#             spamwriter.writerow(result_row)
#             row_num += 1