In [None]:
from PIL import Image
import io
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from numpy import expand_dims
import os
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report
import random
import requests
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array 
import zipfile


In deze tweede 'model generator', genaamd 'modelGenerator2', wil ik de data op dezelfde manier inladen als wij geleerd hebben in de notebook Numpy2. De code die in 'modelGenerator' voor het inladen gebruikt wordt, komt uit de TensorFlow-tutorial: https://www.tensorflow.org/tutorials/load_data/images

# Inladen van de data

De 'augmented' data is te onderscheiden doordat de soort bewerking vóór de bestandsnaam is toegevoegd. Deze bestanden zijn te herkennen aan de namen 'flipped', 'black' en 'duotone'.

In [None]:
# Image size in pixels (height and width); used for the model's input shape
# and for the image_size of the test dataset.
imgHeight = imgWidth = 100

In [None]:
# Holds the image file paths of all classes; it is populated by the directory walk in the next code cell.
txtFiles = []

In [None]:
categories = ['Blotch', 'Normal', 'Rot', 'Scab']

trainDir = "data/Train_cleaned_and_augumented"

# Build the path list from scratch so that re-running this cell does not
# append every file a second time. Paths stay grouped per category, in the
# order of `categories`. Each directory listing is sorted because os.listdir
# returns entries in arbitrary order, which would make later seeded steps
# (such as the train/test split) differ between machines.
txtFiles = []
for category in categories:
    imgDir = f"{trainDir}/{category}_Apple/"
    for filename in sorted(os.listdir(imgDir)):
        txtFiles.append(os.path.join(imgDir, filename))


# Data inlezen

In [None]:
# Keep only the paths that contain ".jpeg". The dataset is expected to hold
# nothing but images, but this drops hidden files that macOS adds to folders
# (such as .DS_Store).
txtFiles = list(filter(lambda path: ".jpeg" in path, txtFiles))
print(len(txtFiles))

In [None]:
# Display the number of entries currently in txtFiles.
len(txtFiles)
# print(txtFiles)

In [None]:
# Load every image as a uint8 array holding the raw 0-255 pixel values.
# The values are deliberately NOT divided by 255 here: the model starts with
# a Rescaling(1./255) layer, so normalising in this cell as well would scale
# the pixels twice and make the training input differ from the 0-255 input
# that image_dataset_from_directory delivers for the test data.
imageObjects = list()

for txtFile in txtFiles:
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(txtFile) as image:
        # Force three colour channels and the configured size so that all
        # arrays have the same shape and stack into one array below.
        # PIL expects the size as (width, height).
        image = image.convert("RGB").resize((imgWidth, imgHeight))
        imageObjects.append(np.asarray(image, dtype=np.uint8))

# The list is converted into a single array of shape
# (number of images, imgHeight, imgWidth, 3).
imageObjects = np.array(imageObjects)


In [None]:
# print(imageObjects)

**Gebruik de bestandsnamen voor het verkrijgen van de labels**

In [None]:
# imageLabels = np.empty(len(txtFiles), dtype = 'S20')

# Derive the class of every image from the name of the folder that contains
# it (".../Blotch_Apple/<file>" -> "Blotch_Apple"). The labels are stored in a
# new list instead of overwriting txtFiles: the cell can then be re-run
# without errors and the file paths stay available. Using the parent folder
# also keeps working when trainDir has a different number of path segments.
imageLabels = [os.path.basename(os.path.dirname(path)) for path in txtFiles]

# labelNames: the sorted unique class names.
# labelNumbers: for every image, the index of its class in labelNames.
labelNames, labelNumbers = np.unique(imageLabels, return_inverse=True)

# Maps each class number to its class name.
labelDict = dict(zip(np.unique(labelNumbers), labelNames))

# np.array(np.unique(labelNumbers, return_counts=True)).T # demonstratie van methode unique. 

In [None]:
# Print the class names, the per-image class numbers and the number-to-name mapping.
for labelInfo in (labelNames, labelNumbers, labelDict):
    print(labelInfo)

In [None]:
# Balance the classes by drawing the same number of images from every class.
samplesPerClass = 470  # NOTE(review): presumably chosen just below the smallest class size - confirm
sampler = random.Random(42)  # fixed seed, so every run selects the same images

txtFilesSel = []
imageObjectSel = []
labelNumbersSel = []

# The positions of each class are looked up in labelNumbers instead of being
# hard-coded as index ranges, so this keeps working when the number of files
# per class changes.
for classNumber in np.unique(labelNumbers):
    classIndices = np.flatnonzero(labelNumbers == classNumber).tolist()
    # Never ask for more images than the class contains.
    nrToDraw = min(samplesPerClass, len(classIndices))
    # sample() draws without replacement: no image is selected twice.
    for chosenIndex in sampler.sample(classIndices, nrToDraw):
        txtFilesSel.append(txtFiles[chosenIndex])
        imageObjectSel.append(imageObjects[chosenIndex])
        labelNumbersSel.append(labelNumbers[chosenIndex])

imageObject = np.array(imageObjectSel)  # the selected images as one array
labelNumbersSel = np.array(labelNumbersSel)  # class numbers in the same order as imageObject


In [None]:
# Display the Python type of the selection container.
type(imageObjectSel)

In [None]:
# print(txtFilesSel[1])
# print(imageObjectSel)
# type(imageObjects)
# print(txtFiles[888])

<h3>Data splitten</h3>

In [None]:
# Split images and labels into 75% training and 25% test data. Stratifying on
# labelNumbers keeps the class proportions the same in both parts, and the
# fixed random_state makes the split repeatable.
trainSet, testSet, trainLabels, testLabels = train_test_split(
    imageObjects,
    labelNumbers,
    train_size=0.75,
    stratify=labelNumbers,
    random_state=42,
)
print(trainSet.shape)
nrOfImages = len(trainSet)

In [None]:
# Number of images in the training set.
print(nrOfImages)

<h3>Data inspecteren</h3>

In [None]:
def inspectData():
    """Show the first 16 training images in a 4x4 grid.

    Every image is drawn without axis ticks or grid lines and is captioned
    with its class name. The function reads the notebook-level variables
    trainSet, trainLabels and labelNames.
    """
    _, axes = plt.subplots(4, 4, figsize=(10, 10))

    for position, ax in enumerate(axes.flat):
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)
        ax.imshow(trainSet[position])
        ax.set_xlabel(labelNames[trainLabels[position]])

    plt.show()

In [None]:
# Draw the 4x4 preview grid of training images.
inspectData()

# Bouwen van het model

In [None]:
# Model 2, inspired by nick nochnack.
num_classes = len(categories)

model = Sequential([
    # Scales the inputs by 1/255, so the network is meant to receive raw
    # 0-255 pixel values.
    layers.Rescaling(1./255, input_shape=(imgHeight, imgWidth, 3)),
    # Three identical stages: 32 filters of 3x3 with stride 1 and ReLU,
    # each followed by max pooling.
    Conv2D(32, (3, 3), 1, activation='relu'),
    MaxPooling2D(),
    Conv2D(32, (3, 3), 1, activation='relu'),
    MaxPooling2D(),
    Conv2D(32, (3, 3), 1, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(256, activation='relu'),
    # One output per class without an activation, i.e. raw scores (logits).
    layers.Dense(num_classes),
])

model.summary()

<h3>Definieer de trainparameters</h3>

In [None]:
# Number of passes over the training data.
nrOfEpochs = 5
# Adam optimiser with a learning rate of 0.001.
gdAlgorithm = keras.optimizers.Adam(learning_rate=1e-3)
# Sparse variant because the labels are integer class numbers; from_logits
# because the last Dense layer of the model has no activation.
lossFunction = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

<h3>Train het model</h3>

In [None]:
# metrics is passed as a list: Keras 3 rejects a bare string here (it expects
# a list, tuple or dict), while a list is accepted by Keras 2 and Keras 3.
model.compile(optimizer=gdAlgorithm, loss=lossFunction, metrics=["accuracy"])


In [None]:
# Train on the training split; verbose=2 prints one line per epoch.
history = model.fit(
    trainSet,
    trainLabels,
    batch_size=32,
    epochs=nrOfEpochs,
    verbose=2,
)

In [None]:
# model.save(os.path.join(f'models/[new]px.h5'))

<h3>Evalueer het model</h3>

In [None]:
# Relative path, like trainDir, instead of an absolute path that only exists
# on one machine.
# NOTE(review): assumes the notebook is run from the classifier folder that
# also contains data/Train_cleaned_and_augumented - confirm.
test_data_dir = "data/Test"

# batch_size was not defined anywhere before this cell, which raised a
# NameError on a fresh kernel; 32 matches the batch size used for training.
batch_size = 32

# Builds a dataset of (images, labels) batches from the class sub-folders,
# resizing every image to the size the model expects.
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    seed=123,
    image_size=(imgHeight, imgWidth),
    batch_size=batch_size,
)

In [None]:
# scores = model.evaluate(test_ds)