In [1]:
import os
import random
import shutil

# nurodom kelia i katalogus
data_dir = 'tomatoes_apples'
apples_dir = os.path.join(data_dir, 'apples')
tomatoes_dir = os.path.join(data_dir, 'tomatoes')

# aprasyti, kokia dalis tenka mokymui, testams ir validacijai
splits = (0.7, 0.15, 0.15)

def split_data(directory:str, splits:tuple) -> None:
    """
    Funkcija yra skirta suskaidyti pateiktam kataloge esancias nuotraukas i tris naujus katalogus, pagal 
    pateiktus isskaidymo dydzius

    Parametrai:
    directory - nuoroda iki failo, kuri norite skaidyti
    splits - tuple, su nurodytais kiekiais mokymui, testavimui ir validacijai
    """
    images = os.listdir(directory) # gauname visas nuotraukas
    random.shuffle(images) # ismaisome nuotraukas, siekiant skirtingu paledimu metu tureti skirtingus duomenis
    # norime suzinoti kiekius, kiek nuotrauku reikes mokymams
    train_size = int(len(images) * splits[0])
    validation_size = int(len(images) * splits[1])
    test_size = int(len(images) * splits[2])
    
    # katalogu sukurimas
    train_dir = os.path.join(directory, 'train')
    validation_dir = os.path.join(directory, 'validation')
    test_dir = os.path.join(directory, 'test')

    # os.removedirs(train_dir)
    # os.removedirs(validation_dir)
    # os.removedirs(test_dir)

    # TODO: sunaikinti katalogus, pries tai sunaikinant turini juose, tai leis isvengti is a directory klaidos

    # katalogu sukurimas, pagal pateiktas nuorodas
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(validation_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    for i, image in enumerate(images):
        if i < train_size:
            shutil.copy(os.path.join(directory, image), os.path.join(train_dir, image))
        elif i < train_size + validation_size:
            shutil.copy(os.path.join(directory, image), os.path.join(validation_dir, image))
        else: 
            shutil.copy(os.path.join(directory, image), os.path.join(test_dir, image))



split_data(apples_dir, splits)
split_data(tomatoes_dir, splits)

In [2]:
# pasiekti nuotraukas
# patikrinti ar nuotrauka validi
# suvienodinti nuotraukas
import os
from PIL import Image

data_dir = 'tomatoes_apples'
apples_dir = os.path.join(data_dir, 'apples')
tomatoes_dir = os.path.join(data_dir, 'tomatoes')

# skirta patikrinti ar atidarant negausime klaidos, kaip argumenta pateikiame kelia iki nuotraukos
def is_valid_image(file_path):
    try:
        with Image.open(file_path) as img:
            # patikrina ar nuotrauka galima atidaryti
            img.verify()
        return True
    except (IOError, SyntaxError):
        return False

def get_valid_image_files(directory):
    """
    Skirta filtruoti direktorijoje esancius failus, patikrinti ar jie yra validus. Rezultatas failu pavadinimu sarasas, su validziu failu pavadinimasis
    """
    valid_files = []
    # naudojame _, nes neketiname naudoti katalogu(train, test, validation)
    for root, _, files in os.walk(directory):
        if root != directory:
            for file in files:
                file_path = os.path.join(root, file)
                if is_valid_image(file_path):
                    valid_files.append(file_path)
    return valid_files


valid_apples_photos = get_valid_image_files(apples_dir)
valid_tomatoes_photos = get_valid_image_files(tomatoes_dir)

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd 

# naudojame siekiant sumazinti pixeliu vertes is intervalo 0-255 i intervala 0-1
datagen = ImageDataGenerator(rescale=1./255)

# kuriame df, nes tai yra budas perteikti informacija generatoriui
apples_df = pd.DataFrame({'filename': valid_apples_photos})
tomatoes_df = pd.DataFrame({'filename': valid_tomatoes_photos})

apples_generator = datagen.flow_from_dataframe(
    dataframe = apples_df, # nurodom kur yra musu nuotrauku sarasas
    x_col = 'filename', # nurodom kuris stulpelis yra failo kelias musu df
    target_size = (150, 150), # nurodom nuotrauku dydzius
    batch_size = 20, # nurodom kiek nuotrauku idesime kiekvieno iteracijos metu
    class_mode = None) # mes turime tik du galimus outputus, todel class mode nustatome None

tomatoes_generator = datagen.flow_from_dataframe(
    dataframe = tomatoes_df, # nurodom kur yra musu nuotrauku sarasas
    x_col = 'filename', # nurodom kuris stulpelis yra failo kelias musu df
    target_size = (150, 150), # nurodom nuotrauku dydzius
    batch_size = 20, # nurodom kiek nuotrauku idesime kiekvieno iteracijos metu
    class_mode = None) # mes turime tik du galimus outputus, todel class mode nustatome None

Found 218 validated image filenames.
Found 173 validated image filenames.


In [4]:
import numpy as np
from tensorflow.keras.utils import Sequence

# kuriame sia klase tam, kad turetume galimybe training matricas pateikti gabalais (batches)
class CombinedGenerator(Sequence):

    def __init__(self, *generators):
        self.generators = generators
        self._num_batches = sum(len(gen) for gen in generators)
        self.current_generator = 0
    
    def __len__(self):
        return self._num_batches
    
    def __getitem__(self, idx):
        for gen in self.generators:
            if idx < len(gen):
                batch = gen[idx]
                # generuojame labels, atsizvelgdami i tai kiek nariu turime savo batche
                labels = np.array([0] * batch.shape[0]) if gen == apples_generator else np.array([1] * batch.shape[0])
                return batch, labels
            idx -= len(gen)

combined_generator = CombinedGenerator(apples_generator, tomatoes_generator)

In [5]:
from tensorflow.keras import datasets, layers, models

model = models.Sequential([
    layers.Conv2D(32, (3,3), activation = 'relu', input_shape=(150,150,3)), #nurodom filtrus ir ju dydziu dimensija
    layers.MaxPooling2D(2,2), #sumazinam dimensijas, islaikant svarbiausias savybes
    layers.Conv2D(64, (3,3), activation = 'relu'), #antras konvoliucinis sloksnis 
    layers.MaxPooling2D(2,2), #sumazinam dimensijas, islaikant svarbiausias savybes
    layers.Conv2D(128, (3,3), activation = 'relu'),
    layers.MaxPooling2D(2,2),
    layers.Flatten(), #plokstinam duomenis, paversdami is 3D i 1D
    layers.Dense(512, activation= 'relu'),
    layers.Dense(1, activation='sigmoid') #isejimo sluoksnis su vienu neuronu, grazins tikimybe nuo 0 iki 1
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy', #taikome, nes musu klasifikojami duomenys bus 0 arba 1
    metrics=['accuracy']
)

In [7]:
history = model.fit(
    combined_generator, #nurodom generatoriu, is kurio duomenis imsime dalimis
    steps_per_epoch = len(combined_generator), #pasiims batchu kieki
    epochs=10
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 249ms/step - accuracy: 0.4812 - loss: 2.7459
Epoch 2/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(value)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 245ms/step - accuracy: 0.7416 - loss: 0.6641
Epoch 4/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 254ms/step - accuracy: 0.3342 - loss: 0.7023
Epoch 6/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 256ms/step - accuracy: 0.5413 - loss: 0.6933
Epoch 8/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 252ms/step - accuracy: 0.5175 - loss: 0.6978
Epoch 10/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00


In [8]:
def preprocess_image(image_path):
    img = Image.open(image_path)
    img = img.resize((150,150))
    img_array = np.array(img)
    img_array = img_array.astype('float32') / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    return img_array

In [12]:
def predict_image(image_path):
    img_array = preprocess_image(image_path)
    prediction = model.predict(img_array)
    return "Obuolys" if prediction[0][0] > 0.5 else 'Pomidoras'

print(predict_image('obuolys.jpg'))
print('--------------')
print(predict_image('pomidoras.jpg'))
print('--------------')
print(predict_image('obuolys22.jpg'))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Pomidoras
--------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Pomidoras
--------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Pomidoras
