The goal of this notebook is to merge the outputs of two models (UV and non-UV) to get a better result.

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import accuracy_score
import numpy as np

import shutil
import os

# Preparing photos 

We start by sorting the photos into directories, one per category.

In [2]:
def listdir_fullpath(d):
    """List the entries of a directory, each prefixed with the directory.

    Args:
        d (str): Directory to list.

    Returns:
        list: ``os.path.join(d, name)`` for every name returned by
            ``os.listdir(d)``, in the order ``os.listdir`` reports them.
    """
    full_paths = []
    for entry_name in os.listdir(d):
        full_paths.append(os.path.join(d, entry_name))
    return full_paths


def sort_photos_to_categories_folders(categories,
                                      path_copy_from,
                                      path_copy_to):
    """Copy every photo into a folder named after its category.

    A photo is assigned to a category when the category string occurs in the
    photo's file name. A file whose name contains several categories is
    copied into each matching folder; a file matching none is not copied.

    Args:
        categories (list of str): Category names. Each one is used both as
            the name of the folder to create and as the substring searched
            for in file names.
        path_copy_from (str): Directory holding the photos. Only its direct
            entries are considered.
        path_copy_to (str): Directory in which one folder per category is
            created (missing parent directories are created too). A trailing
            path separator is optional.

    Returns:
        bool: Always True.
    """

    file_names = os.listdir(path_copy_from)
    print("Found {} files.".format(len(file_names)))

    for category in categories:
        # Join instead of concatenating strings so the destination is correct
        # whether or not path_copy_to ends with a separator.
        category_dir = os.path.join(path_copy_to, category)
        os.makedirs(category_dir, exist_ok=True)
        for file_name in file_names:
            # Match on the bare file name: testing the full path would also
            # match a category name that appears in a parent directory.
            if category in file_name:
                shutil.copy(os.path.join(path_copy_from, file_name),
                            category_dir)
        print("Finished moving files from category '{}'".format(category))
    return True


def copytree(src, dst, symlinks=False, ignore=None):
    """Copy the contents of ``src`` into ``dst``.

    Unlike ``shutil.copytree``, the entries of ``src`` are copied *into*
    ``dst`` one by one, so ``dst`` itself may already exist.

    Args:
        src (str): Directory whose entries are copied.
        dst (str): Destination directory; created if it does not exist.
        symlinks (bool): Forwarded to ``shutil.copytree`` for each
            sub-directory of ``src``.
        ignore (callable or None): Forwarded to ``shutil.copytree`` for each
            sub-directory of ``src``. It is not applied to the direct entries
            of ``src`` itself.

    Raises:
        FileExistsError: From ``shutil.copytree`` when a sub-directory of
            ``src`` already exists under ``dst``.
    """
    # shutil.copy2 does not create missing parent directories, so without
    # this a regular file directly inside src could not be copied into a
    # destination that does not exist yet.
    os.makedirs(dst, exist_ok=True)

    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)


def cut_photos(path_cut_from,
               path_to_paste,
               percentage_to_leave):
    """Split every category folder into a kept part and a moved part.

    The whole tree is first copied to ``path_to_paste``. Then, per category
    folder and in sorted file-name order, the leading ``percentage_to_leave``
    share of files is kept in ``path_cut_from`` (the rest is deleted there),
    while in ``path_to_paste`` that same leading share is deleted so only the
    remainder is left. The split is therefore by file name, not random.

    Args:
        path_cut_from (str): Directory containing one folder per category.
            Files are deleted from these folders.
        path_to_paste (str): Directory that receives the category folders
            holding the files removed from ``path_cut_from``.
        percentage_to_leave (float): Fraction between 0 and 1 of the files
            of each category to keep in ``path_cut_from``.

    Returns:
        bool: Always True.

    Raises:
        ValueError: If ``percentage_to_leave`` is outside ``[0, 1]``.
    """
    # Validate before touching the file system: the steps below delete files.
    if not 0 <= percentage_to_leave <= 1:
        raise ValueError(
            "percentage_to_leave must be between 0 and 1, got {!r}".format(
                percentage_to_leave))

    # Copying all files from train directory.
    copytree(path_cut_from, path_to_paste)
    print("Done copying all files.")

    # Only the category folders that were just copied are trimmed. Listing
    # the destination instead would also delete files from unrelated folders
    # that were already there, and stray top-level files would make
    # remove_files fail when it tries to list them as directories.
    category_names = sorted(
        name for name in os.listdir(path_cut_from)
        if os.path.isdir(os.path.join(path_cut_from, name))
    )

    # Removing last 1-percentage_to_leave percent files from every train dir.
    for name in category_names:
        remove_files(os.path.join(path_cut_from, name),
                     percentage_to_leave,
                     False)

    # Removing first percentage_to_leave percent files from every val dir.
    for name in category_names:
        remove_files(os.path.join(path_to_paste, name),
                     percentage_to_leave,
                     True)

    return True
    
    
def remove_files(path, percentage_to_remove, from_top=True):
    """Delete one side of a folder's name-sorted entry list.

    The entries of ``path`` are sorted and cut at index
    ``round(percentage_to_remove * number_of_entries)``.

    Args:
        path (str): Path to folder with files.
        percentage_to_remove (float): Fraction that positions the cut. With
            ``from_top=True`` it is the share that gets deleted; with
            ``from_top=False`` it is the share that survives.
        from_top (bool): True deletes the entries before the cut, False
            deletes the entries from the cut onwards.

    Returns:
        bool: Always True.
    """
    entries = sorted(listdir_fullpath(path))
    cut_index = round(percentage_to_remove * len(entries))

    doomed = entries[:cut_index] if from_top else entries[cut_index:]
    for entry in doomed:
        os.remove(entry)

    return True

In [3]:
# Rock-type labels. Each entry is used as a folder name and as the substring
# looked up in photo file names when the photos are sorted into folders.
categories = ['алевролит', 'аргиллит', 'переслой', 'песчаник']

# Non-UV photos: source folder, then the train and validation destinations.
path_copy_from_non_UV = "../data/huge data/prepared data/rock/cut photos/ДС/"
path_copy_to_non_UV = "../data/huge data/prepared data/rock/training data/UV+non-UV/non-UV prediction/train/"
path_copy_to_non_UV_val = "../data/huge data/prepared data/rock/training data/UV+non-UV/non-UV prediction/validation/"

# UV photos: source folder, then the train and validation destinations.
path_copy_from_UV = "../data/huge data/prepared data/rock/cut photos/УФ/"
path_copy_to_UV = "../data/huge data/prepared data/rock/training data/UV+non-UV/UV prediction/train/"
path_copy_to_UV_val = "../data/huge data/prepared data/rock/training data/UV+non-UV/UV prediction/validation/"

## Putting photos into category folders

In [4]:
# Copy the non-UV photos into one folder per category under the train path.
sort_photos_to_categories_folders(categories=categories,
                                  path_copy_from=path_copy_from_non_UV,
                                  path_copy_to=path_copy_to_non_UV)

Found 8005 files.
Finished moving files from category 'алевролит'
Finished moving files from category 'аргиллит'
Finished moving files from category 'переслой'
Finished moving files from category 'песчаник'


True

In [5]:
# Copy the UV photos into one folder per category under the train path.
sort_photos_to_categories_folders(categories=categories,
                                  path_copy_from=path_copy_from_UV,
                                  path_copy_to=path_copy_to_UV)

Found 8005 files.
Finished moving files from category 'алевролит'
Finished moving files from category 'аргиллит'
Finished moving files from category 'переслой'
Finished moving files from category 'песчаник'


True

## Splitting into train and validation sets

In [6]:
# Split the non-UV category folders: 0.8 of each folder's files (in sorted
# file-name order) stay in train, the rest end up in validation.
# NOTE: this deletes files from the train folders on disk.
cut_photos(path_cut_from=path_copy_to_non_UV,
           path_to_paste=path_copy_to_non_UV_val,
           percentage_to_leave=0.8)

Done copying all files.


True

In [7]:
# Split the UV category folders: 0.8 of each folder's files (in sorted
# file-name order) stay in train, the rest end up in validation.
# NOTE: this deletes files from the train folders on disk.
cut_photos(path_cut_from=path_copy_to_UV,
           path_to_paste=path_copy_to_UV_val,
           percentage_to_leave=0.8)

Done copying all files.


True

# Making predictions 

Making predictions with non-UV and UV models.

In [10]:
def prepare_generators(path: str,
                       image_size: (int, int) = (224, 224),
                       batch_size: int = 8):
    """Build an unshuffled image iterator over a directory of class folders.

    Pixel values are rescaled by 1/255. Shuffling is disabled so the order of
    the yielded images stays aligned with the iterator's ``classes``
    attribute, which is what makes predictions comparable with true labels.

    Args:
        path: Directory handed to ``flow_from_directory``; expected to
            contain one sub-folder per class.
        image_size: Target size the images are resized to.
        batch_size: Number of images per batch.

    Returns:
        The directory iterator returned by
        ``ImageDataGenerator.flow_from_directory``.
    """

    image_data_generator = ImageDataGenerator(rescale=1 / 255)

    return image_data_generator.flow_from_directory(
        path,
        target_size=image_size,
        # Forward the argument: it used to be accepted but never passed on,
        # so the library default batch size was always used instead.
        batch_size=batch_size,
        shuffle=False
    )

In [11]:
def make_predictions(model_path,
                     photos_path):
    """Load a saved model, score a folder of photos and report accuracy.

    Prints the class-name-to-index mapping found in the folder and the
    accuracy of the arg-max predictions against the folder-derived labels.

    Args:
       model_path (str): Path to the saved model file.
       photos_path (str): Directory whose sub-folder names are the
                          categories.

    Returns:
        The raw model output for every photo, in iterator order.
    """

    classifier = keras.models.load_model(model_path)
    photo_iterator = prepare_generators(path=photos_path,
                                        image_size=(224, 224),
                                        batch_size=8)

    true_labels = photo_iterator.classes
    y_score = classifier.predict_generator(photo_iterator)

    print("Classes: {}".format(photo_iterator.class_indices))

    # One predicted class index per photo: the position of its highest score.
    predicted_labels = list(map(np.argmax, y_score))
    accuracy = accuracy_score(true_labels, predicted_labels)
    print("Classifier accuracy: {:.2f}".format(accuracy))

    return y_score

In [18]:
# Saved Keras models, one per photo type.
non_UV_model_path = "../models/rock/non_UV_best.h5"
UV_model_path = "../models/rock/UV best.h5"

# Non-UV photo folders (same directories as path_copy_to_non_UV / _val).
non_UV_photos_path_train = "../data/huge data/prepared data/rock/training data/UV+non-UV/non-UV prediction/train/"
non_UV_photos_path_val = "../data/huge data/prepared data/rock/training data/UV+non-UV/non-UV prediction/validation/"

# UV photo folders (same directories as path_copy_to_UV / _val).
# Note: the UV train path has no "_train" suffix, unlike the non-UV one.
UV_photos_path = "../data/huge data/prepared data/rock/training data/UV+non-UV/UV prediction/train/"
UV_photos_path_val = "../data/huge data/prepared data/rock/training data/UV+non-UV/UV prediction/validation/"

In [7]:
# Score the non-UV model on the non-UV train folder. The path variable is
# non_UV_photos_path_train; the name non_UV_photos_path is not defined
# anywhere in this notebook and raised NameError on a fresh kernel.
non_UV_predictions = make_predictions(model_path=non_UV_model_path,
                                      photos_path=non_UV_photos_path_train)

Found 7360 images belonging to 4 classes.
Classes: {'алевролит': 0, 'аргиллит': 1, 'переслой': 2, 'песчаник': 3}
Classifier accuracy: 0.77


In [9]:
# Score the UV model on the UV train folder.
UV_predictions = make_predictions(model_path=UV_model_path,
                                  photos_path=UV_photos_path) 

Found 7360 images belonging to 4 classes.
Classes: {'алевролит': 0, 'аргиллит': 1, 'переслой': 2, 'песчаник': 3}
Classifier accuracy: 0.77


# Merging outputs 

In [13]:
# Predicted category names for the photos from index 7000 onwards. Relies on
# `categories` being in the same order as the printed class indices.
[categories[np.argmax(scores)] for scores in non_UV_predictions[7000:]]

['песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'аргиллит',
 'песчаник',
 'алевролит',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник'

In [12]:
# Predicted category names for the photos from index 7000 onwards. Relies on
# `categories` being in the same order as the printed class indices.
[categories[np.argmax(scores)] for scores in UV_predictions[7000:]]

['песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'переслой',
 'переслой',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'алевролит',
 'алевролит',
 'алевролит',
 'алевролит',
 'алевролит',
 'песчаник',
 'песчаник',
 'песчаник',
 'алевролит',
 'песчаник',
 'аргиллит',
 'песчаник',
 'аргиллит',
 'аргиллит',
 'аргиллит',
 'аргиллит',
 'аргиллит',
 'аргиллит',
 'аргиллит',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песчаник',
 'песч