In [None]:
class WindturbineDetector():
    
    def __init__(self, selection_windturbine_paths=[""], selection_no_windturbine_paths=[""], 
                 categories_windturbine_crops=[3], categories_no_windturbine_crops=[2], 
                 pixel="40p", image_bands=["B02", "B03", "B04", "B08"], rescale_factor=2**14, 
                 rotation_range=10, zoom_range=0.1, width_shift_range=0.1, height_shift_range=0.1,
                 horizontal_flip=False, vertical_flip=False, fill_mode="constant", cval=0.0,
                 num_cnn_layers=2, filters=16, kernel_sizes=[5, 5], layer_activations=["relu", "relu"],
                 input_shape=[30, 30, 4], pool_size=2, strides=2, full_connection_units=128, 
                 full_connection_activation="relu", output_units=1, output_activation="sigmoid",
                 optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"], epochs=10):
        
            """
            initialize all parameters for the data preparation
            
            Parameters for data import
            ----------
            categories_windturbine_crops: list, [1,2,3]
                Set one or more categories of selection for windturbine selection. 
                Default is [3]
            categories_no_windturbine_crops: list, [1,2]
                Set one or more categories of selection for random crop selection. 
                Default is [2]
            pixel: str, ("10p", "20p", "30p", "40p" or "50p")
                Set one pixel value for the image.
                Default is "30p"
            selection_windturbines_path: pathlib.Path, pathlib.Path("")
                Set a list of paths to selection_windturbine folders in pathlib.Path format following the folder convention.
                Default is ""
            selection_no_windturbines_paths: pathlib.Path, pathlib.Path("")
                Set a list of paths to selection__no_windturbine folders in pathlib.Path format following the folder convention.
                Default is ""
            image_bands: list, ["B02", "B03", "B04", "B08"]
                Set the preferred image bands for the image.
                Default is ["B02", "B03", "B04", "B08"]
            
            Parameters for data preprocessing
            ----------
            rescale_factor: int, >0
                Set a rescale factor for the image preprocessing in order to get values between 0 and 1.
                Default is 2**14
            rotation_range: int, 0 to 180
                Set a value to randomly rotate images in the range (degrees, 0 to 180)
                Default is 10
            zoom_range: float or [lower, upper] 
                Set a range for random zoom. If a float, [lower, upper] = [1-zoom_range, 1+zoom_range].
                Default is 0.1
            width_shift_range: float, 1-D array-like or int 
                Set a value in order to shift the image width wise.
                Default is 0.1
            height_shift_range: float, 1-D array-like or int 
                Set a value in order to shift the image height wise.
                Default is 0.1   
            horizontal_flip: Boolean, True or False
                Set a value to randomly flip inputs horizontally.
                Default is False
            vertical_flip: Boolean, True or False
                Set a value to randomly flip inputs vertically.
                Default is False
            fill_mode: str, ("constant", "nearest", "reflect" or "wrap"). 
                Set a mode for the fillmode. Points outside the boundaries of the input are filled according 
                to the given mode: 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)
                                   'nearest': aaaaaaaa|abcd|dddddddd
                                   'reflect': abcddcba|abcd|dcbaabcd
                                   'wrap': abcdabcd|abcd|abcdabcd
                Default is "constant"
            cval: Float or Int 
                Value used for points outside the boundaries when fill_mode = "constant".
                Default is 0.0
            
            Parameters for building the convolutional neural network CNN
            ----------
            num_cnn_layers: int, >0
                Set the number of layers for the CNN.
                If changed number of kernel_sizes list has to changed accordingly.
                Default is 2
            filters: int, (16, 32, 64 ...)
                Set the number of filters used for the first layer (this mount is doubled each layer). 
                Default is 16
            kernel_sizes: list of int, [kernel size layer 1, kernel size layer 2, ...]
                Set the kernel size for each layer, first element corresponds to first layer etc..
                The amount of kernel sizes has to be equal to the number of cnn layers.
                Default is [5, 5]
            layer_activations: str, ("relu", "tanh", "sigmoid"... see more on keras.activations)
                Set the activaiton functions for each layer, first element corresponds to first layer etc..
                The amount of activation functions has to be equal to the number of cnn layers.
                Default is ["relu", "relu"]
            input_shape: list, [rows, cols, channels]
                Set the shape of the input image.
                Default is [30, 30, 4]
            pool_size: int, >0
                Set the pool size of max pooling.
                Default is 2
            strides: int, >0
                Set the strides of max pooling.
                Default is 2
            full_connection_units: int, >0
                Set the dimensionality of the full connection outer space.
                Default is 128
            full_connection_activation: str, ("relu", "tanh", "sigmoid"... see more on keras.activations)
                Set the activation function of the full connection (ANN) layer.
                Default is "relu"
            output_units: int, >0
                Set the dimensionality of the output outer space.
                Default is 1
            output_activation: str, ("relu", "tanh", "sigmoid"... see more on keras.activations)
                Set the activation function of the output layer.
                Default is "sigmoid"
                
            Parameters for training the convolutional neural network CNN
            ----------
            optimizer: str, ("adam"... see more on keras.optimizers)
                Set the optimizer function of the cnn compiling stage.
                Default is "adam"
            loss: str, ("binary_crossentropy"... see more on keras.losses)
                Set the loss function of the cnn compiling stage.
                Default is "binary_crossentropy"
            metrics: list of str, (["accuracy"] ... see more on keras.metrics)
                Set the metrics of the cnn compiling stage.
                Default is ["accuracy"]
            epochs: int, >0
                Set the number of epochs to train the model.
                Default is 10
            """

            self.categories_windturbine_crops = categories_windturbine_crops
            self.categories_no_windturbine_crops = categories_no_windturbine_crops
            self.pixel = pixel
            self.selection_windturbine_paths = selection_windturbine_paths
            self.selection_no_windturbine_paths = selection_no_windturbine_paths
            self.image_bands = image_bands
            self.rescale_factor = rescale_factor
            self.rotation_range = rotation_range
            self.zoom_range = zoom_range
            self.width_shift_range = width_shift_range
            self.height_shift_range = height_shift_range
            self.horizontal_flip = horizontal_flip
            self.vertical_flip = vertical_flip
            self.fill_mode = fill_mode
            self.cval = cval
            self.num_cnn_layers = num_cnn_layers
            self.filters = filters
            self.kernel_sizes = kernel_sizes
            self.layer_activations = layer_activations
            self.input_shape = input_shape
            self.pool_size = pool_size
            self.strides = strides
            self.full_connection_units = full_connection_units
            self.full_connection_activation = full_connection_activation
            self.output_units = output_units
            self.output_activation = output_activation
            self.optimizer = optimizer
            self.loss = loss
            self.metrics = metrics
            self.epochs = epochs
            
            self.indices = []
            self.indices_train = []
            self.indices_test = []
            

    def get_images_from_path(self, windturbines, categories, path=""):
        """Expects a pathlib path and windturbine paramter (0 = no windturbine, 1 = windturbine)
        Returns the independent variable four dimensional numpy array with every image bands, categories 
        and pixel shape selected by the user of every crop inside the folders. Also this function returns
        the dependent variable vector (windturbine: Yes/No) corresponding to the independent variable array
        (images).
        
        Parameters
        ----------
        windtubines: int, (1 or 0)
            Set the parameter to either 1 for data with windturbines and 0 without windturbines.
            There is no default!
        categories: list, [1,2,3] or [1,2]
            Set one or more categories of the selection. 
            There is no default!
        path: pathlib.Path, pathlib.Path("")
            Set a list of paths to sentinal image folders in pathlib.Path format following the folder convention.
            Default is ""
        
        Returns
        ----------
        X_images: list, 4D array
            Returns a 4D list with images of every folder inside the given path
        y_images: list, 1D array
            Returns a 1D list with 1s (windturbines) or 0s (no windturbines) corresponding to given input
        """
        
        import rasterio
        import numpy as np
        
        X_images = []
        y_images = []

        # loop through every category inside the selected windturbine crop folder
        for category in path.glob("*"):
            # only select categories and pixel shape selected by the user
            if category.name.count("_") == 3:
                if int(category.name.split("_")[1]) in categories and category.name.split("_")[3] == self.pixel:
                    for crop in category.glob("*"):
                        if crop.is_dir() and crop.name != "0_combined-preview":

                            image_path = crop / "sensordata" / "R10m"
                            image_list = np.array([])

                            # append every user selected image band to a list
                            for element in image_path.glob("*_*_B*_10m.jp2"):
                                if element.name.split("_")[2] in self.image_bands:
                                    with rasterio.open(str(element)) as f:
                                        if image_list.size == 0:
                                            image_list = f.read(indexes=1)
                                        else:
                                            image_list = np.dstack((image_list, f.read(indexes=1)))

                            X_images.append(image_list)
                            y_images.append(windturbines)
                            self.indices.append(crop.name.split("_")[0])
        
        return X_images, y_images 
    
    
    def create_wt_identification_data(self):
        """Takes in path lists for windturbine and no windturbine image crops, appends every image to an array
        and simultaniously adds a factorial variable to another list which indicates if the image contains a windturbine

        Parameters
        ----------
        selection_windturbines_path: pathlib.Path, pathlib.Path("")
            Set a list of paths to selection_windturbine folders in pathlib.Path format following the folder convention.
            Default is ""
        selection_no_windturbines_paths: pathlib.Path, pathlib.Path("")
            Set a list of paths to selection__no_windturbine folders in pathlib.Path format following the folder convention.
            Default is ""

        Returns
        ----------
        X: list, 4D array
            Returns a 4D list with images of every folder inside the given paths
        y: list, 1D array
            Returns a 1D list with 1s (windturbines) and 0s (no windturbines)
        """
        
        import numpy as np
        
        # initialize the independent and dependent variable
        X = []
        y = []
        
        for path in self.selection_windturbine_paths:
            X_images, y_images = self.get_images_from_path(windturbines=1, categories=self.categories_windturbine_crops,
                                                           path=path)
            X.extend(X_images)
            y.extend(y_images)

        for path in self.selection_no_windturbine_paths:
            X_images, y_images = self.get_images_from_path(windturbines=0, categories=self.categories_no_windturbine_crops,
                                                           path=path)
            X.extend(X_images)
            y.extend(y_images)
        
        X = np.array(X)
    
        return X, y
        
        
    def preprocess_data(self, X, y):
        """Wrap images and labels in a Keras ImageDataGenerator iterator.

        The generator is configured from the preprocessing attributes of this
        instance (rescaling by 1/rescale_factor, rotation, zoom, shifts, flips
        and fill behaviour). The same settings are used for whatever data is
        passed in.

        Parameters
        ----------
        X: images handed to ``ImageDataGenerator.flow`` as ``x``
        y: labels handed to ``ImageDataGenerator.flow`` as ``y``

        Returns
        ----------
        dataset: the object returned by ``ImageDataGenerator.flow(x=X, y=y)``
        """

        from tensorflow.keras.preprocessing.image import ImageDataGenerator

        settings = {
            "rescale": 1./self.rescale_factor,
            # randomly rotate images in the range (degrees, 0 to 180)
            "rotation_range": self.rotation_range,
            # randomly zoom image
            "zoom_range": self.zoom_range,
            # randomly shift images horizontally (fraction of total width)
            "width_shift_range": self.width_shift_range,
            # randomly shift images vertically (fraction of total height)
            "height_shift_range": self.height_shift_range,
            # randomly flip images
            "horizontal_flip": self.horizontal_flip,
            "vertical_flip": self.vertical_flip,
            # how points outside the image boundaries are filled
            "fill_mode": self.fill_mode,
            "cval": self.cval,
        }
        generator = ImageDataGenerator(**settings)

        return generator.flow(x=X, y=y)
    
    
    def build_CNN(self):
        
        import tensorflow as tf
        
        # Initialize CNN
        cnn = tf.keras.models.Sequential()
        
        # Step 1: Convolution and pooling of layers
        for i in range(1, self.num_cnn_layers+1):
            if i == 1:
                cnn.add(tf.keras.layers.Conv2D(filters=self.filters*i, kernel_size=self.kernel_sizes[0], activation=self.layer_activations[0], input_shape=self.input_shape))
                cnn.add(tf.keras.layers.MaxPool2D(pool_size=self.pool_size, strides=self.strides))
            else:
                cnn.add(tf.keras.layers.Conv2D(filters=self.filters*i, kernel_size=self.kernel_sizes[i-1], activation=self.layer_activations[i-1]))
                cnn.add(tf.keras.layers.MaxPool2D(pool_size=self.pool_size, strides=self.strides))
        
        # Step 2: Flattening
        cnn.add(tf.keras.layers.Flatten())
        
        # Step 3: Full connection
        cnn.add(tf.keras.layers.Dense(units=self.full_connection_units, activation=self.full_connection_activation))
        
        # Step 4: Output Layer
        cnn.add(tf.keras.layers.Dense(units=self.output_units, activation=self.output_activation))
        
        return cnn
    
    
    def train_CNN(self, cnn, training_set, test_set):
        
        import tensorflow as tf
        
        # Step 1: compiling the CNN
        cnn.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics)
        
        # Step 2: Training the CNN on the Training set and evaluating it on the Test set
        cnn.fit(x=training_set, validation_data=test_set, epochs=self.epochs)
    
    
    def create_confusion_matrix(self):
        pass
    
    
    def predict_single_observation(self):
        pass
    
    
    def detect_windturbines_with_CNN(self):
        """Run the detection pipeline on the data configured on this instance.

        Steps: 1. import the crops, 2. split images, labels and crop ids into
        80 % training and 20 % test data (random_state 0), 3. preprocess both
        parts, 4. build the CNN. The crop ids of both parts are stored in
        ``self.indices_train`` and ``self.indices_test``. Step 5 (compile,
        train and evaluate) is marked at the end of the body.
        """

        from sklearn.model_selection import train_test_split

        # 1. Import the data:
        X, y = self.create_wt_identification_data()

        # 2. Split data into training and test data.
        # Images, labels and crop ids go through ONE call so all three are
        # shuffled identically; the ids come from this instance (self.indices),
        # not from a variable outside the method.
        (X_train, X_test,
         y_train, y_test,
         indices_train, indices_test) = train_test_split(X, y, self.indices, test_size = 0.2, random_state = 0)
        self.indices_train = indices_train
        self.indices_test = indices_test

        # 3. Preprocess the data:
        training_set = self.preprocess_data(X_train, y_train)
        test_set = self.preprocess_data(X_test, y_test)

        # 4. Build the CNN:
        cnn = self.build_CNN()

        # 5. Compile, train and evaluate the CNN:
        