In [None]:
# pip install imbalanced-learn

from tensorflow.keras.preprocessing.image import ImageDataGenerator

2024-07-05 09:40:46.214428: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-05 09:40:50.773320: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
import os
import numpy as np
import rasterio
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def load_dataset(data_dir, img_size=(256, 256)):
    """
    Read the train, val and test splits stored under ``data_dir``.

    Every split directory must contain an 'input' folder (images) and an
    'output' folder (label masks). Once all three splits are loaded, NaN
    values in the input images are replaced through ``preprocess_data``;
    the label arrays are returned exactly as loaded.

    Parameters:
    - data_dir (str): Directory containing train, val, and test subdirectories.
    - img_size (tuple): Desired size of the image (height, width).

    Returns:
    - train_data (tuple): Tuple containing (X_train, y_train).
    - val_data (tuple): Tuple containing (X_val, y_val).
    - test_data (tuple): Tuple containing (X_test, y_test).
    """
    loaded_splits = []
    for split_name in ('train', 'val', 'test'):
        split_dir = os.path.join(data_dir, split_name)
        features, targets = load_data_from_dir(
            os.path.join(split_dir, 'input'),
            os.path.join(split_dir, 'output'),
            img_size,
        )
        loaded_splits.append((features, targets))

    # NaN handling runs only after every split has been read from disk
    train_data, val_data, test_data = [
        (preprocess_data(features), targets)
        for features, targets in loaded_splits
    ]

    return train_data, val_data, test_data

def preprocess_data(images):
    """
    Preprocesses input images by replacing NaN values with 0.

    An ndarray argument is modified in place and the very same object is
    returned. Any other array-like (for example a plain, possibly empty,
    list) is converted to an ndarray first, so the function no longer fails
    with a TypeError on list input.

    Parameters:
    - images (numpy.ndarray or array-like): Array of input images.

    Returns:
    - images (numpy.ndarray): Processed array of input images.
    """
    # np.asarray is a no-op for ndarrays, so in-place semantics are preserved
    images = np.asarray(images)

    # Replace NaN values with 0
    images[np.isnan(images)] = 0

    return images

def load_data_from_dir(input_dir, output_dir, img_size):
    """
    Load data (images and labels) from input and output directories.

    Every '<name>.tif' file in ``input_dir`` is paired with the label file
    '<name>_cl.tif' in ``output_dir``. Files are visited in sorted filename
    order so the sample order is the same on every run and platform.

    Parameters:
    - input_dir (str): Directory containing input images.
    - output_dir (str): Directory containing output images.
    - img_size (tuple): Desired size of the image (height, width).

    Returns:
    - images (numpy.ndarray): Array of loaded input images.
    - labels (numpy.ndarray): Array of corresponding output images.
      Both are empty arrays when ``input_dir`` holds no '.tif' files.
    """
    extension = '.tif'
    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(extension):
            continue

        # Load input image (X)
        input_path = os.path.join(input_dir, filename)
        images.append(load_tiff_image(input_path, img_size))

        # Load corresponding output image (y). Only the trailing extension is
        # swapped, so a '.tif' appearing earlier in the name is left alone.
        output_filename = filename[:-len(extension)] + '_cl' + extension
        output_path = os.path.join(output_dir, output_filename)
        label = load_tiff_image(output_path, img_size, is_label=True)
        # Kept from the original: zero any NaN that might remain in the mask
        label[np.isnan(label)] = 0
        labels.append(label)

    # Always hand back ndarrays, even when nothing was found
    return np.array(images), np.array(labels)

def load_tiff_image(path, img_size, is_label=False):
    """
    Read a TIFF raster and return it in height x width x bands layout.

    The raster is cropped (not rescaled) to at most ``img_size`` pixels,
    keeping the top-left corner; rasters smaller than ``img_size`` are
    returned at their own size.

    Parameters:
    - path (str): Path to the TIFF image.
    - img_size (tuple): Maximum (height, width) kept from the raster.
    - is_label (bool): When True, return a binary uint8 mask that is 1 where
      the raster value equals 1 (assumed Marine Debris class) and 0 elsewhere.

    Returns:
    - img (numpy.ndarray): Loaded image as a numpy array.
    """
    with rasterio.open(path) as dataset:
        bands_first = dataset.read()

    # rasterio yields bands x height x width; move bands last, then crop
    max_rows, max_cols = img_size[0], img_size[1]
    cropped = np.transpose(bands_first, (1, 2, 0))[:max_rows, :max_cols, :]

    if not is_label:
        return cropped

    # Binary mask: 1 for Marine Debris (assumed to be encoded as 1), 0 for others
    return (cropped == 1).astype(np.uint8)

# Example usage: load all three splits from the 'MARIDA' directory and
# report the shape of every resulting array.
data_dir = 'MARIDA'
train_split, val_split, test_split = load_dataset(data_dir)
X_train, y_train = train_split
X_val, y_val = val_split
X_test, y_test = test_split

print(f"Training data: X_train shape = {X_train.shape}, y_train shape = {y_train.shape}")
print(f"Validation data: X_val shape = {X_val.shape}, y_val shape = {y_val.shape}")
print(f"Testing data: X_test shape = {X_test.shape}, y_test shape = {y_test.shape}")


Training data: X_train shape = (694, 256, 256, 11), y_train shape = (694, 256, 256, 1)
Validation data: X_val shape = (328, 256, 256, 11), y_val shape = (328, 256, 256, 1)
Testing data: X_test shape = (359, 256, 256, 11), y_test shape = (359, 256, 256, 1)


In [None]:
# Sanity check: label masks are (samples, height, width, 1)
print(y_train.shape)

(694, 256, 256, 1)


In [None]:
# Sanity check: input images are (samples, height, width, bands)
print(X_train.shape)

(694, 256, 256, 11)


In [None]:
from imblearn.over_sampling import SMOTE
# Flatten the training set so that every pixel becomes one sample row:
# X -> (n_pixels, n_bands), Y -> (n_pixels, 1).
# The sizes are derived from the arrays themselves (-1 lets numpy infer the
# pixel count) instead of hard-coding 694 images of 256x256 with 11 bands,
# so the cell keeps working if the dataset or img_size changes.
X = np.reshape(X_train, (-1, X_train.shape[-1]))
Y = np.reshape(y_train, (-1, 1))
print(Y.shape)

(45481984, 1)


In [None]:
# Oversample with SMOTE (fixed seed) so the per-pixel classes are balanced
# before training; X_res / y_res are the resampled features and labels.
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X, Y)
print(X_res.shape)
print(y_res.shape)

(90960082, 11)
(90960082,)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Initialize Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier on the SMOTE-resampled pixel samples
rf_classifier.fit(X_res, y_res)

# Predict on the same resampled data the model was just fitted on.
# NOTE(review): this is NOT a held-out test set, so the scores below are
# in-sample and do not measure generalization.
y_pred = rf_classifier.predict(X_res)

# Evaluate the model (in-sample; ground truth first, predictions second)
report = classification_report(y_res, y_pred)

# Print the classification report
print("Classification Report:")
print(report)

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00  45480041
           1       1.00      1.00      1.00  45480041

    accuracy                           1.00  90960082
   macro avg       1.00      1.00      1.00  90960082
weighted avg       1.00      1.00      1.00  90960082



In [None]:
import joblib

# Persist the fitted classifier to disk with joblib
joblib.dump(rf_classifier, "random_forest.joblib")

['random_forest.joblib']

NameError: name 'model' is not defined

In [None]:

# pickle must be imported before use; without this line the cell raises
# NameError when the notebook is run on a fresh kernel.
import pickle

# Save the trained model to a file using pickle
with open('randomforest_classifier.pkl', 'wb') as f:
    pickle.dump(rf_classifier, f)

print("Model saved successfully as 'randomforest_classifier.pkl'")

In [None]:
def write_to_file(filename, content):
    """Write the text ``content`` to ``filename``, replacing any existing file."""
    with open(filename, 'w') as out_file:
        out_file.write(content)
# Example usage:
filename = 'output.txt'

# Save whatever classification report text `report` currently holds
write_to_file(filename, report)

In [None]:
# Inspect the test image array shape before flattening it to per-pixel rows
X_test.shape

(359, 256, 256, 11)

In [None]:
# Flatten the test images to one row per pixel: (n_pixels, n_bands).
# Sizes come from the array itself rather than the hard-coded 359*256*256 and 11.
test_x = np.reshape(X_test, (-1, X_test.shape[-1]))

In [None]:
# Inspect the test label array shape before flattening it to per-pixel rows
y_test.shape

(359, 256, 256, 1)

In [None]:
# Flatten the test masks to one label per pixel: (n_pixels, 1).
# -1 lets numpy infer the pixel count instead of hard-coding 359*256*256.
test_y = np.reshape(y_test, (-1, 1))
test_y.shape

(23527424, 1)

In [None]:
# Predict a class for every flattened test pixel with the trained forest
result = rf_classifier.predict(test_x)

In [None]:
# classification_report expects (y_true, y_pred): ground truth first, then
# predictions. Passing them the other way round swaps precision with recall
# and reports the support of the predictions instead of the true labels.
report = classification_report(test_y.ravel(), result)
report

'              precision    recall  f1-score   support\n\n           0       1.00      1.00      1.00  23526792\n           1       0.39      0.23      0.29       632\n\n    accuracy                           1.00  23527424\n   macro avg       0.69      0.62      0.65  23527424\nweighted avg       1.00      1.00      1.00  23527424\n'

In [None]:
# Print the report so its line breaks render (the bare repr shows escaped \n)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00  23526792
           1       0.39      0.23      0.29       632

    accuracy                           1.00  23527424
   macro avg       0.69      0.62      0.65  23527424
weighted avg       1.00      1.00      1.00  23527424



In [None]:
# import os

# def shutdown():
#     if os.name == 'posix':  # For UNIX/Linux/MacOS
#         os.system('shutdown -h now')
#     elif os.name == 'nt':  # For Windows
#         os.system('shutdown /s /t 1')
#     else:
#         raise OSError(f"Unsupported operating system: {os.name}")

# # Calling the shutdown function
# shutdown()


In [None]:
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

# # Define ImageDataGenerator with rotation augmentation
# datagen = ImageDataGenerator(
#     rotation_range=45,  # Rotate images randomly up to 45 degrees
#     rescale=1./255  # Normalize pixel values (assuming pixel range 0-255)
# )

# # Example usage:
# batch_size = 32
# # Create generators for training and validation data
# train_generator = datagen.flow(X_train, y_train, batch_size=batch_size)
# val_generator = datagen.flow(X_val, y_val, batch_size=batch_size)

# # Note: No need to augment validation data, so we only apply rotation augmentation to training data




In [None]:
# import tensorflow as tf
# from tensorflow.keras.losses import SparseCategoricalCrossentropy

# # Compute class weights based on frequency
# def compute_class_weights(y_train):
#     class_weights = {}
#     total_samples = len(y_train)
#     unique_classes = np.unique(y_train)
#     class_counts = np.bincount(y_train.flatten())

#     for i, count in enumerate(class_counts):
#         class_weights[i] = (1 / count) * (total_samples / len(unique_classes))

#     return class_weights

# # Example usage
# # Assuming y_train is your training labels (shape: (694, 256, 256))
# y_train_flat = y_train.flatten()
# class_weights = compute_class_weights(y_train_flat)

# # Define weighted loss function
# loss_function = SparseCategoricalCrossentropy(from_logits=True, weight=class_weights)

# # Compile your model with this loss function
# model.compile(optimizer='adam', loss=loss_function, metrics=['accuracy'])

# # Train your model using the generators
# history = model.fit(train_generator, epochs=num_epochs, validation_data=val_generator)
