# Methane detection hackathon

### Goal: detect methane leaks in the atmosphere based on satellite images

It's a binary classification problem: does the image contain a plume of methane or not?

To do that, a dataset of images in `tif` format is provided, along with metadata including:
* path
* date the satellite image was taken
* class (`plume` or `no_plume`)
* an ID identifying the location
* latitude and longitude coordinates locating the center of the plume (`lat`,`lon`)
* pixel coordinates locating the center of the plume in the image (`coord_x`,`coord_y`). Please be mindful that the axis origin (0,0) is at the top-left corner of the image

The dataset contains two folders:
- `plume` : contains all images with plumes of methane.
- `no_plume` : contains all images with no plume of methane.


**All images have a resolution of 64x64 and they are in gray scale (2D-arrays).**

Image names follow the format `{date}_methane_mixing_ratio_id_{location id}.tif`

### 1. Install requirements

In [None]:
# Install rasterio into the notebook environment.
# NOTE(review): no cell visible here imports rasterio (images are opened with PIL) — confirm it is still required.
!pip install rasterio

### 2. Import statements

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.callbacks import Callback
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import roc_auc_score, roc_curve, auc


### 3. Read an image

In [None]:
import os
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from PIL import Image  # Import the Image class from PIL


class PlumeDataset:
    """Collect image paths from two class folders and load them as arrays.

    Every entry found in ``plume_dir`` is labelled 1 and every entry found in
    ``no_plume_dir`` is labelled 0.

    Attributes:
        plume_images: Paths of the entries listed in ``plume_dir``, sorted by name.
        no_plume_images: Paths of the entries listed in ``no_plume_dir``, sorted by name.
        images: ``plume_images`` followed by ``no_plume_images``.
        labels: Integer labels aligned with ``images`` (1 = plume, 0 = no plume).
        image_size: Size passed to ``PIL.Image.resize`` for every image.

    NOTE(review): images are converted to 8-bit grayscale (mode ``'L'``) before
    scaling by 255; if the source TIFFs store a wider value range this may lose
    information — confirm against the data.
    """

    def __init__(self, plume_dir, no_plume_dir, image_size=(64, 64)):
        """List both class folders and build the aligned path/label lists.

        Args:
            plume_dir: Directory holding the images that contain a plume.
            no_plume_dir: Directory holding the images without a plume.
            image_size: Target size handed to ``PIL.Image.resize``.
        """
        # os.listdir returns entries in arbitrary, platform-dependent order.
        # Sorting keeps the sample order stable so that a seeded
        # train/validation split is reproducible from one machine to the next.
        self.plume_images = [
            os.path.join(plume_dir, name) for name in sorted(os.listdir(plume_dir))
        ]
        self.no_plume_images = [
            os.path.join(no_plume_dir, name) for name in sorted(os.listdir(no_plume_dir))
        ]
        self.images = self.plume_images + self.no_plume_images
        self.labels = [1] * len(self.plume_images) + [0] * len(self.no_plume_images)
        self.image_size = image_size

    def __len__(self):
        """Return the number of listed entries.

        This counts every directory entry, including ones that
        ``load_and_preprocess_image`` later skips, so it can be larger than
        the number of samples returned by ``get_data``.
        """
        return len(self.images)

    def load_and_preprocess_image(self, image_path):
        """Load one image as a 2-D float array scaled to [0, 1].

        Args:
            image_path: Path of the image file to read.

        Returns:
            The grayscale image resized to ``self.image_size`` and divided by
            255.0, or ``None`` when the path is a ``.DS_Store`` entry or is not
            a regular file (missing path, sub-directory, ...).
        """
        # Skip macOS folder metadata and anything that is not a regular file.
        if image_path.endswith('.DS_Store') or not os.path.isfile(image_path):
            return None

        # The context manager releases the file handle as soon as the pixel
        # data has been converted, instead of waiting for garbage collection.
        with Image.open(image_path) as raw:
            gray = raw.convert('L').resize(self.image_size)

        return np.array(gray) / 255.0

    def get_data(self):
        """Load every usable image and return the arrays used for training.

        Returns:
            A tuple ``(X, y)`` where ``X`` stacks the preprocessed images in
            listing order and ``y`` holds the matching labels one-hot encoded
            over two classes.
        """
        X = []
        y = []

        for image_path, label in zip(self.images, self.labels):
            img = self.load_and_preprocess_image(image_path)
            if img is not None:
                X.append(img)
                y.append(label)

        X = np.array(X)
        y = to_categorical(y, num_classes=2)  # Convert labels to one-hot encoding

        return X, y



In [None]:
# Folders holding the original (non-augmented) training images, one per class
plume_dir = 'cleanr/train data/images/plume'
no_plume_dir = 'cleanr/train data/images/no_plume'

# Index both folders; images will be resized to 64x64 when loaded
plume_dataset = PlumeDataset(plume_dir, no_plume_dir, image_size=(64, 64))

# Read every image into memory and build the label array
X, y = plume_dataset.get_data()

# X holds the preprocessed images (one 2-D array per image, no explicit channel axis)
# and y holds the matching labels in one-hot encoding.

# Hold out 20% of the samples for validation; the fixed seed makes the shuffle repeatable.
# NOTE(review): the split is not stratified, so class proportions may differ between the two sets.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


### 5. Metadata

- one pixel = 2.4 km x 2.4 km
- Lat Long = latitude and longitude of the estimated methane plume center
- coord_x coord_y = plume position in the image (origin = top / left) in pixels

In [None]:
# Load the metadata table that accompanies the training images
metadata = pd.read_csv("cleanr/train data/metadata.csv")

In [None]:
# Preview the first five rows of the metadata table
metadata.head(5)

In [None]:
from tensorflow.keras import layers
from sklearn.metrics import roc_auc_score
import tensorflow as tf

# CNN classifier: three convolution + max-pooling stages, then a dense head
model = Sequential([
    layers.Input(shape=(64, 64, 1)),
    # Feature extractor: filter count doubles at each stage
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    # Classification head
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.4),  # Drop 40% of the dense activations during training
    Dense(2, activation='softmax'),  # Two classes: 'plume' and 'no_plume'
])


def roc_auc(y_true, y_pred):
    """Keras-compatible metric wrapping scikit-learn's ``roc_auc_score``.

    Keras calls a metric function once per batch, so the value reported during
    training is an average of per-batch scores, not the ROC AUC of the whole
    epoch. Use the ``RocAucEvaluation`` callback for the exact validation score.

    Args:
        y_true: Batch of ground-truth labels.
        y_pred: Batch of model outputs.

    Returns:
        A float32 tensor holding the batch ROC AUC, or NaN when the score is
        undefined for the batch.
    """
    def _batch_auc(labels, scores):
        # roc_auc_score raises ValueError when the score is undefined, e.g. a
        # batch that contains a single class. Report NaN instead of aborting
        # training in the middle of an epoch.
        try:
            value = roc_auc_score(labels.numpy(), scores.numpy())
        except ValueError:
            value = np.nan
        # Match the dtype declared to tf.py_function below.
        return np.float32(value)

    return tf.py_function(_batch_auc, (y_true, y_pred), tf.float32)

# Configure training: categorical cross-entropy loss minimised with Adam,
# reporting accuracy and the batch-level ROC AUC metric defined above
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy', roc_auc],
)


class RocAucEvaluation(Callback):
    """Print the ROC AUC of the model on a held-out set after every epoch.

    Args:
        validation_data: ``(inputs, targets)`` pair to score the model on.
    """

    def __init__(self, validation_data=()):
        # Zero-argument super() runs Callback.__init__; the previous
        # super(Callback, self) started the lookup *after* Callback and
        # therefore skipped the base-class initialisation entirely.
        super().__init__()
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the held-out set and print the resulting ROC AUC.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras (unused).
        """
        X_val, y_val = self.validation_data
        y_pred = self.model.predict(X_val)
        # Named `score` so the module-level `roc_auc` metric is not shadowed.
        score = roc_auc_score(y_val, y_pred)
        print("\nROC AUC: {:.4f}\n".format(score))

# Score the validation split with ROC AUC at the end of every epoch
roc_auc_callback = RocAucEvaluation(validation_data=(X_val, y_val))

# Train the model for 100 epochs, validating on the held-out split.
# NOTE(review): no `class_weight` argument is passed to fit(), so classes are
# not re-weighted here even though compute_class_weight is imported above.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[roc_auc_callback]
)

# Save the trained model to an .h5 file
model.save('methane_detection_model.h5')

Model with Augmented data

In [None]:
# Folders holding the augmented training images, one per class
plume_dir = 'data_augmented/plume'
no_plume_dir = 'data_augmented/no_plume'

# Index both folders; images will be resized to 64x64 when loaded
plume_dataset = PlumeDataset(plume_dir, no_plume_dir, image_size=(64, 64))

# Read every image into memory and build the label array
# (this rebinds the X and y created for the non-augmented data)
X, y = plume_dataset.get_data()

# X holds the preprocessed images, and y holds the matching labels in one-hot encoding.

# Hold out 20% of the samples for validation; the fixed seed makes the shuffle repeatable.
# NOTE(review): if data_augmented/ contains several augmented variants of the same
# source image, a random split can put variants of one image in both the training
# and validation sets, which would inflate validation scores — confirm how the
# folder was generated.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras import layers
from sklearn.metrics import roc_auc_score
import tensorflow as tf

# CNN classifier: three convolution + max-pooling stages, then a dense head
model = Sequential([
    layers.Input(shape=(64, 64, 1)),
    # Feature extractor: filter count doubles at each stage
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    # Classification head
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.4),  # Drop 40% of the dense activations during training
    Dense(2, activation='softmax'),  # Two classes: 'plume' and 'no_plume'
])



# Configure training: categorical cross-entropy loss minimised with Adam,
# reporting accuracy only
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


class RocAucEvaluation(Callback):
    """Print the ROC AUC of the model on a held-out set after every epoch.

    Args:
        validation_data: ``(inputs, targets)`` pair to score the model on.
    """

    def __init__(self, validation_data=()):
        # Zero-argument super() runs Callback.__init__; the previous
        # super(Callback, self) started the lookup *after* Callback and
        # therefore skipped the base-class initialisation entirely.
        super().__init__()
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the held-out set and print the resulting ROC AUC.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras (unused).
        """
        X_val, y_val = self.validation_data
        y_pred = self.model.predict(X_val)
        score = roc_auc_score(y_val, y_pred)
        print("\nROC AUC: {:.4f}\n".format(score))

# Score the validation split with ROC AUC at the end of every epoch
roc_auc_callback = RocAucEvaluation(validation_data=(X_val, y_val))

# Train the model for 100 epochs, validating on the held-out split.
# NOTE(review): no `class_weight` argument is passed to fit(), so classes are
# not re-weighted here.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[roc_auc_callback]
)

# Save the model trained on augmented data to its own .h5 file
model.save('methane_detection_model_augmented.h5')