In [4]:
# The dataset contained image that mostly were black pixels. Removving them

import os
import shutil
from PIL import Image
import numpy as np

pixel_limit = 60
source_folder = 'fragments/fragments'  # Folder containing the images
target_folder = 'fragments/black_images'  # Folder to move images with > pixel_limit

# Create target folder if it doesn't exist
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

# Function to check if an image has more black pixels than the limit
def is_more_black_than_pixel_limit(image_path):
    # Load the image
    image = Image.open(image_path)
    
    
    # gray_image = image.convert('RGB')  # Convert to grayscale ('L' mode)

    # Convert the image to a NumPy array
    image_array = np.array(image)

    # Define a black pixel as 0
    black_pixels = np.sum(image_array == 0)

    # Total number of pixels in the image
    total_pixels = image_array.size

    # Calculate the percentage of black pixels
    black_pixel_percentage = (black_pixels / total_pixels) * 100

    # Check if over 95% of the pixels are black
    return black_pixel_percentage > pixel_limit

# Loop through all files in the source folder
for filename in os.listdir(source_folder):
    # Check if the file is an image (you can add more image formats as needed)
    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        image_path = os.path.join(source_folder, filename)
        
        # Apply the filter to the image
        if is_more_black_than_pixel_limit(image_path):
            # Move the image to the target folder
            shutil.move(image_path, os.path.join(target_folder, filename))
            print(f"Moved {filename} to the target folder because it has over {pixel_limit}% black pixels.")
            
        else:
            pass
            print(f"{filename} was not moved.")




fragment_S2A_MSIL2A_20220725T093041_N0400_R136_T34TFS_20220725T140223_25_67.jpg was not moved.
fragment_S2A_MSIL2A_20220725T093041_N0400_R136_T34TFS_20220725T140223_63_32.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T35TLN_20220722T134859_16_61.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T35TLN_20220722T134859_50_34.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_4_44.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T35TLL_20220722T134859_47_67.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_30_25.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T35TLN_20220722T134859_38_8.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T35TLL_20220722T134859_53_12.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T35TLL_20220722T134859_15_47.jpg was not moved.
fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TF

In [5]:
import os
import shutil
import pandas as pd

df = pd.read_csv('test-jpg.csv')  # Read the csv containing the labels

# Define the directories where images will be moved
deforestation_dir = 'fragments/fragments/deforestation'
not_deforestation_dir = 'fragments/fragments/not_deforestation'

# Create the directories if they don't exist
os.makedirs(deforestation_dir, exist_ok=True)
os.makedirs(not_deforestation_dir, exist_ok=True)

# Iterate through the dataframe
for index, row in df.iterrows():
    image_path = "fragments/fragments/" + row['image']
    image_label = row['label']

    # Check if the image exists
    if os.path.exists(image_path):
        # Get the image filename (e.g., 'image.jpg')
        image_name = os.path.basename(image_path)
        
        # Determine the target directory based on the label
        if image_label == 'deforestation':
            target_dir = deforestation_dir
        elif image_label == 'not_deforestation':
            target_dir = not_deforestation_dir
        else:
            print(f"Unknown class: {image_label} for image {image_name}")
            continue  # Skip the image if class is unknown

        # Define the full path to the target location
        target_path = os.path.join(target_dir, image_name)

        # Move the image
        try:
            shutil.copy(image_path, target_path)
            print(f"copied {image_name} to {target_dir}")
        except Exception as e:
            print(f"Error copying {image_name}: {e}")
    else:
        print(f"Image path does not exist: {image_path}")



copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_0_59.jpg to fragments/fragments/deforestation
copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_0_60.jpg to fragments/fragments/deforestation
copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_10_14.jpg to fragments/fragments/deforestation
copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_10_15.jpg to fragments/fragments/deforestation
copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_10_17.jpg to fragments/fragments/deforestation
copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_10_18.jpg to fragments/fragments/deforestation
copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_10_22.jpg to fragments/fragments/deforestation
copied fragment_S2A_MSIL2A_20220722T092041_N0400_R093_T34TFR_20220722T134859_10_28.jpg to fragments/fragments/deforestation
copied fra

In [13]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image

source_folders = ["fragments/fragments/deforestation", "fragments/fragments/not_deforestation"]

# Load a pretrained ResNet model
model = models.resnet50(pretrained=True)
model.eval()  # Set the model to evaluation mode (important for inference)

# Preprocess the image (resize, normalize, convert to tensor)
transform = transforms.Compose([
    transforms.Resize(256),  # Resize the image
    transforms.CenterCrop(224),  # Crop the image to 224x224
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Function to extract features
def extract_features(image_path):
    image = Image.open(image_path)  # Open image
    image = transform(image).unsqueeze(0)  # Apply transformations and add batch dimension

    with torch.no_grad():  # Disable gradient computation
        features = model(image)  # Get the features (model output)
    
    return features.numpy().flatten()  # Flatten the features to a 1D array

# Example: Extract features from an image
image_features_df = pd.DataFrame(columns=["image", "features", "label"])
for i,folder in enumerate(source_folders):
    for image in os.listdir(folder):
        # The first folder contains deforestation images
        if i == 0:
            label = "deforestation"
        else:
            label = "not_deforestation"
        features = extract_features(folder + '/' + image)
        
        data = {"image": image, "features": features, "label": label}
        image_features_df.loc[len(image_features_df.index)] = data 
        
image_features_df.shape
        
    




(2664, 3)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
import pickle

df_randomized = image_features_df.sample(frac=1, random_state=42).reset_index(drop=True)
df_randomized.to_pickle("randomized_image_features.pkl")
df_randomized = pd.read_pickle("randomized_image_features.pkl")

Unnamed: 0,image,features,label
0,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[2.4424427, 2.4895177, 2.1929562, 3.1329734, 2...",deforestation
1,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[2.4751163, 1.515187, -0.39312148, 1.9784185, ...",deforestation
2,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[1.9109206, 2.411919, 1.7674211, 4.1504984, 3....",deforestation
3,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[2.327355, 2.9859848, 2.6460383, 3.6085405, 3....",not_deforestation
4,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[0.6897718, 1.3784401, 0.28450155, 1.9980146, ...",deforestation
...,...,...,...
2659,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[2.146039, 2.372586, 2.2625418, 2.5442817, 2.0...",deforestation
2660,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[1.1161923, 2.1728153, 0.90005565, 2.112605, 2...",deforestation
2661,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[-0.5645826, 2.1636438, 2.1715364, 2.5930474, ...",deforestation
2662,fragment_S2A_MSIL2A_20220722T092041_N0400_R093...,"[4.216282, 2.4061127, 0.7757796, 2.7689018, 2....",deforestation


In [20]:
X = list(df_randomized["features"].values)
y = list(df_randomized["label"].values)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(f"Training dataset: {len(X_train)} samples with {len(X_train[0])} features, Test dataset: {len(y_test)} samples.")

# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)


Training dataset: 1864 samples with 1000 features, Test dataset: 800 samples.


In [21]:

# Step 3: Train an SVM classifier
svm = SVC(kernel='poly')  # You can experiment with other kernels like 'rbf' or 'poly'
svm.fit(X_train, y_train)

# Step 4: Evaluate the model on the test set
y_pred = svm.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
if accuracy > 0.9:
    with open('classifier.pkl','wb') as f:
        pickle.dump(svm,f)
        
recall = recall_score(y_test, y_pred, pos_label="deforestation")
precision = precision_score(y_test, y_pred, pos_label="deforestation")
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Recall: {recall * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')


Accuracy: 90.62%
Recall: 94.61%
Precision: 92.89%
