This Jupyter Notebook outlines how to classify jaguar habitat in an 8-band satellite image, using jaguar location data loaded from a CSV file. It relies on the Python libraries pandas, rasterio, geopandas, numpy, and scikit-learn (sklearn). A RandomForestClassifier is used as the classification model, but other classifiers can also be tested to see whether they give better results.


In [1]:
""" 
Goal: classifying jaguar habitat using location data and an 8-band satellite image
"""

# Import necessary libraries
import pandas as pd
import rasterio
import geopandas as gpd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the jaguar locations from CSV and wrap them in a GeoDataFrame whose
# point geometry is built from the X and Y columns.
csv_file = '../../../data/jaguar/clp_jaguar_locations.csv'
df = pd.read_csv(csv_file)
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['X'], df['Y']))

# Read all bands of the satellite image into memory and keep the raster
# profile; the dataset is closed again when the `with` block exits.
satellite_image = '../../../data/jaguar/clp_20220331_162002_52_241c_3B_AnalyticMS_SR_8b.tif'
with rasterio.open(satellite_image) as src:
    image, profile = src.read(), src.profile
# Create a function to extract pixel values for each jaguar location.
def extract_pixel_values(gdf, raster):
    """Return the band values of `raster` at every point in `gdf.geometry`.

    Args:
        gdf: Table with a `geometry` column of points exposing `.x` / `.y`
            (e.g. a GeoDataFrame). Coordinates must be in the raster's CRS.
        raster: Object providing `index(x, y) -> (row, col)` and `read()`
            returning an array shaped (bands, rows, cols), e.g. an open
            rasterio dataset.

    Returns:
        Array of shape (n_points, n_bands), one row per point.

    Raises:
        IndexError: If any point falls outside the raster extent.
    """
    # Read the raster once; re-reading the full image per point is wasteful.
    bands = raster.read()
    height, width = bands.shape[1], bands.shape[2]

    locations = [raster.index(point.x, point.y) for point in gdf.geometry]
    rows = np.array([row for row, _ in locations], dtype=int)
    cols = np.array([col for _, col in locations], dtype=int)

    # Negative indices would silently wrap around in numpy, so reject every
    # out-of-extent point explicitly instead of sampling the wrong pixel.
    outside = (rows < 0) | (rows >= height) | (cols < 0) | (cols >= width)
    if outside.any():
        raise IndexError(
            f'{int(outside.sum())} point(s) fall outside the raster extent')

    # Fancy indexing yields (bands, points); transpose to (points, bands).
    return bands[:, rows, cols].T

# Extract pixel values for each jaguar location using the function.
# extract_pixel_values calls .index() and .read() on its second argument, so
# it needs the open rasterio dataset rather than the in-memory `image` array;
# the dataset opened earlier was closed by its `with` block, so reopen it here.
with rasterio.open(satellite_image) as src:
    # NOTE(review): assumes the CSV X/Y columns are longitude/latitude
    # (EPSG:4326), as the second cell of this notebook declares — confirm.
    points = gdf if gdf.crs is not None else gdf.set_crs("epsg:4326")
    # Raster lookups only make sense in the raster's own CRS.
    if src.crs is not None:
        points = points.to_crs(src.crs)
    X = extract_pixel_values(points, src)
y = gdf['habitat'].values

# Hold out 30% of the samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3)

# Fit a 100-tree random forest on the training samples.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X_train, y_train)

# Predict the held-out samples and report the overall accuracy.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Print the confusion matrix for the same predictions.
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

# Apply the model to the entire satellite image to classify jaguar habitat.
def classify_image(image, clf):
    """Classify every pixel of `image` with `clf`.

    `image` is indexed as (bands, rows, cols). It is flattened to one row per
    pixel with the bands as columns, passed to `clf.predict`, and the
    predictions are reshaped back to (rows, cols).
    """
    n_bands, n_rows, n_cols = image.shape[:3]
    pixels = image.reshape(n_bands, -1).transpose()
    return clf.predict(pixels).reshape(n_rows, n_cols)

# Classify the full image, then write the result as a single-band uint8
# GeoTIFF that reuses the (updated in place) source raster profile.
classified_image = classify_image(image, clf)
profile.update({'dtype': rasterio.uint8, 'count': 1})
output_file = "classified_habitat.tif"

with rasterio.open(output_file, mode='w', **profile) as dst:
    dst.write(classified_image.astype(rasterio.uint8), indexes=1)



In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from pyproj import Transformer
from rasterio.transform import xy
from rasterio.mask import mask
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Open the 8-band satellite image and pull the pixel data and raster profile
# into memory. The dataset handle `src` is kept open because the pixel
# extraction further down calls it directly.
satellite_image = '../../../data/jaguar/clp_20220331_162002_52_241c_3B_AnalyticMS_SR_8b.tif'

src = rasterio.open(satellite_image)
image, profile = src.read(), src.profile

# Read the jaguar locations from CSV and build a GeoDataFrame of points from
# the X and Y columns, declared as EPSG:4326.
csv_file = '../../../data/jaguar/clp_jaguar_locations.csv'
df = pd.read_csv(csv_file)

gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df['X'], df['Y'])
).set_crs("epsg:4326")

# Reproject the jaguar location GeoDataFrame to match the CRS of the satellite image
def reproject_coordinates(x, y, src_crs, dest_crs):
    """Transform `x`, `y` from `src_crs` to `dest_crs`.

    The pyproj Transformer is created with `always_xy=True`, so inputs and
    outputs are in (x, y) order. Returns the tuple `(new_x, new_y)`.
    """
    to_dest = Transformer.from_crs(src_crs, dest_crs, always_xy=True)
    new_x, new_y = to_dest.transform(x, y)
    return new_x, new_y

src_crs = gdf.crs
dest_crs = profile['crs']

# Transform all coordinates in a single call: the transformer accepts arrays,
# so this builds one Transformer instead of one per row, and it also works
# when the table is empty (the previous zip(*...) unpacking did not).
gdf['X_proj'], gdf['Y_proj'] = reproject_coordinates(
    gdf['X'].to_numpy(), gdf['Y'].to_numpy(), src_crs, dest_crs)

# Replace the geometry with the projected points and record dest_crs on the
# frame, so the CRS metadata matches the coordinates it now holds.
gdf = gdf.set_geometry(
    gpd.points_from_xy(gdf['X_proj'], gdf['Y_proj']), crs=dest_crs)


# gdf

# Define function to extract pixel values from satellite image
def extract_pixel_values(src, gdf, band_count):
    """Return the values of bands 1..band_count at every point in `gdf`.

    Args:
        src: Object providing `index(x, y) -> (row, col)` and `read(indexes)`
            returning an array shaped (bands, rows, cols), e.g. an open
            rasterio dataset.
        gdf: Table with a `geometry` column of points exposing `.x` / `.y`,
            already expressed in the raster's CRS. The columns `row` and
            `col` are added to it (or overwritten) as a side effect.
        band_count: Number of bands to sample, starting at band 1.

    Returns:
        Array of shape (n_points, band_count), one row per point.

    Raises:
        IndexError: If any point falls outside the raster extent.
    """
    # Use the geometry (reprojected to the raster CRS), not the raw X/Y
    # columns, which still hold the original unprojected coordinates.
    locations = [src.index(point.x, point.y) for point in gdf.geometry]
    rows = np.array([row for row, _ in locations], dtype=int)
    cols = np.array([col for _, col in locations], dtype=int)
    gdf['row'] = rows
    gdf['col'] = cols

    # Read the requested bands once instead of once per point and band.
    bands = src.read(list(range(1, band_count + 1)))
    height, width = bands.shape[1], bands.shape[2]

    # Negative indices would silently wrap around in numpy, so reject every
    # out-of-extent point explicitly instead of sampling the wrong pixel.
    outside = (rows < 0) | (rows >= height) | (cols < 0) | (cols >= width)
    if outside.any():
        raise IndexError(
            f'{int(outside.sum())} point(s) fall outside the raster extent')

    # Fancy indexing yields (bands, points); transpose to (points, bands).
    return bands[:, rows, cols].T



# Extract pixel values for jaguar locations, then release the dataset handle:
# the remaining steps only use `image` and `profile`, which are already in
# memory, so leaving `src` open would just leak the file handle.
pixel_values = extract_pixel_values(src, gdf, profile['count'])
src.close()


# Add a 'habitat' column to the GeoDataFrame, setting it to 1 (indicating jaguar habitat)
gdf['habitat'] = 1


# The CSV only provides presences. A classifier fitted on a single class can
# only ever predict that class, so draw an equal number of random background
# pixels from the image as pseudo-absences (class 0).
# NOTE(review): treating random pixels as non-habitat is a modelling
# assumption — replace with surveyed absence points if they are available.
rng = np.random.default_rng(42)
n_background = len(pixel_values)
background_rows = rng.integers(0, image.shape[1], size=n_background)
background_cols = rng.integers(0, image.shape[2], size=n_background)
background_values = image[:, background_rows, background_cols].T


# Prepare dataset for training: presences first, then background samples.
X = np.vstack([pixel_values, background_values])
y = np.concatenate([
    gdf['habitat'].to_numpy(),
    np.zeros(n_background, dtype=int),
])


# Hold out 30% of the samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3)


# Build a 100-tree random forest and fit it on the training samples.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X_train, y_train)


# Predict the held-out samples.
y_pred = clf.predict(X_test)


# Compute the evaluation metrics for the held-out predictions.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Report each metric after its label.
print("Accuracy:", accuracy)
print("Confusion Matrix:", conf_matrix)
print("Classification Report:", class_report)


# Flatten the image to one row per pixel with the bands as columns, classify
# every pixel, then fold the predictions back into the height x width grid.
image_2d = np.transpose(image.reshape(profile['count'], -1))
image_classified = clf.predict(image_2d)
image_classified = image_classified.reshape(profile['height'], profile['width'])


# Write the classification as a single-band uint8 GeoTIFF. The profile is
# copied from the source raster so the original `profile` stays untouched,
# and 255 is declared as the nodata value.
classified_profile = profile.copy()
classified_profile.update(dtype='uint8', count=1, nodata=255)

with rasterio.open('classified_image.tif', mode='w', **classified_profile) as dst:
    dst.write(image_classified.astype(rasterio.uint8), indexes=1)
