This is an outline of a Jupyter Notebook for classifying jaguar habitat from location data (a CSV file) and an 8-band satellite image. It uses the Python libraries pandas, rasterio, geopandas, numpy, and scikit-learn (sklearn). Pixel values are sampled from the image at each recorded location and used to train a classifier, which is then applied to the whole image. A RandomForestClassifier is used as the classification model, but other classifiers can also be tested to see if they provide better results.

In [None]:
# Import necessary libraries
import pandas as pd
import rasterio
import geopandas as gpd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


In [None]:
# Read the jaguar location table and attach a point geometry built from its
# longitude/latitude columns.
# NOTE(review): no CRS is assigned to the GeoDataFrame; the coordinates are
# presumably expected to match the raster's coordinate system -- confirm.
csv_file = 'jaguar_locations.csv'
df = pd.read_csv(csv_file)
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(x=df['longitude'], y=df['latitude']),
)

In [None]:
# Open the satellite GeoTIFF, keeping both its metadata profile (reused later
# when writing the classified output) and the full pixel array in memory.
satellite_image = 'satellite_image.tif'
with rasterio.open(satellite_image) as src:
    profile = src.profile
    image = src.read()

In [None]:

# Create a function to extract pixel values for each jaguar location.
def extract_pixel_values(gdf, raster):
    """Sample every band of ``raster`` at each point in ``gdf``.

    Args:
        gdf: Object whose ``geometry`` attribute iterates over points exposing
            ``x`` and ``y`` attributes (e.g. a GeoDataFrame of points).
        raster: Open dataset providing ``index(x, y) -> (row, col)`` and
            ``read()`` returning a ``(bands, rows, cols)`` array. Point
            coordinates are passed to ``raster.index`` unchanged, so they
            must already be expressed in the raster's coordinate system.

    Returns:
        Array of shape ``(n_points, bands)`` with one row per point, in input
        order. An empty ``gdf`` yields an array of shape ``(0, bands)``.

    Raises:
        IndexError: If a point maps to a pixel outside the raster.
    """
    # Read the raster once; reading inside the loop reloads the whole image
    # for every single point.
    data = raster.read()
    n_rows, n_cols = data.shape[1], data.shape[2]

    values = []
    for point in gdf.geometry:
        row, col = raster.index(point.x, point.y)
        # Negative indices would silently wrap around to the opposite edge of
        # the image, so reject every out-of-extent location explicitly.
        if not (0 <= row < n_rows and 0 <= col < n_cols):
            raise IndexError(
                f"Point ({point.x}, {point.y}) maps to pixel "
                f"(row={row}, col={col}), outside the "
                f"{n_rows}x{n_cols} raster")
        values.append(data[:, row, col])

    if not values:
        return np.empty((0, data.shape[0]), dtype=data.dtype)
    return np.array(values)


In [None]:

# Extract pixel values for each jaguar location using the function.
# The `src` handle from the image-loading cell was closed when its `with`
# block ended, so the dataset is reopened here for the duration of sampling.
with rasterio.open(satellite_image) as src:
    X = extract_pixel_values(gdf, src)

# Labels come from the 'habitat' column, in the same row order as X.
y = gdf['habitat'].values


In [None]:

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [None]:

# Fit a 100-tree random forest (fixed seed) on the training split.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X=X_train, y=y_train)


In [None]:

# Score the held-out test set: predict its labels, then report the fraction
# predicted correctly.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')


In [None]:

# Tabulate true versus predicted classes for the test set to see which
# classes the model confuses.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(conf_matrix)


In [None]:

# Run the trained model over every pixel of the satellite image.
def classify_image(image, clf):
    """Predict a class for each pixel of a ``(bands, rows, cols)`` image.

    Args:
        image: Array indexed as ``(bands, rows, cols)``.
        clf: Fitted model exposing ``predict`` on a ``(n_samples, n_features)``
            array, where each feature is one band value.

    Returns:
        Array of shape ``(rows, cols)`` holding the predicted class per pixel.
    """
    # One row per pixel, one column per band: the layout ``predict`` expects.
    pixels = image.reshape(image.shape[0], -1).transpose()
    labels = clf.predict(pixels)
    # Fold the flat predictions back onto the image grid.
    return labels.reshape(image.shape[1], image.shape[2])


In [None]:

# Classify the whole image and adapt the source profile for a single-band
# uint8 GeoTIFF; the file itself is written in the next cell.
output_file = "classified_habitat.tif"
classified_image = classify_image(image, clf)
profile.update(count=1, dtype=rasterio.uint8)


In [None]:

# Write the class map to band 1 of the output GeoTIFF, cast to uint8 to match
# the dtype declared in the profile.
with rasterio.open(output_file, mode='w', **profile) as dst:
    dst.write(classified_image.astype(rasterio.uint8), indexes=1)

# Load the output file into QGIS to inspect the result.


In [None]:
# Method using 10x10 pixel chips

"""
This code reads the CSV file and satellite image, extracts 10x10 pixel chips, reshapes the chips into 1D feature vectors, splits the data into training and testing sets, trains a Random Forest classifier on the training data, and evaluates the classifier's performance on the test data.
"""


import pandas as pd
import rasterio
import geopandas as gpd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Function to extract pixel chips


def extract_pixel_chips(gdf, raster, chip_size=10):
    """Cut a square, all-band pixel chip out of ``raster`` around each point.

    The chip spans ``chip_size`` pixels per side, starting ``chip_size // 2``
    pixels before the point's row and column.

    Args:
        gdf: Object whose ``geometry`` attribute iterates over points exposing
            ``x`` and ``y`` attributes (e.g. a GeoDataFrame of points).
        raster: Open dataset providing ``index(x, y) -> (row, col)`` and
            ``read()`` returning a ``(bands, rows, cols)`` array. Point
            coordinates are passed to ``raster.index`` unchanged, so they
            must already be expressed in the raster's coordinate system.
        chip_size: Side length of each chip in pixels.

    Returns:
        Array of shape ``(n_points, bands, chip_size, chip_size)``, in input
        order. An empty ``gdf`` yields an array with a leading dimension of 0.

    Raises:
        ValueError: If a chip would extend beyond the raster. Such chips
            cannot be stacked with full-size ones, and negative slice starts
            would wrap around to the opposite edge.
    """
    # Read the raster once; reading inside the loop reloads the whole image
    # for every single point.
    data = raster.read()
    n_rows, n_cols = data.shape[1], data.shape[2]
    half_chip = chip_size // 2

    values = []
    for point in gdf.geometry:
        row, col = raster.index(point.x, point.y)
        row_start, col_start = row - half_chip, col - half_chip
        # Derive the end from the start so odd sizes still give a full
        # chip_size-wide window (start + 2 * half_chip would be one short).
        row_stop, col_stop = row_start + chip_size, col_start + chip_size
        if (row_start < 0 or col_start < 0
                or row_stop > n_rows or col_stop > n_cols):
            raise ValueError(
                f"A {chip_size}x{chip_size} chip around point "
                f"({point.x}, {point.y}) at pixel (row={row}, col={col}) "
                f"extends beyond the {n_rows}x{n_cols} raster")
        values.append(data[:, row_start:row_stop, col_start:col_stop])

    if not values:
        return np.empty(
            (0, data.shape[0], chip_size, chip_size), dtype=data.dtype)
    return np.array(values)


# Read CSV and create GeoDataFrame
csv_file = 'jaguar_locations.csv'
df = pd.read_csv(csv_file)
gdf = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.longitude, df.latitude))

# Read the satellite image and extract the pixel chips while the dataset is
# still open; once the `with` block exits, `src` is closed and can no longer
# be read from.
satellite_image = 'satellite_image.tif'
with rasterio.open(satellite_image) as src:
    image = src.read()
    profile = src.profile
    X = extract_pixel_chips(gdf, src)

# Labels come from the 'habitat' column, in the same row order as the chips.
y = gdf['habitat'].values

# Reshape the pixel chips into 1D feature vectors
# (input_channels * chip_size * chip_size). Using -1 lets numpy derive the
# vector length from the chips themselves instead of hard-coding 10 x 10.
input_channels = X.shape[1]
X = X.reshape(X.shape[0], -1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Create and train the Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict the habitat class for test data and evaluate performance
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))