# Import libraries

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Load dataset(s)

In [2]:
# Load dataset
data = pd.read_csv('ascat_era5_era5land_cell1320.csv')
#data = data.sample(frac=0.1, random_state=1)  # Sample 10% of the data for testing
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9705133 entries, 0 to 9705132
Data columns (total 32 columns):
 #   Column                             Dtype  
---  ------                             -----  
 0   Unnamed: 0                         object 
 1   gpi_ascat                          int64  
 2   slope40                            float64
 3   slope40_rolling_mean_15days        float64
 4   slope40_rolling_mean_30days        float64
 5   slope40_rolling_mean_60days        float64
 6   slope_slopes                       float64
 7   difference_of_slope40              float64
 8   diff_of_difference_of_slope40      float64
 9   curvature40                        float64
 10  curvature40_rolling_mean_15days    float64
 11  curvature40_rolling_mean_30days    float64
 12  curvature40_rolling_mean_60days    float64
 13  slope_curvatures                   float64
 14  difference_of_curvature40          float64
 15  diff_of_difference_of_curvature40  float64
 16  backscatter40     

In [3]:
# Count pixels with at least one row with target=0 and one row with target=1
pixel_groups = data.groupby('gpi_ascat')['target'].nunique()
pixels_with_both_classes = pixel_groups[pixel_groups == 2].index.tolist()
print(f'Number of pixels with both target 0 and 1: {len(pixels_with_both_classes)}')
print(f'Pixels with both target 0 and 1: {pixels_with_both_classes}')

Number of pixels with both target 0 and 1: 1021
Pixels with both target 0 and 1: [820895, 821039, 821128, 821183, 821272, 821416, 821505, 821560, 821649, 821793, 821882, 822026, 822115, 822259, 822403, 822492, 822547, 822636, 822725, 822869, 823013, 823102, 823157, 823246, 823479, 823623, 823712, 823856, 824000, 824089, 824233, 824322, 824377, 824466, 824610, 824699, 824754, 824843, 825076, 825364, 825453, 825597, 825686, 825830, 826063, 826207, 826296, 826351, 826440, 826673, 826961, 827050, 827283, 827427, 827660, 827893, 828037, 828181, 828270, 828414, 828469, 828503, 828558, 828647, 828880, 829024, 829257, 829490, 829634, 829778, 829867, 830011, 830066, 830244, 830477, 830621, 830676, 830765, 830854, 831087, 831231, 831464, 831608, 831663, 831841, 831985, 832074, 832218, 832273, 832451, 832595, 832650, 832684, 832828, 833205, 833260, 833438, 833637, 833815, 834048, 834192, 834247, 834425, 834658, 834802, 834857, 835035, 835179, 835234, 835268, 835412, 835645, 835789, 836022, 836221

# Train ML model for a single pixel

In [4]:
# # Select a single pixel
# # target_pixel = data['gpi_ascat'].iloc[0]  # Example: Select the first unique pixel
# # target_pixel = 1065964  # Select the specific pixel
# target_pixel = pixels_with_both_classes[0]  # Select the first pixel with both classes
# data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel

# # Check the number of rows for this pixel
# num_rows = len(data_pixel)
# print(f'Number of rows for pixel {target_pixel}: {num_rows}')

In [5]:
# # Select relevant features and target
# features = ['slope40', 'curvature40', 'backscatter40']
# X = data_pixel[features]
# y = data_pixel['target']

# # Split data into training and test sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# # Feature scaling
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

In [6]:
# # Initialize and train the model
# model = RandomForestClassifier(random_state=1)
# model.fit(X_train, y_train)

In [7]:
# # Make predictions
# predictions = model.predict(X_test)

In [8]:
# # Evaluate the model
# print("--- Random Forest for Pixel", target_pixel, "---")
# print(classification_report(y_test, predictions))

In [9]:
# # Confusion matrix
# cm = confusion_matrix(y_test, predictions)
# plt.figure(figsize=(6, 4))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
# plt.title(f'Confusion Matrix for Pixel {target_pixel}')
# plt.xlabel('Predicted')
# plt.ylabel('Actual')
# plt.show()

# ML combination backscatter, slope, curvature

Train ML model for all pixels that contain both target classes (both 0 and 1)

In [11]:
# Select relevant features
features = ['slope40', 'curvature40', 'backscatter40']

# Tables to store the results
pixel_metrics = []  # Table 1 (Separate for Each Class)
pixel_metrics_aggregated = []  # Table 2 (Aggregated Metrics)
full_predictions = []  # Table 3 (Full Data with Predictions)

# Train and evaluate a model for each pixel with both classes
for target_pixel in pixels_with_both_classes:
    # print(f'\nTraining model for pixel {target_pixel}')
    data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel
    
    # Check the number of rows for this pixel
    # num_rows = len(data_pixel)
    # print(f'Number of rows for pixel {target_pixel}: {num_rows}')
    
    # Prepare features and target
    X = data_pixel[features]
    y = data_pixel['target']
    
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Initialize and train the model
    model = RandomForestClassifier(random_state=1)
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # # Prints to evaluate the model
    # print(f"--- Random Forest for Pixel {target_pixel} ---")
    # print(classification_report(y_test, predictions))
    
    # # Print confusion matrix (absolute values)
    # cm = confusion_matrix(y_test, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    # plt.title(f'Confusion Matrix (Absolute) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()
    
    # # Print confusion matrix (relative values / percentages)
    # cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize by row (actual values)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm_percentage, annot=True, fmt='.2%', cmap='Blues')
    # plt.title(f'Confusion Matrix (Percentage) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()

    # Store predictions in the full dataset
    data_pixel.loc[y_test.index, 'prediction'] = predictions
    full_predictions.append(data_pixel)
    
    # Compute metrics for each class
    for class_label in [0, 1]:
        class_mask = (y_test == class_label)
        if np.sum(class_mask) > 0:  # Avoid division by zero
            pixel_metrics.append({
                'gpi_ascat': target_pixel,
                'class': class_label,
                'accuracy': accuracy_score(y_test[class_mask], predictions[class_mask]),
                'precision': precision_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'recall': recall_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'f1_score': f1_score(y_test, predictions, pos_label=class_label, zero_division=0)
            })
    
    # Compute aggregated (micro/macro) metrics for the pixel
    pixel_metrics_aggregated.append({
        'gpi_ascat': target_pixel,
        'accuracy': accuracy_score(y_test, predictions),
        'precision_macro': precision_score(y_test, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(y_test, predictions, average='macro', zero_division=0),
        'f1_macro': f1_score(y_test, predictions, average='macro', zero_division=0),
        'precision_micro': precision_score(y_test, predictions, average='micro', zero_division=0),
        'recall_micro': recall_score(y_test, predictions, average='micro', zero_division=0),
        'f1_micro': f1_score(y_test, predictions, average='micro', zero_division=0)
    })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a c

In [12]:
# Convert results to DataFrames
df_pixel_metrics = pd.DataFrame(pixel_metrics)  # Table 1
df_pixel_metrics_aggregated = pd.DataFrame(pixel_metrics_aggregated)  # Table 2
df_full_predictions = pd.concat(full_predictions).reset_index(drop=True)  # Table 3

In [13]:
# df_pixel_metrics.head()

In [14]:
# df_pixel_metrics_aggregated.head()

In [15]:
# df_full_predictions.head()

In [16]:
# Save results to CSV
df_pixel_metrics.to_csv('cell1320_metrics_separate_baseline.csv', index=False)
df_pixel_metrics_aggregated.to_csv('cell1320_metrics_aggregated_baseline.csv', index=False)
df_full_predictions.to_csv('cell1320_predictions_baseline.csv', index=False)

# ML combination backscatter, slope_rolling_mean_15, curvature_rolling_mean_15

Train ML model for all pixels that contain both target classes (both 0 and 1)

In [17]:
# Select relevant features plus rolling means for 60 days
features_rolling_mean_15days = ['slope40_rolling_mean_15days', 'curvature40_rolling_mean_15days', 'backscatter40']

# Tables to store the results
pixel_metrics_rolling_mean_15days = []  # Table 1 (Separate for Each Class)
pixel_metrics_aggregated_rolling_mean_15days = []  # Table 2 (Aggregated Metrics)
full_predictions_rolling_mean_15days = []  # Table 3 (Full Data with Predictions)

# Train and evaluate a model for each pixel with both classes
for target_pixel in pixels_with_both_classes:
    # print(f'\nTraining model for pixel {target_pixel}')
    data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel
    
    # Check the number of rows for this pixel
    # num_rows = len(data_pixel)
    # print(f'Number of rows for pixel {target_pixel}: {num_rows}')
    
    # Prepare features and target
    X = data_pixel[features_rolling_mean_15days]
    y = data_pixel['target']
    
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Initialize and train the model
    model = RandomForestClassifier(random_state=1)
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # # Prints to evaluate the model
    # print(f"--- Random Forest for Pixel {target_pixel} ---")
    # print(classification_report(y_test, predictions))
    
    # # Print confusion matrix (absolute values)
    # cm = confusion_matrix(y_test, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    # plt.title(f'Confusion Matrix (Absolute) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()
    
    # # Print confusion matrix (relative values / percentages)
    # cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize by row (actual values)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm_percentage, annot=True, fmt='.2%', cmap='Blues')
    # plt.title(f'Confusion Matrix (Percentage) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()

    # Store predictions in the full dataset
    data_pixel.loc[y_test.index, 'prediction'] = predictions
    full_predictions_rolling_mean_15days.append(data_pixel)
    
    # Compute metrics for each class
    for class_label in [0, 1]:
        class_mask = (y_test == class_label)
        if np.sum(class_mask) > 0:  # Avoid division by zero
            pixel_metrics_rolling_mean_15days.append({
                'gpi_ascat': target_pixel,
                'class': class_label,
                'accuracy': accuracy_score(y_test[class_mask], predictions[class_mask]),
                'precision': precision_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'recall': recall_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'f1_score': f1_score(y_test, predictions, pos_label=class_label, zero_division=0)
            })
    
    # Compute aggregated (micro/macro) metrics for the pixel
    pixel_metrics_aggregated_rolling_mean_15days.append({
        'gpi_ascat': target_pixel,
        'accuracy': accuracy_score(y_test, predictions),
        'precision_macro': precision_score(y_test, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(y_test, predictions, average='macro', zero_division=0),
        'f1_macro': f1_score(y_test, predictions, average='macro', zero_division=0),
        'precision_micro': precision_score(y_test, predictions, average='micro', zero_division=0),
        'recall_micro': recall_score(y_test, predictions, average='micro', zero_division=0),
        'f1_micro': f1_score(y_test, predictions, average='micro', zero_division=0)
    })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a c

In [18]:
# Convert results to DataFrames
df_pixel_metrics_rolling_mean_15days = pd.DataFrame(pixel_metrics_rolling_mean_15days)  # Table 1
df_pixel_metrics_aggregated_rolling_mean_15days = pd.DataFrame(pixel_metrics_aggregated_rolling_mean_15days)  # Table 2
df_full_predictions_rolling_mean_15days = pd.concat(full_predictions_rolling_mean_15days).reset_index(drop=True)  # Table 3

In [19]:

# Save results to CSV
df_pixel_metrics_rolling_mean_15days.to_csv('cell1320_metrics_separate_roll15.csv', index=False)
df_pixel_metrics_aggregated_rolling_mean_15days.to_csv('cell1320_metrics_aggregated_roll15.csv', index=False)
df_full_predictions_rolling_mean_15days.to_csv('cell1320_predictions_roll15.csv', index=False)

# ML combination backscatter, slope_rolling_mean_30, curvature_rolling_mean_30

Train ML model for all pixels that contain both target classes (both 0 and 1)

In [20]:
# Select relevant features plus rolling means for 60 days
features_rolling_mean_30days = ['slope40_rolling_mean_30days', 'curvature40_rolling_mean_30days', 'backscatter40']

# Tables to store the results
pixel_metrics_rolling_mean_30days = []  # Table 1 (Separate for Each Class)
pixel_metrics_aggregated_rolling_mean_30days = []  # Table 2 (Aggregated Metrics)
full_predictions_rolling_mean_30days = []  # Table 3 (Full Data with Predictions)

# Train and evaluate a model for each pixel with both classes
for target_pixel in pixels_with_both_classes:
    # print(f'\nTraining model for pixel {target_pixel}')
    data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel
    
    # Check the number of rows for this pixel
    # num_rows = len(data_pixel)
    # print(f'Number of rows for pixel {target_pixel}: {num_rows}')
    
    # Prepare features and target
    X = data_pixel[features_rolling_mean_30days]
    y = data_pixel['target']
    
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Initialize and train the model
    model = RandomForestClassifier(random_state=1)
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # # Prints to evaluate the model
    # print(f"--- Random Forest for Pixel {target_pixel} ---")
    # print(classification_report(y_test, predictions))
    
    # # Print confusion matrix (absolute values)
    # cm = confusion_matrix(y_test, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    # plt.title(f'Confusion Matrix (Absolute) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()
    
    # # Print confusion matrix (relative values / percentages)
    # cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize by row (actual values)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm_percentage, annot=True, fmt='.2%', cmap='Blues')
    # plt.title(f'Confusion Matrix (Percentage) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()

    # Store predictions in the full dataset
    data_pixel.loc[y_test.index, 'prediction'] = predictions
    full_predictions_rolling_mean_30days.append(data_pixel)
    
    # Compute metrics for each class
    for class_label in [0, 1]:
        class_mask = (y_test == class_label)
        if np.sum(class_mask) > 0:  # Avoid division by zero
            pixel_metrics_rolling_mean_30days.append({
                'gpi_ascat': target_pixel,
                'class': class_label,
                'accuracy': accuracy_score(y_test[class_mask], predictions[class_mask]),
                'precision': precision_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'recall': recall_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'f1_score': f1_score(y_test, predictions, pos_label=class_label, zero_division=0)
            })
    
    # Compute aggregated (micro/macro) metrics for the pixel
    pixel_metrics_aggregated_rolling_mean_30days.append({
        'gpi_ascat': target_pixel,
        'accuracy': accuracy_score(y_test, predictions),
        'precision_macro': precision_score(y_test, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(y_test, predictions, average='macro', zero_division=0),
        'f1_macro': f1_score(y_test, predictions, average='macro', zero_division=0),
        'precision_micro': precision_score(y_test, predictions, average='micro', zero_division=0),
        'recall_micro': recall_score(y_test, predictions, average='micro', zero_division=0),
        'f1_micro': f1_score(y_test, predictions, average='micro', zero_division=0)
    })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a c

In [21]:
# Convert results to DataFrames
df_pixel_metrics_rolling_mean_30days = pd.DataFrame(pixel_metrics_rolling_mean_30days)  # Table 1
df_pixel_metrics_aggregated_rolling_mean_30days = pd.DataFrame(pixel_metrics_aggregated_rolling_mean_30days)  # Table 2
df_full_predictions_rolling_mean_30days = pd.concat(full_predictions_rolling_mean_30days).reset_index(drop=True)  # Table 3

In [22]:

# Save results to CSV
df_pixel_metrics_rolling_mean_30days.to_csv('cell1320_metrics_separate_roll30.csv', index=False)
df_pixel_metrics_aggregated_rolling_mean_30days.to_csv('cell1320_metrics_aggregated_roll30.csv', index=False)
df_full_predictions_rolling_mean_30days.to_csv('cell1320_predictions_roll30.csv', index=False)

# ML combination backscatter, slope_rolling_mean_60, curvature_rolling_mean_60

Train ML model for all pixels that contain both target classes (both 0 and 1)

In [23]:
# Select relevant features plus rolling means for 60 days
features_rolling_mean_60days = ['slope40_rolling_mean_60days', 'curvature40_rolling_mean_60days', 'backscatter40']

# Tables to store the results
pixel_metrics_rolling_mean_60days = []  # Table 1 (Separate for Each Class)
pixel_metrics_aggregated_rolling_mean_60days = []  # Table 2 (Aggregated Metrics)
full_predictions_rolling_mean_60days = []  # Table 3 (Full Data with Predictions)

# Train and evaluate a model for each pixel with both classes
for target_pixel in pixels_with_both_classes:
    # print(f'\nTraining model for pixel {target_pixel}')
    data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel
    
    # Check the number of rows for this pixel
    # num_rows = len(data_pixel)
    # print(f'Number of rows for pixel {target_pixel}: {num_rows}')
    
    # Prepare features and target
    X = data_pixel[features_rolling_mean_60days]
    y = data_pixel['target']
    
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Initialize and train the model
    model = RandomForestClassifier(random_state=1)
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # # Prints to evaluate the model
    # print(f"--- Random Forest for Pixel {target_pixel} ---")
    # print(classification_report(y_test, predictions))
    
    # # Print confusion matrix (absolute values)
    # cm = confusion_matrix(y_test, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    # plt.title(f'Confusion Matrix (Absolute) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()
    
    # # Print confusion matrix (relative values / percentages)
    # cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize by row (actual values)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm_percentage, annot=True, fmt='.2%', cmap='Blues')
    # plt.title(f'Confusion Matrix (Percentage) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()

    # Store predictions in the full dataset
    data_pixel.loc[y_test.index, 'prediction'] = predictions
    full_predictions_rolling_mean_60days.append(data_pixel)
    
    # Compute metrics for each class
    for class_label in [0, 1]:
        class_mask = (y_test == class_label)
        if np.sum(class_mask) > 0:  # Avoid division by zero
            pixel_metrics_rolling_mean_60days.append({
                'gpi_ascat': target_pixel,
                'class': class_label,
                'accuracy': accuracy_score(y_test[class_mask], predictions[class_mask]),
                'precision': precision_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'recall': recall_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'f1_score': f1_score(y_test, predictions, pos_label=class_label, zero_division=0)
            })
    
    # Compute aggregated (micro/macro) metrics for the pixel
    pixel_metrics_aggregated_rolling_mean_60days.append({
        'gpi_ascat': target_pixel,
        'accuracy': accuracy_score(y_test, predictions),
        'precision_macro': precision_score(y_test, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(y_test, predictions, average='macro', zero_division=0),
        'f1_macro': f1_score(y_test, predictions, average='macro', zero_division=0),
        'precision_micro': precision_score(y_test, predictions, average='micro', zero_division=0),
        'recall_micro': recall_score(y_test, predictions, average='micro', zero_division=0),
        'f1_micro': f1_score(y_test, predictions, average='micro', zero_division=0)
    })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a c

In [24]:
# Convert results to DataFrames
df_pixel_metrics_rolling_mean_60days = pd.DataFrame(pixel_metrics_rolling_mean_60days)  # Table 1
df_pixel_metrics_aggregated_rolling_mean_60days = pd.DataFrame(pixel_metrics_aggregated_rolling_mean_60days)  # Table 2
df_full_predictions_rolling_mean_60days = pd.concat(full_predictions_rolling_mean_60days).reset_index(drop=True)  # Table 3

In [25]:

# Save results to CSV
df_pixel_metrics_rolling_mean_60days.to_csv('cell1320_metrics_separate_roll60.csv', index=False)
df_pixel_metrics_aggregated_rolling_mean_60days.to_csv('cell1320_metrics_aggregated_roll60.csv', index=False)
df_full_predictions_rolling_mean_60days.to_csv('cell1320_predictions_roll60.csv', index=False)

# ML combination backscatter, slope_diff, curvature_diff

Train ML model for all pixels that contain both target classes (both 0 and 1)

In [10]:
# Select relevant features plus rolling means for 60 days
features_rolling_mean_diff = ['difference_of_slope40', 'difference_of_curvature40', 'backscatter40']

# Tables to store the results
pixel_metrics_rolling_mean_diff = []  # Table 1 (Separate for Each Class)
pixel_metrics_aggregated_rolling_mean_diff = []  # Table 2 (Aggregated Metrics)
full_predictions_rolling_mean_diff = []  # Table 3 (Full Data with Predictions)

# Train and evaluate a model for each pixel with both classes
for target_pixel in pixels_with_both_classes:
    # print(f'\nTraining model for pixel {target_pixel}')
    data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel
    
    # Check the number of rows for this pixel
    # num_rows = len(data_pixel)
    # print(f'Number of rows for pixel {target_pixel}: {num_rows}')
    
    # Prepare features and target
    X = data_pixel[features_rolling_mean_diff]
    y = data_pixel['target']
    
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Initialize and train the model
    model = RandomForestClassifier(random_state=1)
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # # Prints to evaluate the model
    # print(f"--- Random Forest for Pixel {target_pixel} ---")
    # print(classification_report(y_test, predictions))
    
    # # Print confusion matrix (absolute values)
    # cm = confusion_matrix(y_test, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    # plt.title(f'Confusion Matrix (Absolute) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()
    
    # # Print confusion matrix (relative values / percentages)
    # cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize by row (actual values)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm_percentage, annot=True, fmt='.2%', cmap='Blues')
    # plt.title(f'Confusion Matrix (Percentage) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()

    # Store predictions in the full dataset
    data_pixel.loc[y_test.index, 'prediction'] = predictions
    full_predictions_rolling_mean_diff.append(data_pixel)
    
    # Compute metrics for each class
    for class_label in [0, 1]:
        class_mask = (y_test == class_label)
        if np.sum(class_mask) > 0:  # Avoid division by zero
            pixel_metrics_rolling_mean_diff.append({
                'gpi_ascat': target_pixel,
                'class': class_label,
                'accuracy': accuracy_score(y_test[class_mask], predictions[class_mask]),
                'precision': precision_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'recall': recall_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'f1_score': f1_score(y_test, predictions, pos_label=class_label, zero_division=0)
            })
    
    # Compute aggregated (micro/macro) metrics for the pixel
    pixel_metrics_aggregated_rolling_mean_diff.append({
        'gpi_ascat': target_pixel,
        'accuracy': accuracy_score(y_test, predictions),
        'precision_macro': precision_score(y_test, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(y_test, predictions, average='macro', zero_division=0),
        'f1_macro': f1_score(y_test, predictions, average='macro', zero_division=0),
        'precision_micro': precision_score(y_test, predictions, average='micro', zero_division=0),
        'recall_micro': recall_score(y_test, predictions, average='micro', zero_division=0),
        'f1_micro': f1_score(y_test, predictions, average='micro', zero_division=0)
    })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a c

In [11]:
# Convert results to DataFrames
df_pixel_metrics_rolling_mean_diff = pd.DataFrame(pixel_metrics_rolling_mean_diff)  # Table 1
df_pixel_metrics_aggregated_rolling_mean_diff = pd.DataFrame(pixel_metrics_aggregated_rolling_mean_diff)  # Table 2
df_full_predictions_rolling_mean_diff = pd.concat(full_predictions_rolling_mean_diff).reset_index(drop=True)  # Table 3

In [12]:

# Save results to CSV
df_pixel_metrics_rolling_mean_diff.to_csv('cell1320_metrics_separate_diff.csv', index=False)
df_pixel_metrics_aggregated_rolling_mean_diff.to_csv('cell1320_metrics_aggregated_diff.csv', index=False)
df_full_predictions_rolling_mean_diff.to_csv('cell1320_predictions_diff.csv', index=False)

# ML combination backscatter, slope_diff_diff, curvature_diff_diff

Train ML model for all pixels that contain both target classes (both 0 and 1)

In [13]:
# Select relevant features plus rolling means for 60 days
features_rolling_mean_diff_diff = ['diff_of_difference_of_slope40', 'diff_of_difference_of_curvature40', 'backscatter40']

# Tables to store the results
pixel_metrics_rolling_mean_diff_diff = []  # Table 1 (Separate for Each Class)
pixel_metrics_aggregated_rolling_mean_diff_diff = []  # Table 2 (Aggregated Metrics)
full_predictions_rolling_mean_diff_diff = []  # Table 3 (Full Data with Predictions)

# Train and evaluate a model for each pixel with both classes
for target_pixel in pixels_with_both_classes:
    # print(f'\nTraining model for pixel {target_pixel}')
    data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel
    
    # Check the number of rows for this pixel
    # num_rows = len(data_pixel)
    # print(f'Number of rows for pixel {target_pixel}: {num_rows}')
    
    # Prepare features and target
    X = data_pixel[features_rolling_mean_diff_diff]
    y = data_pixel['target']
    
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Initialize and train the model
    model = RandomForestClassifier(random_state=1)
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # # Prints to evaluate the model
    # print(f"--- Random Forest for Pixel {target_pixel} ---")
    # print(classification_report(y_test, predictions))
    
    # # Print confusion matrix (absolute values)
    # cm = confusion_matrix(y_test, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    # plt.title(f'Confusion Matrix (Absolute) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()
    
    # # Print confusion matrix (relative values / percentages)
    # cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize by row (actual values)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm_percentage, annot=True, fmt='.2%', cmap='Blues')
    # plt.title(f'Confusion Matrix (Percentage) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()

    # Store predictions in the full dataset
    data_pixel.loc[y_test.index, 'prediction'] = predictions
    full_predictions_rolling_mean_diff_diff.append(data_pixel)
    
    # Compute metrics for each class
    for class_label in [0, 1]:
        class_mask = (y_test == class_label)
        if np.sum(class_mask) > 0:  # Avoid division by zero
            pixel_metrics_rolling_mean_diff_diff.append({
                'gpi_ascat': target_pixel,
                'class': class_label,
                'accuracy': accuracy_score(y_test[class_mask], predictions[class_mask]),
                'precision': precision_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'recall': recall_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'f1_score': f1_score(y_test, predictions, pos_label=class_label, zero_division=0)
            })
    
    # Compute aggregated (micro/macro) metrics for the pixel
    pixel_metrics_aggregated_rolling_mean_diff_diff.append({
        'gpi_ascat': target_pixel,
        'accuracy': accuracy_score(y_test, predictions),
        'precision_macro': precision_score(y_test, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(y_test, predictions, average='macro', zero_division=0),
        'f1_macro': f1_score(y_test, predictions, average='macro', zero_division=0),
        'precision_micro': precision_score(y_test, predictions, average='micro', zero_division=0),
        'recall_micro': recall_score(y_test, predictions, average='micro', zero_division=0),
        'f1_micro': f1_score(y_test, predictions, average='micro', zero_division=0)
    })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a c

In [14]:
# Convert results to DataFrames
df_pixel_metrics_rolling_mean_diff_diff = pd.DataFrame(pixel_metrics_rolling_mean_diff_diff)  # Table 1
df_pixel_metrics_aggregated_rolling_mean_diff_diff = pd.DataFrame(pixel_metrics_aggregated_rolling_mean_diff_diff)  # Table 2
df_full_predictions_rolling_mean_diff_diff = pd.concat(full_predictions_rolling_mean_diff_diff).reset_index(drop=True)  # Table 3

In [15]:

# Save results to CSV
df_pixel_metrics_rolling_mean_diff_diff.to_csv('cell1320_metrics_separate_diff_diff.csv', index=False)
df_pixel_metrics_aggregated_rolling_mean_diff_diff.to_csv('cell1320_metrics_aggregated_diff_diff.csv', index=False)
df_full_predictions_rolling_mean_diff_diff.to_csv('cell1320_predictions_diff_diff.csv', index=False)

# ML combination backscatter, slope_diff_diff, curvature_diff_diff

sigma40 (backscatter), slope40, curvature40, difference of slopes, difference of curvatures, difference of differences (slope), difference of differences (curvature)

Train ML model for all pixels that contain both target classes (both 0 and 1)

In [4]:
# Select relevant features plus rolling means for 60 days
features_combination = ['difference_of_slope40', 'difference_of_curvature40', 'diff_of_difference_of_slope40', 'diff_of_difference_of_curvature40', 'slope40', 'curvature40', 'backscatter40']

# Tables to store the results
pixel_metrics_combination = []  # Table 1 (Separate for Each Class)
pixel_metrics_aggregated_combination = []  # Table 2 (Aggregated Metrics)
full_predictions_combination = []  # Table 3 (Full Data with Predictions)

# Train and evaluate a model for each pixel with both classes
for target_pixel in pixels_with_both_classes:
    # print(f'\nTraining model for pixel {target_pixel}')
    data_pixel = data[data['gpi_ascat'] == target_pixel]  # Filter data for the selected pixel
    
    # Check the number of rows for this pixel
    # num_rows = len(data_pixel)
    # print(f'Number of rows for pixel {target_pixel}: {num_rows}')
    
    # Prepare features and target
    X = data_pixel[features_combination]
    y = data_pixel['target']
    
    # Split data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    
    # Feature scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Initialize and train the model
    model = RandomForestClassifier(random_state=1)
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # # Prints to evaluate the model
    # print(f"--- Random Forest for Pixel {target_pixel} ---")
    # print(classification_report(y_test, predictions))
    
    # # Print confusion matrix (absolute values)
    # cm = confusion_matrix(y_test, predictions)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    # plt.title(f'Confusion Matrix (Absolute) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()
    
    # # Print confusion matrix (relative values / percentages)
    # cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize by row (actual values)
    # plt.figure(figsize=(6, 4))
    # sns.heatmap(cm_percentage, annot=True, fmt='.2%', cmap='Blues')
    # plt.title(f'Confusion Matrix (Percentage) for Pixel {target_pixel}')
    # plt.xlabel('Predicted')
    # plt.ylabel('Actual')
    # plt.show()

    # Store predictions in the full dataset
    data_pixel.loc[y_test.index, 'prediction'] = predictions
    full_predictions_combination.append(data_pixel)
    
    # Compute metrics for each class
    for class_label in [0, 1]:
        class_mask = (y_test == class_label)
        if np.sum(class_mask) > 0:  # Avoid division by zero
            pixel_metrics_combination.append({
                'gpi_ascat': target_pixel,
                'class': class_label,
                'accuracy': accuracy_score(y_test[class_mask], predictions[class_mask]),
                'precision': precision_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'recall': recall_score(y_test, predictions, pos_label=class_label, zero_division=0),
                'f1_score': f1_score(y_test, predictions, pos_label=class_label, zero_division=0)
            })
    
    # Compute aggregated (micro/macro) metrics for the pixel
    pixel_metrics_aggregated_combination.append({
        'gpi_ascat': target_pixel,
        'accuracy': accuracy_score(y_test, predictions),
        'precision_macro': precision_score(y_test, predictions, average='macro', zero_division=0),
        'recall_macro': recall_score(y_test, predictions, average='macro', zero_division=0),
        'f1_macro': f1_score(y_test, predictions, average='macro', zero_division=0),
        'precision_micro': precision_score(y_test, predictions, average='micro', zero_division=0),
        'recall_micro': recall_score(y_test, predictions, average='micro', zero_division=0),
        'f1_micro': f1_score(y_test, predictions, average='micro', zero_division=0)
    })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_pixel.loc[y_test.index, 'prediction'] = predictions
A value is trying to be set on a c

In [5]:
# Convert results to DataFrames
df_pixel_metrics_combination = pd.DataFrame(pixel_metrics_combination)  # Table 1
df_pixel_metrics_aggregated_combination = pd.DataFrame(pixel_metrics_aggregated_combination)  # Table 2
df_full_predictions_combination = pd.concat(full_predictions_combination).reset_index(drop=True)  # Table 3

In [6]:

# Save results to CSV
df_pixel_metrics_combination.to_csv('cell1320_metrics_separate_combination.csv', index=False)
df_pixel_metrics_aggregated_combination.to_csv('cell1320_metrics_aggregated_combination.csv', index=False)
df_full_predictions_combination.to_csv('cell1320_predictions_combination.csv', index=False)