In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
from datetime import timedelta
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.animation as animation
from matplotlib.animation import FuncAnimation
from matplotlib.colors import Normalize
import numpy as np
import gymnasium as gym
from gymnasium import spaces

In [2]:
#Reading the Data File

file_path = 'zip://clustered_data_4months.zip!clustered_data_4months (2).geojson'
data = gpd.read_file(file_path, driver="GeoJSON")


data.head(), data.columns

print(data)
print(data.columns)
print(len(data))
print(len(data[data['cnprcp_mean']>0]))

  return ogr_read(


                           time satellite instrument    swath_width  \
0     2024-05-22 00:00:00+00:00      Test       Test  278640.704057   
1     2024-05-22 00:02:00+00:00      Test       Test  277041.710508   
2     2024-05-22 00:04:00+00:00      Test       Test  275419.732317   
3     2024-05-22 00:06:00+00:00      Test       Test  273866.283658   
4     2024-05-22 00:08:00+00:00      Test       Test  272466.738302   
...                         ...       ...        ...            ...   
94475 2024-09-22 00:22:00+00:00      Test       Test  269537.568606   
94476 2024-09-22 00:24:00+00:00      Test       Test  267843.485083   
94477 2024-09-22 00:26:00+00:00      Test       Test  266231.255770   
94478 2024-09-22 00:28:00+00:00      Test       Test  264783.666193   
94479 2024-09-22 00:30:00+00:00      Test       Test  263569.285624   

       valid_obs  solar_hour time_range  month    lat_sat     lon_sat  \
0           True    9.006320    morning      5 -38.142798  134.265144   
1

In [3]:
# Preprocessing timestamp
data['time'] = pd.to_datetime(data['time'])


data['time_step'] = (data['time'] - data['time'].min()).dt.total_seconds()

In [4]:
world = gpd.read_file('110m_cultural.zip', layer = 'ne_110m_admin_0_boundary_lines_land')
geometry = [Point(xy) for xy in zip(data['lon_sat'], data['lat_sat'])]
geo_full = gpd.GeoDataFrame(data, geometry=geometry)
geo_full['ground_track'] = geo_full.apply(lambda row: 0 if world.contains(row.geometry).any() else 1, axis=1)
data['ground_track'] = geo_full['ground_track']

In [5]:
data['lat_rad'] = np.radians(data['lat_sat'])
data['lon_rad'] = np.radians(data['lon_sat'])

data['x'] = np.cos(data['lat_rad']) * np.cos(data['lon_rad'])
data['y'] = np.cos(data['lat_rad']) * np.sin(data['lon_rad'])
data['z'] = np.sin(data['lat_rad'])

In [6]:
encoder = OneHotEncoder(sparse_output=False)
time_range_encoded = encoder.fit_transform(data[['time_range']])
time_range_feature_names = encoder.get_feature_names_out(['time_range'])
time_range_df = pd.DataFrame(time_range_encoded, columns=time_range_feature_names, index=data.index)
data = pd.concat([data, time_range_df], axis=1)

In [7]:
scaler = MinMaxScaler()
cols_to_scale = ['x','y','z']
scaled_values = scaler.fit_transform(data[cols_to_scale])
data['x_norm'] = scaled_values[:, 0]
data['y_norm'] = scaled_values[:, 1]
data['z_norm'] = scaled_values[:, 2]

In [31]:
class SatelliteEnv(gym.Env):
    def __init__(self, df, alpha=1.0, gamma=1.0, mu=1.0):
        super(SatelliteEnv, self).__init__()
        self.df = df.reset_index(drop=True)
        self.n_steps = len(self.df)
        self.alpha = alpha
        self.gamma = gamma
        self.mu = mu

        # Action space: 0 = don't act, 1 = act.
        self.action_space = spaces.Discrete(2)

        # One-hot encoded time_range columns.
        self.one_hot_cols = [col for col in df.columns if col.startswith('time_range_')]
        n_one_hot = len(self.one_hot_cols)

        self.obs_dim = 4 + n_one_hot
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(self.obs_dim,), dtype=np.float32)
        self._index = 0

    # Define reset method
    def reset(self, seed=None, options=None):
        self._index = 0
        return self._get_obs(), {}
    
    # Define step method
    def step(self, action):
        reward = self._compute_reward(action)
        self._index += 1
        done = (self._index >= self.n_steps - 1)
        obs = self._get_obs() if not done else np.zeros(self.obs_dim, dtype=np.float32)
        terminated = done  # assuming episode termination
        truncated = False  # assuming no truncation
        return obs, reward, terminated, truncated, {}
    
    # Define the State-Space
    def _get_obs(self):
        row = self.df.iloc[self._index]
        # states : x_norm, y_norm, z_norm, ground_track.
        cont_features = np.array([
            row['x_norm'],
            row['y_norm'],
            row['z_norm'],
            float(row['ground_track'])
        ], dtype=np.float32)
        # One-hot encoded time_range features:
        one_hot_features = row[self.one_hot_cols].values.astype(np.float32)
        # Concatenate the encoded vector and state features to form the full state.
        obs = np.concatenate([cont_features, one_hot_features])
        return obs
    
    # Define the Reward Function
    # correct decision*intensity
    def _compute_reward(self, action):
        row = self.df.iloc[self._index]
        cnprcp_mean = row['cnprcp_mean']
        scale = 10000  # scaling factor to amplify small precipitation values
        if action == 1:
            if cnprcp_mean > 0:
                # Correct detection: give a base reward of 1 plus bonus proportional to intensity.
                reward = 1 + (cnprcp_mean * scale)
            else:
                # False positive: a small penalty for acting when there's no precipitation.
                reward = -0.1
        else:  # action == 0
            if cnprcp_mean > 0:
                # False negative: add a moderate penalty for missing a precipitation event.
                reward = -0.01
            else:
                reward = 0
        return reward

In [9]:
import torch
print(torch.backends.mps.is_available())

True


In [32]:
from sb3_contrib import QRDQN 
from stable_baselines3.common.env_checker import check_env

all_f1_scores = []
all_precision_scores = []
all_recall_scores = []
all_rewards = []

tscv = TimeSeriesSplit(n_splits=3)

for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(data)):
    train_df = data.iloc[train_idx]
    test_df = data.iloc[test_idx]

    train_env = SatelliteEnv(train_df, alpha=20.0, gamma=1.0, mu=1.0)
    check_env(train_env, warn=True)

    # 3) Train the QRDQN model (a distributional RL algorithm)
    model = QRDQN("MlpPolicy", train_env, verbose=0)
    model.learn(total_timesteps=100000)  

    # 4) Evaluate on the test split
    test_env = SatelliteEnv(test_df, alpha=20.0, gamma=1.0, mu=1.0)
    obs, _ = test_env.reset()

    done = False
    total_reward = 0.0
    predicted_actions = []
    actual_labels = []

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        predicted_actions.append(action)
        
        # Get actual label from cnprcp_mean: 1 if > 0, else 0.
        current_cprcp = test_env.df.iloc[test_env._index]['cnprcp_mean']
        actual_label = 1 if current_cprcp > 0 else 0
        actual_labels.append(actual_label)

        obs, reward, terminated, truncated, info = test_env.step(action)
        total_reward += reward
        done = terminated or truncated

    f1 = f1_score(actual_labels, predicted_actions, zero_division=0)
    precision = precision_score(actual_labels, predicted_actions, zero_division=0)
    recall = recall_score(actual_labels, predicted_actions, zero_division=0)
    all_f1_scores.append(f1)
    all_precision_scores.append(precision)
    all_recall_scores.append(recall)
    all_rewards.append(total_reward)
    print(f"Fold {fold_idx}: F1_Score = {f1:.3f}, Precision_Score = {precision:.3f}, Recall_Score = {recall:.3f}, Total Reward = {total_reward:.3f}")

mean_f1 = np.mean(all_f1_scores)
mean_reward = np.mean(all_rewards)
print(f"\nOverall: Mean F1 = {mean_f1:.3f}, Mean Cumulative Reward = {mean_reward:.3f}")

Fold 0: F1_Score = 0.095, Precision_Score = 0.050, Recall_Score = 0.898, Total Reward = -296.734
Fold 1: F1_Score = 0.129, Precision_Score = 0.077, Recall_Score = 0.407, Total Reward = 191.068
Fold 2: F1_Score = 0.083, Precision_Score = 0.044, Recall_Score = 0.729, Total Reward = -203.417

Overall: Mean F1 = 0.102, Mean Cumulative Reward = -103.028
