In [1]:
import os

# Work from the parent directory: the relative paths used later in this notebook
# ('results/...', 'data/...', 'figures/...') are resolved against it.
# The guard makes the cell idempotent: an unconditional os.chdir('..') climbs one
# more level every time the cell is re-run in the same kernel.
# NOTE(review): assumes the `modules` package imported by this notebook is a
# directory in the project root and is absent from the notebook's own folder - confirm.
if not os.path.isdir('modules'):
    os.chdir('..')

In [2]:
import json
import torch
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pytorch_lightning as pl
from tqdm.notebook import tqdm
from collections import defaultdict
from sklearn.metrics import mean_squared_error, mean_absolute_error
from torch.utils.data import DataLoader
from modules.constants import feature_list
from modules.data import GapFillingDataset
from modules.mlp import MLP
from modules.baseline import Baseline
from modules.gapt import GapT

In [3]:
# Sequence-length variant to evaluate; the other option used here is 'four_week'.
seq_len = 'two_week' # 'four_week'

# Directory that receives the metric files written by the cells below.
results_subdir = f'results/{seq_len}'
os.makedirs(results_subdir, exist_ok=True)

In [4]:
output_dir = f'results/mlp_{seq_len}_seq'

# Read the metadata stored for the MLP run
mlp_metadata_path = os.path.join(output_dir, 'metadata.json')
with open(mlp_metadata_path, 'r') as f:
    mlp_metadata = json.load(f)

# Arguments recorded in the metadata
args = mlp_metadata['args']

# Constructor arguments taken verbatim from the recorded arguments
mlp_hparams = {
    name: args[name]
    for name in ('d_output', 'learning_rate', 'dropout_rate', 'optimizer')
}

# Restore the MLP from the checkpoint in the recorded output directory
mlp = MLP.load_from_checkpoint(
    checkpoint_path=os.path.join(args['output_dir'], 'model.ckpt'),
    d_input=len(feature_list),
    **mlp_hparams,
)

In [5]:
output_dir = f'results/baseline_{seq_len}_seq'

# Read the metadata stored for the baseline run
baseline_metadata_path = os.path.join(output_dir, 'metadata.json')
with open(baseline_metadata_path, 'r') as f:
    baseline_metadata = json.load(f)

# Arguments recorded in the metadata
args = baseline_metadata['args']

# Constructor arguments taken verbatim from the recorded arguments
baseline_hparams = {
    name: args[name]
    for name in (
        'n_head',
        'd_model',
        'd_output',
        'd_embedding',
        'learning_rate',
        'dropout_rate',
        'optimizer',
    )
}

# Restore the baseline model from the checkpoint in the recorded output directory
baseline = Baseline.load_from_checkpoint(
    checkpoint_path=os.path.join(args['output_dir'], 'model.ckpt'),
    d_input=len(feature_list),
    **baseline_hparams,
)

In [6]:
output_dir = f'results/gapt_naive_{seq_len}_seq'

# Read the metadata stored for the naive GapT run
gapt_naive_metadata_path = os.path.join(output_dir, 'metadata.json')
with open(gapt_naive_metadata_path, 'r') as f:
    gapt_naive_metadata = json.load(f)

# Arguments recorded in the metadata
args = gapt_naive_metadata['args']

# Constructor arguments taken verbatim from the recorded arguments
gapt_naive_hparams = {
    name: args[name]
    for name in (
        'n_head',
        'd_feedforward',
        'd_model',
        'n_layers',
        'd_output',
        'learning_rate',
        'dropout_rate',
        'optimizer',
        'mode',
    )
}

# Restore the naive GapT model from the checkpoint in the recorded output directory
gapt_naive = GapT.load_from_checkpoint(
    checkpoint_path=os.path.join(args['output_dir'], 'model.ckpt'),
    d_input=len(feature_list),
    **gapt_naive_hparams,
)

In [7]:
output_dir = f'results/gapt_default_{seq_len}_seq'

# Read the metadata stored for the default GapT run
gapt_metadata_path = os.path.join(output_dir, 'metadata.json')
with open(gapt_metadata_path, 'r') as f:
    gapt_metadata = json.load(f)

# Arguments recorded in the metadata
args = gapt_metadata['args']

# Constructor arguments taken verbatim from the recorded arguments
gapt_hparams = {
    name: args[name]
    for name in (
        'n_head',
        'd_feedforward',
        'd_model',
        'n_layers',
        'd_output',
        'learning_rate',
        'dropout_rate',
        'optimizer',
        'mode',
    )
}

# Restore the GapT model from the checkpoint in the recorded output directory
gapt = GapT.load_from_checkpoint(
    checkpoint_path=os.path.join(args['output_dir'], 'model.ckpt'),
    d_input=len(feature_list),
    **gapt_hparams,
)

In [8]:
# Build the test dataloader from the GapT run's metadata.
# The arguments are read explicitly from `gapt_metadata` instead of the ambient
# `args` variable: `args` is rebound by every model-loading cell, so its content
# depends on which of those cells happened to run last.
gapt_args = gapt_metadata['args']

# paths.json is indexed by split name; only the 'test' entry is used here
with open(os.path.join(gapt_args['data_dir'], 'paths.json'), 'r') as f:
    data_paths = json.load(f)

test_dataset = GapFillingDataset(data_paths['test'], gapt_metadata['feature_list'])
# shuffle=False keeps the batch order fixed between runs
test_dataloader = DataLoader(test_dataset, batch_size=gapt_args['batch_size'], shuffle=False)

### Calculate metrics

In [9]:
# Models under evaluation, keyed by the prefix used in the metric names
models = {'mlp': mlp, 'baseline': baseline, 'gapt_naive': gapt_naive, 'gapt': gapt}

# Switch every model to eval mode
for model in models.values():
    model.eval()

# Order in which each metric family is appended per model; it fixes the key
# order of the per-station dictionaries (and thus of the files written later).
rmse_order = ('baseline', 'gapt_naive', 'gapt', 'mlp')
mae_mbe_order = ('mlp', 'baseline', 'gapt_naive', 'gapt')

# station key -> metric name -> list of per-sample values
metrics_by_key = defaultdict(lambda: defaultdict(list))

for batch in tqdm(test_dataloader):

    # Forward passes without gradient tracking
    with torch.no_grad():
        outputs = {name: model(batch) for name, model in models.items()}

    target = batch['target'].cpu().detach().numpy()
    predictions = {name: out.cpu().detach().numpy() for name, out in outputs.items()}
    mask = batch['mask'].cpu().detach().numpy()
    file = batch['file']

    # Metrics are computed only where the mask is False
    inverted_mask = ~mask

    for idx in range(target.shape[0]):
        # Station key: file name without directory and extension, up to the first underscore
        key = file[idx].rsplit('/', 1)[-1].partition('.')[0].partition('_')[0]
        selected = inverted_mask[idx]

        # Targets and predictions are exponentiated before the errors are computed
        true_values = np.exp(target[idx][selected])
        preds = {name: np.exp(arr[idx][selected]) for name, arr in predictions.items()}

        station = metrics_by_key[key]
        station['target_values'].extend(true_values)

        # Accumulate per-sample errors by station key
        for name in rmse_order:
            station[f'{name}_rmse'].append(np.sqrt(mean_squared_error(true_values, preds[name])))

        for name in mae_mbe_order:
            station[f'{name}_mae'].append(mean_absolute_error(true_values, preds[name]))

        for name in mae_mbe_order:
            station[f'{name}_mbe'].append(np.mean(preds[name] - true_values))

  0%|          | 0/124 [00:00<?, ?it/s]

In [10]:
# Store the raw per-station metric lists; the outer defaultdict is converted to a
# plain dict before pickling.
station_pickle_path = f'results/{seq_len}/station_metrics.pkl'
plain_metrics = dict(metrics_by_key)
with open(station_pickle_path, 'wb') as f:
    pickle.dump(plain_metrics, f)

**How to read the data from file:**

In [11]:
# Read the per-station metrics back from disk.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this notebook.
station_pickle_path = f'results/{seq_len}/station_metrics.pkl'
with open(station_pickle_path, 'rb') as f:
    loaded_metrics = pickle.load(f)

# Re-wrap the loaded mapping in the nested defaultdict structure used above
metrics_by_key = defaultdict(lambda: defaultdict(list))
metrics_by_key.update(loaded_metrics)

In [12]:
# Per-station summary: average target value plus mean/std of every error metric
results_by_key = {}
for key, station_metrics in tqdm(metrics_by_key.items()):
    summary = {'average_target': float(np.mean(station_metrics['target_values']))}

    for metric_name, values in station_metrics.items():
        # The raw target values are summarised above, not as an error metric
        if metric_name == 'target_values':
            continue
        summary[metric_name + '_mean'] = float(np.mean(values))
        summary[metric_name + '_std'] = float(np.std(values))

    results_by_key[key] = summary

  0%|          | 0/31 [00:00<?, ?it/s]

In [13]:
# Write the per-station summary as indented JSON
station_json_path = f'results/{seq_len}/station_metrics.json'
with open(station_json_path, 'w') as f:
    f.write(json.dumps(results_by_key, indent=4))

In [14]:
# One list per (model, metric); the 'n'-prefixed metrics are the normalized variants
model_names = ('mlp', 'baseline', 'gapt_naive', 'gapt')
base_metrics = ('rmse', 'mae', 'mbe')
normalized_metrics = tuple(f'n{base}' for base in base_metrics)
metrics = {
    model_name: {metric_name: [] for metric_name in base_metrics + normalized_metrics}
    for model_name in model_names
}

# Pool the per-sample metrics of all stations; the normalized variants divide each
# value by the mean target value of the station it belongs to
for key, value in metrics_by_key.items():
    target_mean = np.mean(value['target_values'])

    for model_name, collected in metrics.items():
        for base in base_metrics:
            per_sample = value[f'{model_name}_{base}']
            collected[base].extend(per_sample)
            collected[f'n{base}'].extend([x/target_mean for x in per_sample])

# Mean and standard deviation of every metric for every model
results = {
    model_name: {
        f'{metric_name}_{stat}': float(reducer(metric_values))
        for metric_name, metric_values in model_metrics.items()
        for stat, reducer in (('mean', np.mean), ('std', np.std))
    }
    for model_name, model_metrics in metrics.items()
}

with open(f'results/{seq_len}/metrics.json', 'w') as json_file:
    json.dump(results, json_file, indent=4)

### Plot comparisons

In [15]:
# Plot model predictions against the ground truth for every sample of the first
# `num_batches` test batches and save each figure as PNG and PDF.

# Inference only: put every model in eval mode
mlp.eval()
baseline.eval()
gapt_naive.eval()
gapt.eval()

num_batches = 5  # number of test batches to plot
formats = ['png', 'pdf']

for ext in formats:
    os.makedirs(f'figures/{seq_len}/{ext}', exist_ok=True)

# Site metadata; indexed below as measurement_sites[<station key>][<field>]
with open('data/measurement_sites.json', 'r') as sites_file:
    measurement_sites = pd.read_json(sites_file)

# (name, model, legend label, line colour) in plotting order
model_specs = [
    ('mlp', mlp, 'MLP', 'tab:blue'),
    ('baseline', baseline, 'Richard et al.', 'tab:orange'),
    ('gapt_naive', gapt_naive, 'GapT naive', 'tab:green'),
    ('gapt', gapt, 'GapT', 'tab:purple'),
]

# zip() stops as soon as range(num_batches) is exhausted, so no extra batch is fetched
for n, batch in zip(range(num_batches), test_dataloader):

    # Convert to NumPy before exponentiating (same order as for `target` below);
    # .cpu() keeps this working when a model returns a tensor on another device.
    with torch.no_grad():
        predictions = {
            name: np.exp(model(batch).cpu().numpy())
            for name, model, _, _ in model_specs
        }

    date = batch['unix_date'].cpu().numpy()
    mask = batch['mask'].cpu().numpy()
    target = np.exp(batch['target'].cpu().numpy())
    files = batch['file']

    # RMSE is computed only where the mask is False
    inverted_mask = ~mask

    # Iterate over the samples actually present in this batch: a DataLoader's last
    # batch can be shorter than the configured batch_size, which would raise an
    # IndexError if the loop ran over batch_size instead.
    for idx in tqdm(range(target.shape[0])):
        selected = inverted_mask[idx]
        rmse = {
            name: np.sqrt(mean_squared_error(target[idx][selected], pred[idx][selected]))
            for name, pred in predictions.items()
        }

        # 'unix_date' values are interpreted as seconds since the epoch
        sample_date = pd.to_datetime(date[idx], unit='s')

        # File name without directory and extension; the part before the first
        # underscore is the key into `measurement_sites`
        segment = files[idx].split('/')[-1].split('.')[0]
        key = segment.split('_')[0]
        station_name = measurement_sites[key]['station_name']
        environment_type = ' '.join(measurement_sites[key]['environment_type'].split('_'))

        fig, ax = plt.subplots(figsize=(10, 5))
        for name, _, label, colour in model_specs:
            ax.plot(sample_date, predictions[name][idx], label=f'{label} (RMSE={rmse[name]:.2f})', c=colour)
        ax.plot(sample_date, target[idx], label='Ground truth', c='tab:red')
        ax.set_xlabel('Date')
        ax.set_ylabel(r'N100 Concentration (cm$^{-3}$)')
        ax.set_title(f'{station_name} ({environment_type})')
        ax.legend(frameon=False)

        for ext in formats:
            fig.savefig(f'figures/{seq_len}/{ext}/{segment}_{n}_{idx}.{ext}')

        # Release the figure; many figures are created in this loop
        plt.close(fig)

  0%|          | 0/256 [00:00<?, ?it/s]

  0%|          | 0/256 [00:00<?, ?it/s]

  0%|          | 0/256 [00:00<?, ?it/s]

  0%|          | 0/256 [00:00<?, ?it/s]

  0%|          | 0/256 [00:00<?, ?it/s]