In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
%pylab inline
%load_ext line_profiler
import xarray as xr
from tqdm.autonotebook import tqdm
import pytorch_lightning as pl
from pytorch_lightning.callbacks import Callback

# Enlarge the default figure size and font for every plot in this notebook
mpl.rcParams.update({
    'figure.figsize': (10, 8),
    'font.size': 16,
})

from hydrogen_pg.dataloaders.taylor_example_dataloader import Conv2dDataset
from hydrogen_pg.dataloaders.taylor_example_dataloader import Conv2dDataModule
from hydrogen_pg.models.taylor_example_model import RMM_NN_2D_B1
from hydrogen_pg.utils.callbacks import MetricsCallback

Populating the interactive namespace from numpy and matplotlib


  from tqdm.autonotebook import tqdm


In [26]:
# --- Data source ------------------------------------------------------------
# Metadata file handed to Conv2dDataModule as its first argument.
pfmeta_file = '/home/ab6361/small_CONUS1_2003_fake.out.pfmetadata'
# Forwarded to the datamodule as `raw_isel_args`
# (presumably an index-based selection on the time axis — confirm).
raw_isel_args = dict(time=[0, 1])

# --- Variables --------------------------------------------------------------
# Input and output variable names shared by the datamodule and the model.
in_vars = ['precipitation', 'temperature', 'pressure']
out_vars = ['pressure']
# Surface pressure
z_strategy = 0

# --- Sampling / batching ----------------------------------------------------
# Patch extent along each horizontal dimension.
patch_sizes = dict(x=50, y=50)
max_patches = 100
batch_size = 100

In [28]:
# Build the data module from the notebook-level configuration.
datamodule = Conv2dDataModule(
    pfmeta_file,
    in_vars=in_vars, out_vars=out_vars,
    raw_isel_args=raw_isel_args,
    z_strategy=z_strategy,
    patch_sizes=patch_sizes, max_patches=max_patches,
    batch_size=batch_size,
)

# What the data side reports about itself.
data_shape = datamodule.shape
data_in_features, data_out_features = datamodule.feature_names

# Size the network from the datamodule's reported shape.
model = RMM_NN_2D_B1(grid_size=data_shape, in_vars=in_vars, out_vars=out_vars)
model.configure_optimizers()
model.configure_loss()

# What the model side reports about itself.
model_shape = model.shape
model_in_features, model_out_features = model.feature_names

# Sanity checks: data and model must agree on shape and feature names.
assert data_shape == model_shape
assert data_in_features == model_in_features
assert data_out_features == model_out_features



In [29]:
# Run the datamodule's setup for the 'fit' stage, then take its training
# loader so it can be inspected outside of the Trainer.
datamodule.setup("fit")
tl = datamodule.train_dataloader()

Error Reading Subgrid Header: Invalid argument


In [30]:
# Inspect the `ds` object held on the datamodule's private `_full` attribute
# (presumably the underlying dataset — confirm). As the last expression in the
# cell, it is rendered as the cell output.
datamodule._full.ds

In [None]:
# Callback instance; its `metrics` mapping is read by the plotting cell below.
metrics = MetricsCallback()

# Train for 10 epochs on a single GPU, with the callback attached.
trainer = pl.Trainer(
    max_epochs=10,
    gpus=1,
    callbacks=[metrics],
)
trainer.fit(model, datamodule)

Now that I've got the data issues sorted out, I want to stress test the CNN that Elena has been using on CONUS more completely. However, I'm noticing that "true" random sampling is really not optimal for data loading (meaning, I guess, that I don't completely have the data issues sorted out). The problem is more or less this:

The dimensions of the CONUS data are `(time, z, y, x)` for each variable, where each time slice lives in its own file. The approximate sizes of these dimensions (ignoring time) are `(5, 1888, 3342)`. The dimensions of an input sample for the CNN are `(channels, y_sub, x_sub)`, where `channels` is the number of input variables and `y_sub`, `x_sub` are the window sizes that I sample with; currently the window is `(50, 50)`. So we can take many spatial samples from a file, but only a single time sample. There is non-negligible …

In [None]:
# Training vs. validation loss as recorded by the metrics callback
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(metrics.metrics['train_loss'], label='Train loss')
loss_ax.plot(metrics.metrics['val_loss'], label='Validation loss')
loss_ax.legend()
loss_ax.set_xlabel('Epoch #')
loss_ax.set_ylabel('MSE Loss')
# Fixed y-range so the curves are compared on the same scale
loss_ax.set_ylim([0.00, 0.02])

In [None]:
# Pull one batch from the validation loader of the `datamodule` built earlier.
# (`dataloader` is not defined anywhere in this notebook; the object that
# exposes `val_dataloader()` is `datamodule`.)
val_x, val_y = next(iter(datamodule.val_dataloader()))

# Keep only the first sample. Slicing with 0:1 (rather than indexing with 0)
# preserves the leading batch axis for the model call.
val_x = val_x[0:1]
val_y = val_y[0:1].squeeze().cpu().detach().numpy()

# Predict for that sample and convert to a NumPy array for plotting.
val_yhat = model(val_x)
val_yhat = val_yhat.squeeze().cpu().detach().numpy()

# Error of the prediction: truth minus prediction.
err = val_y - val_yhat

In [None]:
# One row, five slots: truth | prediction | shared colorbar | error | error colorbar.
# The narrow (0.1) slots hold the colorbars.
layout = {'width_ratios': [1, 1, 0.1, 1, 0.1], 'height_ratios': [1]}
fig, axes = plt.subplots(1, 5, figsize=(22, 7), gridspec_kw=layout)
ax_true, ax_pred, cax_value, ax_err, cax_err = axes

# Truth and prediction share the same fixed color scale, so one colorbar serves both.
value_im = ax_true.imshow(val_y, vmin=0, vmax=1)
ax_pred.imshow(val_yhat, vmin=0, vmax=1)
plt.colorbar(value_im, cax=cax_value)

# The error panel gets its own colormap and colorbar.
err_im = ax_err.imshow(err, cmap='coolwarm_r')
plt.colorbar(err_im, cax=cax_err)

# Hide ticks and frames on the image panels only (not on the colorbars).
for ax in (ax_true, ax_pred, ax_err):
    ax.axis('off')

# NOTE(review): the titles say "Saturation" while `out_vars` is ['pressure'] —
# confirm which quantity is actually plotted.
ax_true.set_title('True Saturation')
ax_pred.set_title('Predicted Saturation')
ax_err.set_title('Error')