In [44]:
import netCDF4
import os
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model

In [ ]:
# Directory holding the IMERG vector files. Set the IMERG_DATA_DIR environment
# variable to point somewhere else on another machine; when it is unset the
# original local path is used, so existing behaviour is unchanged.
CMROOT = os.environ.get(
    'IMERG_DATA_DIR',
    '/Users/axt5780/Library/CloudStorage/OneDrive-ThePennsylvaniaStateUniversity/PIML_project/IMERG_vectors/',
)
FILSTR = 'IMERG_vec_201801.nc'

# Open the NetCDF file read-only. The handle is intentionally left open because
# later cells read from `nc`; call nc.close() once the data is no longer needed.
nc = netCDF4.Dataset(os.path.join(CMROOT, FILSTR), mode='r')

In [36]:
# List every variable stored in the file together with its dtype, dimensions and attributes.
nc.variables

{'time': <class 'netCDF4.Variable'>
 int64 time(time)
     units: minutes since 2018-01-01 00:00:00.000000
     calendar: julian
 unlimited dimensions: 
 current shape = (1488,)
 filling on, default _FillValue of -9223372036854775806 used,
 'idx': <class 'netCDF4.Variable'>
 int64 idx(idx)
 unlimited dimensions: 
 current shape = (39125,)
 filling on, default _FillValue of -9223372036854775806 used,
 'pmmhr': <class 'netCDF4.Variable'>
 float32 pmmhr(time, idx)
     _FillValue: nan
 unlimited dimensions: 
 current shape = (1488, 39125)
 filling on,
 'lat': <class 'netCDF4.Variable'>
 float32 lat(time, idx)
     _FillValue: nan
 unlimited dimensions: 
 current shape = (1488, 39125)
 filling on,
 'lon': <class 'netCDF4.Variable'>
 float32 lon(time, idx)
     _FillValue: nan
 unlimited dimensions: 
 current shape = (1488, 39125)
 filling on}

In [28]:
# Unpack the 2-D shape of 'pmmhr', which is dimensioned (time, idx):
# t = length of the time axis, d = length of the idx axis.
t, d = nc.variables['pmmhr'].shape

In [39]:
# Hold out 20% of the rows of 'pmmhr' for validation. A fixed random_state makes
# the shuffled partition identical on every run, so results stay comparable
# after a kernel restart.
# NOTE(review): 'pmmhr' is dimensioned (time, idx), so rows are time steps and a
# shuffled split places neighbouring time steps on both sides of the split —
# confirm this is acceptable or switch to a chronological split.
train, test = train_test_split(nc.variables['pmmhr'], test_size=0.2, random_state=42)

In [47]:
class Autoencoder(Model):
  """Dense autoencoder with a single bottleneck layer.

  The encoder flattens each sample and maps it to `latent_dim` features with a
  ReLU dense layer. The decoder maps the latent vector back through one dense
  layer with as many units as there are elements in `shape`, then reshapes the
  result to `shape`.

  Args:
    latent_dim: Number of units in the bottleneck layer.
    shape: Shape of one sample, excluding the batch axis.
    output_activation: Activation of the decoder's dense layer. Defaults to
      'sigmoid' (the previous hard-coded value), which limits reconstructions
      to the open interval (0, 1).
      NOTE(review): if the training data is not scaled into [0, 1], pass an
      unbounded activation such as 'relu' or 'linear' instead — confirm
      against the data range.
  """

  def __init__(self, latent_dim, shape, output_activation='sigmoid'):
    super().__init__()
    self.latent_dim = latent_dim
    self.shape = shape
    self.output_activation = output_activation

    # Total number of elements in one sample, as a plain Python int rather
    # than a NumPy scalar.
    n_outputs = int(tf.math.reduce_prod(shape).numpy())

    self.encoder = tf.keras.Sequential([
      layers.Flatten(),
      layers.Dense(latent_dim, activation='relu'),
    ])
    self.decoder = tf.keras.Sequential([
      layers.Dense(n_outputs, activation=output_activation),
      layers.Reshape(shape)
    ])

  def call(self, x):
    """Encode `x` to the latent space and return its reconstruction."""
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded


# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable across kernel restarts.
tf.keras.utils.set_random_seed(42)

# Per-sample shape: everything after the leading (sample) axis of `test`.
shape = test.shape[1:]
latent_dim = 64  # width of the bottleneck layer
autoencoder = Autoencoder(latent_dim, shape)

In [48]:
# Configure training: Adam optimizer, mean-squared reconstruction error as the loss.
autoencoder.compile(
    optimizer='adam',
    loss=losses.MeanSquaredError(),
)


In [49]:
# Train the autoencoder to reproduce its input (the inputs double as targets)
# and evaluate the reconstruction loss on the held-out split after each epoch.
# The returned History object is kept so the per-epoch losses
# (history.history) stay available for inspection instead of being discarded.
history = autoencoder.fit(train, train,
                          epochs=10,
                          shuffle=True,
                          validation_data=(test, test))

Epoch 1/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - loss: 0.2102 - val_loss: 0.0534
Epoch 2/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 83ms/step - loss: 0.0439 - val_loss: 0.0480
Epoch 3/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 90ms/step - loss: 0.0408 - val_loss: 0.0471
Epoch 4/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 89ms/step - loss: 0.0385 - val_loss: 0.0467
Epoch 5/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - loss: 0.0388 - val_loss: 0.0465
Epoch 6/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 176ms/step - loss: 0.0414 - val_loss: 0.0463
Epoch 7/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 150ms/step - loss: 0.0360 - val_loss: 0.0462
Epoch 8/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 195ms/step - loss: 0.0361 - val_loss: 0.0462
Epoch 9/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x135e94cd0>

In [50]:
# Run the held-out samples through the encoder only, then convert the
# resulting tensor to a NumPy array.
latent_tensor = autoencoder.encoder(test)
encoded_frames = latent_tensor.numpy()

In [55]:
# Sanity check: one row per held-out sample, one column per latent unit.
encoded_frames.shape

(298, 64)