In [2]:
import numpy as np
import xarray as xr
import pandas as pd
from glob import glob
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, Dropout, Activation, UpSampling2D, GlobalMaxPooling2D, multiply
from tensorflow.keras.backend import max
from keras_unet_collection import models, base, utils

In [3]:
# Pull all data files

frontobject_conus_path = "E:/FrontsProjectData/pickle_files/*/*/*/FrontObjects*conus.pkl"
frontobject_window_path = "E:/FrontsProjectData/pickle_files/*/*/*/FrontObjects*lon*lat*.pkl"
surfacedata_conus_path = "E:/FrontsProjectData/pickle_files/*/*/*/SurfaceData*conus.pkl"
surfacedata_window_path = "E:/FrontsProjectData/pickle_files/*/*/*/SurfaceData*lon*lat*.pkl"

frontobject_conus_files = sorted(glob(frontobject_conus_path))
frontobject_window_files = sorted(glob(frontobject_window_path))
surfacedata_conus_files = sorted(glob(surfacedata_conus_path))
surfacedata_window_files = sorted(glob(surfacedata_window_path))

print("FrontObject CONUS files: %d" % len(frontobject_conus_files))
print("FrontObject window files: %d" % len(frontobject_window_files))
print("SurfaceData CONUS files: %d" % len(surfacedata_conus_files))
print("SurfaceData window files: %d" % len(surfacedata_window_files))

FrontObject CONUS files: 5690
FrontObject window files: 256050
SurfaceData CONUS files: 5808
SurfaceData window files: 261219


In [4]:
print('Concatenating frontobject datasets....', end='')
frontobject_conus_dss = xr.concat(map(pd.read_pickle, frontobject_conus_files),
                                 dim='time')
print('done')
print('Concatenating surfacedata datasets....', end='')
surfacedata_conus_dss = xr.concat(map(pd.read_pickle, surfacedata_conus_files),
                                 dim='time')
print('done')
print(frontobject_conus_dss)
print(surfacedata_conus_dss)

Concatenating frontobject datasets....done
Concatenating surfacedata datasets....done
<xarray.Dataset>
Dimensions:     (latitude: 101, longitude: 181, time: 5690)
Coordinates:
  * latitude    (latitude) float64 50.0 49.75 49.5 49.25 ... 25.5 25.25 25.0
  * longitude   (longitude) float64 238.0 238.2 238.5 ... 282.5 282.8 283.0
  * time        (time) datetime64[ns] 2008-01-01 ... 2009-12-31T21:00:00
Data variables:
    identifier  (time, latitude, longitude) float64 0.0 0.0 0.0 ... 0.0 0.0 0.0
<xarray.Dataset>
Dimensions:    (latitude: 101, longitude: 181, time: 5808)
Coordinates:
  * time       (time) datetime64[ns] 2008-01-01 ... 2009-12-31T21:00:00
  * longitude  (longitude) float32 238.0 238.2 238.5 238.8 ... 282.5 282.8 283.0
  * latitude   (latitude) float32 50.0 49.75 49.5 49.25 ... 25.5 25.25 25.0
Data variables:
    d2m        (time, latitude, longitude) float32 263.4 264.2 ... 290.6 290.8
    sp         (time, latitude, longitude) float32 8.697e+04 ... 1.02e+05
    t2m        

In [5]:
# Create new dataset that only contains data with the same timestamp on both datasets. For example, in the code below, the 
# frontobject dataset has a smaller time dimension than the surfacedata array, so we will check to see which time stamps from
# the surfacedata array are present in the frontobject array. 

frontobject_time = frontobject_conus_dss.time.values
sfc_time = surfacedata_conus_dss.time.values

indices = []
for i in range(0,len(sfc_time)):
    index = np.where(sfc_time[i]==frontobject_time)
    if index[0].size != 0:
        indices.append(index[0][0])

# While merging the two datasets, we will select data whose timestamps are present in both datasets.
conus_df = xr.merge([surfacedata_conus_dss.sel(time=frontobject_time[indices]), frontobject_conus_dss]).to_dataframe() 
print(conus_df)

                                               d2m             sp         t2m  \
latitude longitude time                                                         
50.0     238.0     2008-01-01 00:00:00  263.354919   86966.656250  266.225647   
                   2008-01-01 03:00:00  261.674286   86920.085938  263.170258   
                   2008-01-01 06:00:00  259.960571   86893.218750  261.210358   
                   2008-01-01 09:00:00  258.088531   86807.242188  260.578979   
                   2008-01-01 12:00:00  260.273743   86672.007812  262.313812   
...                                            ...            ...         ...   
25.0     283.0     2009-12-31 09:00:00  289.395325  102062.312500  296.643921   
                   2009-12-31 12:00:00  289.686646  102126.640625  297.043457   
                   2009-12-31 15:00:00  289.871857  102293.203125  297.365814   
                   2009-12-31 18:00:00  290.410706  102119.593750  297.396698   
                   2009-12-3

In [13]:
conus_df.head()
inputs = conus_df.loc[:,['t2m','d2m','sp','theta_w','u10','v10']]

In [16]:
t2m = conus_df.t2m.values
d2m = conus_df.d2m.values
theta_w = conus_df.theta_w.values
sp = conus_df.sp.values
u10 = conus_df.u10.values
v10 = conus_df.v10.values
identifier = conus_df.identifier.values
print(conus_df['identifier'].shape)

(104018890,)


In [29]:
print(identifier.shape)
print(inputs.shape)

(104018890,)
(104018890, 6)


In [37]:
model = models.unet_2d((None, None, 6), [64, 128, 256, 512, 1024], n_labels=6,  stack_num_down=2, 
                        stack_num_up=1, activation='LeakyReLU', output_activation='Softmax', 
                        batch_norm=False, pool='max', unpool='nearest', name='unet')

In [38]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(inputs, conus_df['identifier'], validation_data=(inputs, conus_df['identifier']), epochs=100)

Epoch 1/100


ValueError: in user code:

    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\training.py:747 train_step
        y_pred = self(x, training=True)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:985 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\functional.py:385 call
        return self._run_internal_graph(
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\functional.py:508 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:975 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    C:\Users\Andrew\anaconda3\envs\frontdetect\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:191 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer unet_down0_0 is incompatible with the layer: : expected min_ndim=4, found ndim=2. Full shape received: [None, 6]
